Source code for pyleoclim.core.lipd

"""
This module makes it possible to manipulate LiPD objects and to take advantage of their metadata for specific functionalities. Lipd objects are needed to create LipdSeries objects, which carry most of the timeseries functionalities.
"""

from ..utils import mapping, lipdutils

from ..core.lipdseries import LipdSeries
from copy import deepcopy
import warnings
import os

import lipd as lpd

class Lipd:
    '''Create a Lipd object from LiPD files or from already-loaded LiPD data.

    This allows to manipulate LiPD objects and take advantage of the metadata
    information for specific functionalities. Lipd objects are needed to create
    LipdSeries objects, which carry most of the timeseries functionalities.

    Parameters
    ----------

    usr_path : str
        Path to the Lipd file(s). Can be URL (LiPD utilities only support
        loading one file at a time from a URL). If it's a URL, it must start
        with "http", "https", or "ftp".

    lipd_dict : dict
        LiPD files already loaded into Python through the LiPD utilities.
        The dictionary passed in is never modified by this class.

    validate : bool
        Validate the LiPD files upon loading. Note that for a large library
        (>300files) this can take up to half an hour.

    remove : bool
        If validate is True and remove is True, ignores non-valid LiPD files.
        Note that loading unvalidated Lipd files may result in errors for some
        functionalities but not all. Setting remove without validate turns
        validation on.

    Raises
    ------

    ValueError
        If no dataset is left after loading (and optional validation).

    TODO
    ----

    Support querying the LinkedEarth platform

    References
    ----------

    McKay, N. P., & Emile-Geay, J. (2016). Technical Note: The Linked Paleo
    Data framework – a common tongue for paleoclimatology. Climate of the
    Past, 12, 1093-1100.

    Examples
    --------

    .. ipython:: python
        :okwarning:
        :okexcept:

        import pyleoclim as pyleo
        url='http://wiki.linked.earth/wiki/index.php/Special:WTLiPD?op=export&lipdid=MD982176.Stott.2004'
        d=pyleo.Lipd(usr_path=url)
    '''

    def __init__(self, usr_path=None, lipd_dict=None, validate=False, remove=False):
        # archive type -> [color, marker] used by mapAllArchive
        self.plot_default = {'ice-other': ['#FFD600', 'h'],
                             'ice/rock': ['#FFD600', 'h'],
                             'coral': ['#FF8B00', 'o'],
                             'documents': ['k', 'p'],
                             'glacierice': ['#86CDFA', 'd'],
                             'hybrid': ['#00BEFF', '*'],
                             'lakesediment': ['#4169E0', 's'],
                             'marinesediment': ['#8A4513', 's'],
                             'sclerosponge': ['r', 'o'],
                             'speleothem': ['#FF1492', 'd'],
                             'wood': ['#32CC32', '^'],
                             'molluskshells': ['#FFD600', 'h'],
                             'peat': ['#2F4F4F', '*'],
                             'midden': ['#824E2B', 'o'],
                             'other': ['k', 'o']}

        if remove and not validate:
            print('Removal of unvalidated LiPD files require validation')
            validate = True

        # prepare the dictionaries for all possible scenarios
        if usr_path is not None:
            if usr_path.startswith(('http', 'ftp')):
                # URL
                target = usr_path
            else:
                # local path: readLipd() takes only absolute paths
                target = os.path.abspath(usr_path)
            D_path = self._restore_cwd(lpd.readLipd, target)
            # a single dataset is returned bare: key it by its name
            if 'archiveType' in D_path:
                D_path = {D_path['dataSetName']: D_path}
            if validate:
                self._validate(D_path, remove)
        else:
            D_path = {}

        if lipd_dict is not None:
            if 'archiveType' in lipd_dict:
                D_dict = {lipd_dict['dataSetName']: lipd_dict}
            else:
                # shallow copy so that removing invalid datasets does not
                # delete entries from the caller's dictionary
                D_dict = dict(lipd_dict)
            if validate:
                self._validate(D_dict, remove)
        else:
            D_dict = {}

        # raise an error if empty
        if not D_dict and not D_path:
            raise ValueError('No valid files; try without validation.')

        # assemble
        self.lipd = {}
        self.lipd.update(D_path)
        self.lipd.update(D_dict)

    def __repr__(self):
        return str(self.__dict__)

    @staticmethod
    def _restore_cwd(func, *args, **kwargs):
        '''Call func(*args, **kwargs) and restore the working directory after.

        The LiPD utilities change the current working directory without
        turning back, so it is recorded here and restored even when the call
        raises.
        '''
        cwd = os.getcwd()
        try:
            return func(*args, **kwargs)
        finally:
            os.chdir(cwd)

    @staticmethod
    def _dataset_name(filename):
        '''Return filename without a trailing ".lpd" extension.

        str.strip('.lpd') must not be used for this: it removes any of the
        characters '.', 'l', 'p', 'd' from both ends ('Iceland.lpd' would
        become 'Icelan').
        '''
        suffix = '.lpd'
        if filename.endswith(suffix):
            return filename[:-len(suffix)]
        return filename

    @classmethod
    def _validate(cls, datasets, remove):
        '''Validate datasets and, if remove is set, delete the invalid ones.

        datasets is modified in place and also returned. A dataset is deleted
        when its validation status is 'FAIL' and at least one of its error
        messages is something other than a 'Mismatched columns' message.
        '''
        res = cls._restore_cwd(lpd.validate, datasets, detailed=False)
        if not remove:
            return datasets
        for item in res:
            if item['status'] != 'FAIL':
                continue
            messages = item['feedback']['errMsgs']
            if any(not msg.startswith('Mismatched columns') for msg in messages):
                del datasets[cls._dataset_name(item['filename'])]
        return datasets

    def _datasets(self):
        '''Return the dataset dictionaries held by the object as a list.

        self.lipd is normally keyed by dataset name, but extract() stores a
        single bare dataset; that case is recognised with the same
        'archiveType' test used when loading.
        '''
        if 'archiveType' in self.lipd:
            return [self.lipd]
        return list(self.lipd.values())

    def copy(self):
        '''Copy the object

        Returns
        -------

        new : pyleoclim.Lipd
            A deep copy of the object
        '''
        return deepcopy(self)

    def to_tso(self, mode='paleo'):
        '''Extracts all the variables to a list of LiPD timeseries objects

        In LiPD, timeseries objects are flattened dictionaries that contain
        the values for the time and variable axes as well as relevant
        metadata.

        Parameters
        ----------

        mode : {'paleo','chron'}
            Whether to extract the timeseries information from the paleo
            tables or chron tables

        Returns
        -------

        ts_list : list
            List of LiPD timeseries objects

        References
        ----------

        McKay, N. P., & Emile-Geay, J. (2016). Technical Note: The Linked
        Paleo Data framework – a common tongue for paleoclimatology. Climate
        of the Past, 12, 1093-1100.
        '''
        return self._restore_cwd(lpd.extractTs, self.lipd, mode=mode)

    def extract(self, dataSetName):
        '''Extract a particular dataset

        Parameters
        ----------

        dataSetName : str
            Name of the dataset to extract

        Returns
        -------

        new : pyleoclim.Lipd
            A new object corresponding to a particular dataset. Its lipd
            attribute is the dataset dictionary itself (shared with this
            object, not copied). If dataSetName is not found, a warning is
            issued and a copy of the full object is returned.
        '''
        new = self.copy()
        try:
            new.lipd = self.lipd[dataSetName]
        except (KeyError, TypeError):
            # best effort: keep returning the full copy, but say so
            warnings.warn('Dataset {!r} not found; returning a copy of the '
                          'full Lipd object.'.format(dataSetName))
        return new

    def to_LipdSeriesList(self, mode='paleo'):
        '''Extracts all LiPD timeseries objects to a list of LipdSeries objects

        In LiPD, timeseries objects are flattened dictionaries that contain
        the values for the time and variable axes as well as relevant
        metadata. Timeseries that cannot be turned into a LipdSeries are
        skipped with a warning.

        Parameters
        ----------

        mode : {'paleo','chron'}
            Whether to extract the timeseries information from the paleo
            tables or chron tables

        Returns
        -------

        res : list
            A list of LiPDSeries objects

        References
        ----------

        McKay, N. P., & Emile-Geay, J. (2016). Technical Note: The Linked
        Paleo Data framework – a common tongue for paleoclimatology. Climate
        of the Past, 12, 1093-1100.

        See also
        --------

        pyleoclim.core.lipdseries.LipdSeries : a LipdSeries object
        '''
        ts_list = self._restore_cwd(lpd.extractTs, self.lipd, mode=mode)
        var_key = ('paleoData_variableName' if mode == 'paleo'
                   else 'chronData_variableName')

        res = []
        for idx, item in enumerate(ts_list):
            try:
                res.append(LipdSeries(item))
            except Exception:
                # .get() so that missing metadata cannot make the warning
                # itself fail
                txt = ('The timeseries from ' + str(idx) + ': '
                       + str(item.get('dataSetName', 'unknown')) + ': '
                       + str(item.get(var_key, 'unknown'))
                       + ' could not be coerced into a LipdSeries object, passing')
                warnings.warn(txt)
        return res

    def to_LipdSeries(self, number=None, mode='paleo'):
        '''Extracts one timeseries from the Lipd object

        In LiPD, timeseries objects are flattened dictionaries that contain
        the values for the time and variable axes as well as relevant
        metadata.

        Note that this function may require user interaction if the number of
        the column in the file is unknown. The numbers are fixed so automating
        the code is as simple as retaining a series of numbers when reopening
        the files.

        Parameters
        ----------

        number : int
            the number of the timeseries object. If None, the full list of
            timeseries objects is handed to LipdSeries.

        mode : str; {'paleo','chron'}
            whether to extract the paleo or chron series.

        Returns
        -------

        ts : pyleoclim.LipdSeries
            A LipdSeries object

        Raises
        ------

        TypeError
            If number cannot be coerced into an integer.

        See also
        --------

        pyleoclim.core.lipdseries.LipdSeries : LipdSeries object
        '''
        ts_list = self._restore_cwd(lpd.extractTs, self.lipd, mode=mode)
        if number is None:
            return LipdSeries(ts_list)
        try:
            number = int(number)
        except (TypeError, ValueError, OverflowError) as err:
            raise TypeError('Number needs to be an integer or should be coerced into an integer.') from err
        return LipdSeries(ts_list[number])

    def mapAllArchive(self, projection='Robinson', proj_default=True,
                      background=True, borders=False, rivers=False, lakes=False,
                      figsize=None, ax=None, marker=None, color=None,
                      markersize=None, scatter_kwargs=None,
                      legend=True, lgd_kwargs=None, savefig_settings=None):
        '''Map all the records contained in the LiPD object by the type of archive

        Note that the map is fully customizable by using the optional
        parameters.

        Parameters
        ----------

        projection : str, optional
            The projection to use. The default is 'Robinson'.

        proj_default : bool, optional
            Whether to use the Pyleoclim defaults for each projection type.
            The default is True.

        background : bool, optional
            Whether to use a background. The default is True.

        borders : bool, optional
            Draw borders. The default is False.

        rivers : bool, optional
            Draw rivers. The default is False.

        lakes : bool, optional
            Draw lakes. The default is False.

        figsize : list, optional
            The size of the figure. The default is None.

        ax : matplotlib.ax, optional
            The matplotlib axis onto which to return the map. The default is
            None.

        marker : str, optional
            The marker type for each archive. The default is None, which uses
            a pre-defined palette in Pyleoclim. To see the default option,
            run Lipd.plot_default where Lipd is the name of the object.

        color : str, optional
            Color for each archive. The default is None, which uses a
            pre-defined palette in Pyleoclim. To see the default option, run
            Lipd.plot_default where Lipd is the name of the object.

        markersize : float, optional
            Size of the marker. The default is None.

        scatter_kwargs : dict, optional
            Parameters for the scatter plot. The default is None.

        legend : bool; {True,False}, optional
            Whether to plot the legend. The default is True.

        lgd_kwargs : dict, optional
            Arguments for the legend. The default is None.

        savefig_settings : dictionary, optional
            The dictionary of arguments for plt.savefig(); some notes below:

            - "path" must be specified; it can be any existing or non-existing
              path, with or without a suffix; if the suffix is not given in
              "path", it will follow "format"
            - "format" can be one of {"pdf", "eps", "png", "ps"}.

            The default is None.

        Returns
        -------

        res : tuple or fig
            The figure and axis if asked.

        See also
        --------

        pyleoclim.utils.mapping.map : Underlying mapping function for Pyleoclim

        Examples
        --------

        For speed, we are only using one LiPD file. But these functions can
        load and map multiple.

        .. ipython:: python
            :okwarning:
            :okexcept:

            import pyleoclim as pyleo
            url = 'http://wiki.linked.earth/wiki/index.php/Special:WTLiPD?op=export&lipdid=MD982176.Stott.2004'
            data = pyleo.Lipd(usr_path = url)
            @savefig mapallarchive.png
            fig, ax = data.mapAllArchive()
            pyleo.closefig(fig)

        Change the markersize

        .. ipython:: python
            :okwarning:
            :okexcept:

            import pyleoclim as pyleo
            url = 'http://wiki.linked.earth/wiki/index.php/Special:WTLiPD?op=export&lipdid=MD982176.Stott.2004'
            data = pyleo.Lipd(usr_path = url)
            @savefig mapallarchive_marker.png
            fig, ax = data.mapAllArchive(markersize=100)
            pyleo.closefig(fig)
        '''
        # work on a copy so the caller's dictionary is left untouched
        scatter_kwargs = {} if scatter_kwargs is None else scatter_kwargs.copy()

        # get the information from the LiPD dict
        lat = []
        lon = []
        archiveType = []
        for d in self._datasets():
            coordinates = d['geo']['geometry']['coordinates']
            lat.append(coordinates[1])
            lon.append(coordinates[0])
            if 'archiveType' in d:
                archive = lipdutils.LipdToOntology(d['archiveType']).lower().replace(" ", "")
            else:
                archive = 'other'
            # make sure criteria is in the plot_default list
            archiveType.append(archive if archive in self.plot_default else 'other')

        if markersize is not None:
            scatter_kwargs['s'] = markersize

        if marker is None:
            marker = [self.plot_default[item][1] for item in archiveType]

        if color is None:
            color = [self.plot_default[item][0] for item in archiveType]

        res = mapping.map(lat=lat, lon=lon, criteria=archiveType,
                          marker=marker, color=color,
                          projection=projection, proj_default=proj_default,
                          background=background, borders=borders,
                          rivers=rivers, lakes=lakes,
                          figsize=figsize, ax=ax,
                          scatter_kwargs=scatter_kwargs, legend=legend,
                          lgd_kwargs=lgd_kwargs,
                          savefig_settings=savefig_settings)

        return res