Source code for pastastore.plotting.plots

"""Module containing all the plotting methods for PastaStore.

Pastastore comes with a number of helpful plotting methods to quickly
visualize time series contained in the store. Plotting time series or data availability
is available through the `plots` attribute of the PastaStore object. For example, if we
have a :class:`pastastore.PastaStore` called `pstore` linking to an existing database,
the plot methods are available as follows::

    pstore.plots.oseries()

"""

import logging

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pastas as ps
from matplotlib.colors import BoundaryNorm, LogNorm

logger = logging.getLogger(__name__)


class Plots:
    """Plot class for Pastastore.

    Allows plotting of time series and data availability.
    """

    def __init__(self, pstore):
        """Initialize Plots class for Pastastore.

        Parameters
        ----------
        pstore : pastastore.Pastastore
            Pastastore object
        """
        self.pstore = pstore

    def __repr__(self):
        """Return string representation of Plots submodule."""
        methods = "".join(
            [f"\n - {meth}" for meth in dir(self) if not meth.startswith("_")]
        )
        return "Plotting submodule, available methods:" + methods

    @staticmethod
    def _legend_options(legend_kwargs=None):
        """Merge user legend options with the defaults of the combined legend.

        Parameters
        ----------
        legend_kwargs : dict, optional
            user supplied keyword arguments for ``Axes.legend``, by default None

        Returns
        -------
        options : dict
            new dictionary containing the defaults updated with the user
            supplied values. The input dictionary is never modified.
        """
        options = {
            "loc": (0, 1),
            "frameon": False,
            "ncol": 7,
            "fontsize": "x-small",
        }
        if legend_kwargs:
            options.update(legend_kwargs)
        return options

    def _timeseries(
        self,
        libname,
        names=None,
        ax=None,
        split=False,
        figsize=(10, 5),
        progressbar=True,
        show_legend=True,
        labelfunc=None,
        legend_kwargs=None,
        **kwargs,
    ):
        """Plot time series from pastastore (internal method).

        Parameters
        ----------
        libname : str
            name of the library to obtain time series from (oseries
            or stresses)
        names : list[str], optional
            list of time series names to plot, by default None
        ax : matplotlib.Axes, optional
            pass axes object to plot on existing axes, by default None,
            which creates a new figure
        split : bool, optional
            create a separate subplot for each time series, by default False.
            A maximum of 20 time series is supported when split=True.
        figsize : tuple, optional
            figure size, by default (10, 5)
        progressbar : bool, optional
            show progressbar when loading time series from store,
            by default True
        show_legend : bool, optional
            show legend, default is True.
        labelfunc : callable, optional
            function to create custom labels, function should take name of
            time series as input
        legend_kwargs : dict, optional
            additional arguments to pass to the legend that is drawn when
            split=False. The dictionary is not modified.
        **kwargs
            passed on to ``Axes.plot`` for every time series.

        Returns
        -------
        ax : matplotlib.Axes or numpy.ndarray of matplotlib.Axes
            axes handle. An array of axes is returned when split=True and the
            figure is created by this method.

        Raises
        ------
        ValueError
            if split=True and there are more than 20 time series to plot.
        """
        names = self.pstore.conn.parse_names(names, libname)

        if len(names) > 20 and split:
            raise ValueError(
                "More than 20 time series leads to too many subplots, set split=False."
            )

        if ax is None:
            if split:
                _, axes = plt.subplots(len(names), 1, sharex=True, figsize=figsize)
                # plt.subplots returns a bare Axes for a single row; wrap it so
                # the per-series indexing below also works for one time series
                axes = np.atleast_1d(axes)
            else:
                _, axes = plt.subplots(1, 1, figsize=figsize)
        else:
            axes = ax

        tsdict = self.pstore.conn._get_series(  # noqa: SLF001
            libname, names, progressbar=progressbar, squeeze=False
        )
        for i, (n, ts) in enumerate(tsdict.items()):
            if ax is not None:
                iax = ax
            elif split:
                iax = axes[i]
            else:
                iax = axes
            label = n if labelfunc is None else labelfunc(n)
            iax.plot(ts.index, ts.squeeze(), label=label, **kwargs)
            if split and show_legend:
                iax.legend(loc="best", fontsize="x-small")

        if not split and show_legend:
            axes.legend(**self._legend_options(legend_kwargs))

        return axes

    def oseries(
        self,
        names=None,
        ax=None,
        split=False,
        figsize=(10, 5),
        show_legend=True,
        labelfunc=None,
        legend_kwargs=None,
        **kwargs,
    ):
        """Plot oseries.

        Parameters
        ----------
        names : list[str], optional
            list of oseries names to plot, by default None, which loads
            all oseries from store
        ax : matplotlib.Axes, optional
            pass axes object to plot oseries on existing figure,
            by default None, in which case a new figure is created
        split : bool, optional
            create a separate subplot for each time series, by default False.
            A maximum of 20 time series is supported when split=True.
        figsize : tuple, optional
            figure size, by default (10, 5)
        show_legend : bool, optional
            show legend, default is True.
        labelfunc : callable, optional
            function to create custom labels, function should take name of
            time series as input
        legend_kwargs : dict, optional
            additional arguments to pass to legend
        **kwargs
            passed on to the internal time series plotting method.

        Returns
        -------
        ax : matplotlib.Axes
            axes handle
        """
        return self._timeseries(
            "oseries",
            names=names,
            ax=ax,
            split=split,
            figsize=figsize,
            show_legend=show_legend,
            labelfunc=labelfunc,
            legend_kwargs=legend_kwargs,
            **kwargs,
        )

    def stresses(
        self,
        names=None,
        kind=None,
        ax=None,
        split=False,
        figsize=(10, 5),
        show_legend=True,
        labelfunc=None,
        legend_kwargs=None,
        **kwargs,
    ):
        """Plot stresses.

        Parameters
        ----------
        names : list[str], optional
            list of stresses names to plot, by default None, which loads
            all stresses from store
        kind : str, optional
            only plot stresses of a certain kind, by default None, which
            includes all stresses
        ax : matplotlib.Axes, optional
            pass axes object to plot stresses on existing figure,
            by default None, in which case a new figure is created
        split : bool, optional
            create a separate subplot for each time series, by default False.
            A maximum of 20 time series is supported when split=True.
        figsize : tuple, optional
            figure size, by default (10, 5)
        show_legend : bool, optional
            show legend, default is True.
        labelfunc : callable, optional
            function to create custom labels, function should take name of
            time series as input
        legend_kwargs : dict, optional
            additional arguments to pass to legend
        **kwargs
            passed on to the internal time series plotting method.

        Returns
        -------
        ax : matplotlib.Axes
            axes handle
        """
        names = self.pstore.conn.parse_names(names, "stresses")
        masknames = self.pstore.stresses.index.isin(names)
        stresses = self.pstore.stresses.loc[masknames]

        if kind:
            mask = stresses["kind"] == kind
            names = stresses.loc[mask].index.to_list()

        return self._timeseries(
            "stresses",
            names=names,
            ax=ax,
            split=split,
            figsize=figsize,
            show_legend=show_legend,
            labelfunc=labelfunc,
            legend_kwargs=legend_kwargs,
            **kwargs,
        )

    def data_availability(
        self,
        libname,
        names=None,
        kind=None,
        intervals=None,
        ignore=("second", "minute", "14 days"),
        ax=None,
        cax=None,
        normtype="log",
        cmap="viridis_r",
        set_yticks=False,
        figsize=(10, 8),
        progressbar=True,
        dropna=True,
        **kwargs,
    ):
        """Plot the data-availability for multiple time series in pastastore.

        Parameters
        ----------
        libname : str
            name of library to get time series from (oseries or stresses)
        names : list, optional
            specify names in a list to plot data availability for certain
            time series
        kind : str, optional
            if library is stresses, kind can be specified to obtain only
            stresses of a specific kind
        intervals : dict, optional
            A dict with frequencies as keys and number of seconds as values
        ignore : list, optional
            A list with frequencies in intervals to ignore
        ax : matplotlib Axes, optional
            pass axes object to plot data availability on existing figure.
            by default None, in which case a new figure is created
        cax : matplotlib Axes, optional
            pass object axes to plot the colorbar on.
            by default None, which gives default Matplotlib behavior
        normtype : str, optional
            Determines the type of color normalisations, default is 'log'
        cmap : str, optional
            A reference to a matplotlib colormap
        set_yticks : bool, optional
            Set the names of the series as yticks
        figsize : tuple, optional
            The size of the new figure in inches (h,v)
        progressbar : bool
            Show progressbar
        dropna : bool
            Do not show NaNs as available data
        kwargs : dict, optional
            Extra arguments are passed to matplotlib.pyplot.subplots()

        Returns
        -------
        ax : matplotlib Axes
            The axes in which the data-availability is plotted
        """
        names = self.pstore.conn.parse_names(names, libname)

        if libname == "stresses":
            masknames = self.pstore.stresses.index.isin(names)
            stresses = self.pstore.stresses.loc[masknames]
            if kind:
                mask = stresses["kind"] == kind
                names = stresses.loc[mask].index.to_list()

        series = self.pstore.conn._get_series(  # noqa: SLF001
            libname, names, progressbar=progressbar, squeeze=False
        ).values()

        ax = self._data_availability(
            series,
            names=names,
            intervals=intervals,
            ignore=ignore,
            ax=ax,
            cax=cax,
            normtype=normtype,
            cmap=cmap,
            set_yticks=set_yticks,
            figsize=figsize,
            dropna=dropna,
            **kwargs,
        )
        return ax

    @staticmethod
    def _resolve_intervals(intervals=None, ignore=()):
        """Return the interval classes used to color the data availability.

        Parameters
        ----------
        intervals : dict, optional
            A dict with frequencies as keys and number of seconds as values,
            by default None, which selects the built-in set of intervals
        ignore : list or tuple, optional
            frequencies (keys of intervals) to leave out

        Returns
        -------
        intervals : dict
            new dictionary without the ignored frequencies, in the original
            order. A dictionary passed in by the caller is never modified.
        """
        if intervals is None:
            intervals = {
                "second": 1,
                "minute": 60,
                "hour": 60 * 60,
                "day": 60 * 60 * 24,
                "week": 60 * 60 * 24 * 7,
                "14 days": 60 * 60 * 24 * 14,
                "month": 60 * 60 * 24 * 31,
                # NOTE(review): 4 x 31 days is four months rather than a
                # quarter of a year; kept as-is, confirm whether intended
                "quarter": 60 * 60 * 24 * 31 * 4,
                "year": 60 * 60 * 24 * 366,
            }
        ignore = ignore or ()
        return {freq: sec for freq, sec in intervals.items() if freq not in ignore}

    @staticmethod
    def _data_availability(
        series,
        names=None,
        intervals=None,
        ignore=("second", "minute", "14 days"),
        ax=None,
        cax=None,
        normtype="log",
        cmap="viridis_r",
        set_yticks=False,
        figsize=(10, 8),
        dropna=True,
        **kwargs,
    ):
        """Plot the data-availability for a list of time series.

        Parameters
        ----------
        series : iterable of pandas.Series
            series to plot data availability for
        names : list, optional
            specify names of series, default is None in which case names
            will be taken from series themselves.
        intervals : dict, optional
            A dict with frequencies as keys and number of seconds as values.
            The dictionary is not modified.
        ignore : list, optional
            A list with frequencies in intervals to ignore
        ax : matplotlib Axes, optional
            pass axes object to plot data availability on existing figure.
            by default None, in which case a new figure is created
        cax : matplotlib Axes, optional
            pass object axes to plot the colorbar on.
            by default None, which gives default Matplotlib behavior
        normtype : str, optional
            Determines the type of color normalisations, default is 'log'
        cmap : str, optional
            A reference to a matplotlib colormap
        set_yticks : bool, optional
            Set the names of the series as yticks
        figsize : tuple, optional
            The size of the new figure in inches (h,v)
        dropna : bool
            Do not show NaNs as available data
        kwargs : dict, optional
            Extra arguments are passed to matplotlib.pyplot.subplots()

        Returns
        -------
        ax : matplotlib Axes
            The axes in which the data-availability is plotted
        """
        # a good colormap is cmap='RdYlGn_r' or 'cubehelix'
        # materialize so len() and repeated iteration work for any iterable
        series = list(series)

        if ax is None:
            fig, ax = plt.subplots(figsize=figsize, **kwargs)
        else:
            fig = ax.get_figure()
        ax.invert_yaxis()

        intervals = Plots._resolve_intervals(intervals, ignore)

        # the color values are np.diff of the index cast to float, which for
        # a datetime index is expressed in nanoseconds: scale seconds to match
        bounds = np.array(list(intervals.values()), dtype=float) * (10**9)
        labels = list(intervals)
        if normtype == "log":
            norm = LogNorm(vmin=bounds[0], vmax=bounds[-1])
        else:
            norm = BoundaryNorm(boundaries=bounds, ncolors=256)
        cmap = plt.get_cmap(cmap, 256)
        cmap.set_over((1.0, 1.0, 1.0))

        pc = None
        for i, s in enumerate(series):
            if dropna:
                s = s.dropna()
            # check after dropping NaNs: an all-NaN series has nothing to draw
            if s.empty:
                continue
            pc = ax.pcolormesh(
                s.index,
                [i, i + 1],
                [np.diff(s.index).astype(float)],
                norm=norm,
                cmap=cmap,
                linewidth=0,
                rasterized=True,
            )

        # make a colorbar in an ax on the right side; skipped when nothing
        # was plotted because there is no mappable to build it from
        if pc is not None:
            cb = fig.colorbar(pc, ax=ax, cax=cax, extend="both")
            cb.set_ticks(bounds)
            cb.ax.set_yticklabels(labels)
            cb.ax.minorticks_off()

        if set_yticks:
            ax.set_yticks(np.arange(0.5, len(series) + 0.5), minor=False)
            ax.set_yticks(np.arange(0, len(series) + 1), minor=True)
            if names is None:
                names = [s.name for s in series]
            ax.set_yticklabels(names)

            for tick in ax.yaxis.get_major_ticks():  # don't show major ytick marker
                tick.tick1line.set_visible(False)

            # horizontal lines between the series, vertical lines on the dates
            ax.grid(True, which="minor", axis="y")
            ax.grid(True, which="major", axis="x")
        else:
            ax.set_ylabel("Timeseries (-)")
            ax.grid(True, which="both")

        return ax

    @staticmethod
    def _extend_statistics(stats, statistic, extend):
        """Append the dummy value used to push the last histogram step away.

        Parameters
        ----------
        stats : pandas.Series
            statistic value per model
        statistic : str
            name of the statistic
        extend : bool
            whether extending was requested

        Returns
        -------
        stats : pandas.Series
            the (possibly extended) statistics; the input is not modified
        extended : bool
            True if a dummy entry was appended. No dummy is appended for
            "aic" and "bic".
        """
        if not extend or statistic in ("aic", "bic"):
            return stats, False

        bounded = statistic in ("rsq", "nse", "kge_2012")
        if statistic == "evp":
            dummy = 100
        elif bounded:
            dummy = 1
        else:
            dummy = 0

        # Series.append was removed in pandas 2.0, pd.concat is the replacement
        stats = pd.concat([stats, pd.Series(dummy, index=["dummy"])])
        if bounded:
            stats[stats < 0] = 0
        return stats, True

    def cumulative_hist(
        self,
        statistic="rsq",
        modelnames=None,
        extend=False,
        ax=None,
        figsize=(6, 6),
        label=None,
        legend=True,
        progressbar=True,
    ):
        """Plot a cumulative step histogram for a model statistic.

        Parameters
        ----------
        statistic : str
            name of the statistic, e.g. "evp" or "rmse", by default "rsq"
        modelnames : list[str], optional
            modelnames to plot statistic for, by default None, which
            uses all models in the store
        extend : bool, optional
            force extend the stats Series with a dummy value to move the
            horizontal line outside figure bounds. If True the results
            are skewed a bit, especially if number of models is low.
            Has no effect for "aic" and "bic".
        ax : matplotlib.Axes, optional
            axes to plot histogram, by default None which creates an Axes.
            Ticks, limits, labels and title are only set on axes created
            by this method.
        figsize : tuple, optional
            figure size, by default (6,6)
        label : str, optional
            label for the legend, by default None, which shows the number
            of models
        legend : bool, optional
            show legend, by default True
        progressbar : bool, optional
            show progressbar, default is True.

        Returns
        -------
        ax : matplotlib Axes
            The axes in which the cumulative histogram is plotted
        """
        # presumably a pandas.Series with one value per model - confirm
        # against PastaStore.get_statistics
        statsdf = self.pstore.get_statistics(
            [statistic], modelnames=modelnames, progressbar=progressbar
        )
        statsdf, extended = self._extend_statistics(statsdf, statistic, extend)

        if ax is None:
            _, ax = plt.subplots(1, 1, figsize=figsize)
            ax.set_xticks(np.linspace(0, 1, 11))
            ax.set_xlim(0, 1)
            ax.set_ylabel(statistic)
            ax.set_xlabel("Density")
            ax.set_title("Cumulative Step Histogram")
            if statistic == "evp":
                ax.set_yticks(np.linspace(0, 100, 11))
                ax.set_ylim(0, 100 if extend else statsdf.max())
            elif statistic in ("rsq", "nse", "kge_2012"):
                ax.set_yticks(np.linspace(0, 1, 11))
                ax.set_ylim(0, 1 if extend else statsdf.max())
            elif statistic in ("aic", "bic"):
                ax.set_ylim(statsdf.min(), statsdf.max())
            else:
                ax.set_ylim(0, statsdf.max())

        if label is None:
            # only discount the dummy entry when one was actually appended
            n_models = len(statsdf) - 1 if extended else len(statsdf)
            label = f"No. Models = {n_models}"

        statsdf.hist(
            ax=ax,
            bins=len(statsdf),
            density=True,
            cumulative=True,
            histtype="step",
            orientation="horizontal",
            label=label,
        )

        if legend:
            ax.legend(loc=4)

        return ax

    @staticmethod
    def _default_model_labels(modelnames, onames):
        """Derive legend labels for models when none are supplied.

        Parameters
        ----------
        modelnames : list[str]
            names of the models
        onames : list[str]
            name of the oseries of each model

        Returns
        -------
        labels : list[str]
            if all models share one oseries, the model names with the oseries
            name stripped (falling back to the full model name when nothing
            would be left), otherwise a copy of the model names.
        """
        if len(set(onames)) != 1:
            return list(modelnames)
        oname = onames[0]
        return [mlnam.replace(oname, "") or mlnam for mlnam in modelnames]

    def compare_models(self, modelnames, ax=None, **kwargs):
        """Compare multiple models and plot the results.

        Parameters
        ----------
        modelnames : list
            A list of model names to compare.
        ax : matplotlib.axes.Axes, optional
            The axes on which to plot the comparison. If not provided,
            a new figure and axes will be created.
        **kwargs : dict
            Additional keyword arguments to pass to the plot function.
            The optional entry "names" holds the labels to use for the
            models; when absent or empty, labels are derived from the
            model names.

        Returns
        -------
        cm : pastastore.CompareModels
            The CompareModels object containing the comparison results.
        """
        models = self.pstore.get_models(modelnames)
        names = kwargs.pop("names", None)
        if not names:
            onames = [iml.oseries.name for iml in models]
            names = self._default_model_labels(modelnames, onames)

        cm = ps.CompareModels(models, names=names)
        if ax is not None:
            kwargs.setdefault("ax", ax)
        cm.plot(**kwargs)
        return cm