"""Module containing all the plotting methods for PastaStore.
Pastastore comes with a number of helpful plotting methods to quickly
visualize time series contained in the store. Plotting time series or data availability
is available through the `plots` attribute of the PastaStore object. For example, if we
have a :class:`pastastore.PastaStore` called `pstore` linking to an existing database,
the plot methods are available as follows::

    pstore.plots.oseries()
"""
import logging
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pastas as ps
from matplotlib.colors import BoundaryNorm, LogNorm
logger = logging.getLogger(__name__)
# (stray "[docs]" Sphinx source-link marker removed; it was an extraction artifact)
class Plots:
    """Plot class for Pastastore.

    Allows plotting of time series, data availability, model statistics and
    model comparisons for the store it is attached to.
    """

    def __init__(self, pstore):
        """Initialize Plots class for Pastastore.

        Parameters
        ----------
        pstore : pastastore.Pastastore
            Pastastore object
        """
        self.pstore = pstore

    def __repr__(self):
        """Return string representation of Plots submodule."""
        # Only list public callables: plain attributes such as ``pstore`` are
        # not plotting methods.
        methods = "".join(
            f"\n - {meth}"
            for meth in dir(self)
            if not meth.startswith("_") and callable(getattr(self, meth))
        )
        return "Plotting submodule, available methods:" + methods

    def _timeseries(
        self,
        libname,
        names=None,
        ax=None,
        split=False,
        figsize=(10, 5),
        progressbar=True,
        show_legend=True,
        labelfunc=None,
        legend_kwargs=None,
        **kwargs,
    ):
        """Plot time series from pastastore (internal method).

        Parameters
        ----------
        libname : str
            name of the library to obtain time series from (oseries
            or stresses)
        names : list[str], optional
            list of time series names to plot, by default None
        ax : matplotlib.Axes, optional
            pass axes object to plot on existing axes, by default None,
            which creates a new figure
        split : bool, optional
            create a separate subplot for each time series, by default False.
            A maximum of 20 time series is supported when split=True.
        figsize : tuple, optional
            figure size, by default (10, 5)
        progressbar : bool, optional
            show progressbar when loading time series from store,
            by default True
        show_legend : bool, optional
            show legend, default is True.
        labelfunc : callable, optional
            function to create custom labels, function should take name of time series
            as input
        legend_kwargs : dict, optional
            additional arguments to pass to legend; they override the defaults
            used here. The dictionary itself is not modified.
        **kwargs
            passed on to ``Axes.plot`` for every time series

        Returns
        -------
        ax : matplotlib.Axes
            axes handle; when split=True and no ``ax`` was given this is whatever
            ``plt.subplots`` returned (an array of Axes for more than one series)

        Raises
        ------
        ValueError
            if split=True and there are more than 20 time series to plot.
        """
        names = self.pstore.conn.parse_names(names, libname)

        if len(names) > 20 and split:
            raise ValueError(
                "More than 20 time series leads to too many subplots, set split=False."
            )

        if ax is None:
            if split:
                _, axes = plt.subplots(len(names), 1, sharex=True, figsize=figsize)
            else:
                _, axes = plt.subplots(1, 1, figsize=figsize)
        else:
            axes = ax

        # plt.subplots returns a bare Axes (not an array) when only one subplot
        # is requested, so normalise to something indexable for the split case.
        subplot_axes = None
        if split and ax is None:
            subplot_axes = axes if isinstance(axes, np.ndarray) else [axes]

        # Work on a copy so the caller's dictionary is never mutated.
        user_legend = dict(legend_kwargs) if legend_kwargs is not None else {}

        tsdict = self.pstore.conn._get_series(  # noqa: SLF001
            libname, names, progressbar=progressbar, squeeze=False
        )
        for i, (n, ts) in enumerate(tsdict.items()):
            if ax is not None:
                iax = ax
            elif split:
                iax = subplot_axes[i]
            else:
                iax = axes

            label = labelfunc(n) if labelfunc is not None else n
            iax.plot(ts.index, ts.squeeze(), label=label, **kwargs)

            if split and show_legend:
                iax.legend(**{"loc": "best", "fontsize": "x-small", **user_legend})

        if not split and show_legend:
            # Defaults place a frameless multi-column legend above the axes;
            # anything in legend_kwargs takes precedence.
            legend_opts = {
                "loc": (0, 1),
                "frameon": False,
                "ncol": 7,
                "fontsize": "x-small",
                **user_legend,
            }
            axes.legend(**legend_opts)

        return axes

    def oseries(
        self,
        names=None,
        ax=None,
        split=False,
        figsize=(10, 5),
        show_legend=True,
        labelfunc=None,
        legend_kwargs=None,
        **kwargs,
    ):
        """Plot oseries.

        Parameters
        ----------
        names : list[str], optional
            list of oseries names to plot, by default None, which loads
            all oseries from store
        ax : matplotlib.Axes, optional
            pass axes object to plot oseries on existing figure,
            by default None, in which case a new figure is created
        split : bool, optional
            create a separate subplot for each time series, by default False.
            A maximum of 20 time series is supported when split=True.
        figsize : tuple, optional
            figure size, by default (10, 5)
        show_legend : bool, optional
            show legend, default is True.
        labelfunc : callable, optional
            function to create custom labels, function should take name of time series
            as input
        legend_kwargs : dict, optional
            additional arguments to pass to legend
        **kwargs
            forwarded to the internal time series plotting method

        Returns
        -------
        ax : matplotlib.Axes
            axes handle
        """
        return self._timeseries(
            "oseries",
            names=names,
            ax=ax,
            split=split,
            figsize=figsize,
            show_legend=show_legend,
            labelfunc=labelfunc,
            legend_kwargs=legend_kwargs,
            **kwargs,
        )

    def stresses(
        self,
        names=None,
        kind=None,
        ax=None,
        split=False,
        figsize=(10, 5),
        show_legend=True,
        labelfunc=None,
        legend_kwargs=None,
        **kwargs,
    ):
        """Plot stresses.

        Parameters
        ----------
        names : list[str], optional
            list of stresses names to plot, by default None, which loads
            all stresses from store
        kind : str, optional
            only plot stresses of a certain kind, by default None, which
            includes all stresses
        ax : matplotlib.Axes, optional
            pass axes object to plot stresses on existing figure,
            by default None, in which case a new figure is created
        split : bool, optional
            create a separate subplot for each time series, by default False.
            A maximum of 20 time series is supported when split=True.
        figsize : tuple, optional
            figure size, by default (10, 5)
        show_legend : bool, optional
            show legend, default is True.
        labelfunc : callable, optional
            function to create custom labels, function should take name of time series
            as input
        legend_kwargs : dict, optional
            additional arguments to pass to legend
        **kwargs
            forwarded to the internal time series plotting method

        Returns
        -------
        ax : matplotlib.Axes
            axes handle
        """
        names = self.pstore.conn.parse_names(names, "stresses")
        if kind:
            # Restrict the requested names to those whose metadata "kind"
            # column matches.
            meta = self.pstore.stresses
            meta = meta.loc[meta.index.isin(names)]
            names = meta.loc[meta["kind"] == kind].index.to_list()

        return self._timeseries(
            "stresses",
            names=names,
            ax=ax,
            split=split,
            figsize=figsize,
            show_legend=show_legend,
            labelfunc=labelfunc,
            legend_kwargs=legend_kwargs,
            **kwargs,
        )

    def data_availability(
        self,
        libname,
        names=None,
        kind=None,
        intervals=None,
        ignore=("second", "minute", "14 days"),
        ax=None,
        cax=None,
        normtype="log",
        cmap="viridis_r",
        set_yticks=False,
        figsize=(10, 8),
        progressbar=True,
        dropna=True,
        **kwargs,
    ):
        """Plot the data-availability for multiple time series in pastastore.

        Parameters
        ----------
        libname : str
            name of library to get time series from (oseries or stresses)
        names : list, optional
            specify names in a list to plot data availability for certain
            time series
        kind : str, optional
            if library is stresses, kind can be specified to obtain only
            stresses of a specific kind
        intervals: dict, optional
            A dict with frequencies as keys and number of seconds as values
        ignore : list, optional
            A list with frequencies in intervals to ignore
        ax: matplotlib Axes, optional
            pass axes object to plot data availability on existing figure. by
            default None, in which case a new figure is created
        cax: matplotlib Axes, optional
            pass object axes to plot the colorbar on. by default None, which
            gives default Matplotlib behavior
        normtype : str, optional
            Determines the type of color normalisations, default is 'log'
        cmap : str, optional
            A reference to a matplotlib colormap
        set_yticks : bool, optional
            Set the names of the series as yticks
        figsize : tuple, optional
            The size of the new figure in inches (h,v)
        progressbar : bool
            Show progressbar
        dropna : bool
            Do not show NaNs as available data
        kwargs : dict, optional
            Extra arguments are passed to matplotlib.pyplot.subplots()

        Returns
        -------
        ax : matplotlib Axes
            The axes in which the data-availability is plotted
        """
        names = self.pstore.conn.parse_names(names, libname)

        # The stresses metadata is only needed when filtering on kind.
        if libname == "stresses" and kind:
            meta = self.pstore.stresses
            meta = meta.loc[meta.index.isin(names)]
            names = meta.loc[meta["kind"] == kind].index.to_list()

        series = list(
            self.pstore.conn._get_series(  # noqa: SLF001
                libname, names, progressbar=progressbar, squeeze=False
            ).values()
        )

        return self._data_availability(
            series,
            names=names,
            intervals=intervals,
            ignore=ignore,
            ax=ax,
            cax=cax,
            normtype=normtype,
            cmap=cmap,
            set_yticks=set_yticks,
            figsize=figsize,
            dropna=dropna,
            **kwargs,
        )

    @staticmethod
    def _data_availability(
        series,
        names=None,
        intervals=None,
        ignore=("second", "minute", "14 days"),
        ax=None,
        cax=None,
        normtype="log",
        cmap="viridis_r",
        set_yticks=False,
        figsize=(10, 8),
        dropna=True,
        **kwargs,
    ):
        """Plot the data-availability for a list of time series.

        Each series occupies one horizontal band; the colour of a band segment
        encodes the time step between two consecutive observations.

        Parameters
        ----------
        series : iterable of pandas.Series or pandas.DataFrame
            time series to plot data availability for
        names : list, optional
            specify names of series, default is None in which case names
            will be taken from series themselves.
        intervals: dict, optional
            A dict with frequencies as keys and number of seconds as values,
            expected in increasing order. The dict is not modified.
        ignore : list, optional
            A list with frequencies in intervals to ignore
        ax: matplotlib Axes, optional
            pass axes object to plot data availability on existing figure. by
            default None, in which case a new figure is created
        cax: matplotlib Axes, optional
            pass object axes to plot the colorbar on. by default None, which
            gives default Matplotlib behavior
        normtype : str, optional
            Determines the type of color normalisations, default is 'log';
            any other value selects a BoundaryNorm on the interval bounds
        cmap : str, optional
            A reference to a matplotlib colormap
        set_yticks : bool, optional
            Set the names of the series as yticks
        figsize : tuple, optional
            The size of the new figure in inches (h,v)
        dropna : bool
            Do not show NaNs as available data
        kwargs : dict, optional
            Extra arguments are passed to matplotlib.pyplot.subplots()

        Returns
        -------
        ax : matplotlib Axes
            The axes in which the data-availability is plotted
        """
        # Materialise once: the input is iterated and its length is needed,
        # which a generator or dict view cannot always provide.
        series = list(series)

        # a good colormap is cmap='RdYlGn_r' or 'cubehelix'
        if ax is None:
            fig, ax = plt.subplots(figsize=figsize, **kwargs)
        else:
            fig = ax.get_figure()
        ax.invert_yaxis()

        if intervals is None:
            intervals = {
                "second": 1,
                "minute": 60,
                "hour": 60 * 60,
                "day": 60 * 60 * 24,
                "week": 60 * 60 * 24 * 7,
                "14 days": 60 * 60 * 24 * 14,
                "month": 60 * 60 * 24 * 31,
                "quarter": 60 * 60 * 24 * 31 * 4,
                "year": 60 * 60 * 24 * 366,
            }
        # Build a filtered copy instead of popping, so a dict supplied by the
        # caller is left untouched.
        ignored = () if ignore is None else tuple(ignore)
        intervals = {k: v for k, v in intervals.items() if k not in ignored}

        # Interval bounds are expressed in nanoseconds to match the time steps.
        bounds = np.array(list(intervals.values()), dtype=float) * (10**9)
        labels = list(intervals)

        if normtype == "log":
            norm = LogNorm(vmin=bounds[0], vmax=bounds[-1])
        else:
            norm = BoundaryNorm(boundaries=bounds, ncolors=256)
        cmap = plt.get_cmap(cmap, 256)
        cmap.set_over((1.0, 1.0, 1.0))

        pc = None
        for i, s in enumerate(series):
            if dropna:
                s = s.dropna()
            # At least two timestamps are needed to form a single interval.
            if len(s.index) < 2:
                continue
            steps = np.diff(s.index)
            if steps.dtype.kind == "m":
                # Normalise to nanoseconds so indexes stored at another
                # resolution (s, ms, us) are still comparable with ``bounds``.
                steps = steps.astype("timedelta64[ns]")
            pc = ax.pcolormesh(
                s.index,
                [i, i + 1],
                [steps.astype(float)],
                norm=norm,
                cmap=cmap,
                linewidth=0,
                rasterized=True,
            )

        # The colorbar needs a mappable; skip it when nothing was drawn.
        if pc is not None:
            cb = fig.colorbar(pc, ax=ax, cax=cax, extend="both")
            cb.set_ticks(bounds)
            cb.ax.set_yticklabels(labels)
            cb.ax.minorticks_off()

        if set_yticks:
            # Major ticks sit in the middle of each band (for the labels),
            # minor ticks on the band edges (for the grid lines).
            ax.set_yticks(np.arange(0.5, len(series) + 0.5), minor=False)
            ax.set_yticks(np.arange(0, len(series) + 1), minor=True)
            if names is None:
                names = [s.name for s in series]
            ax.set_yticklabels(names)

            for tick in ax.yaxis.get_major_ticks():  # don't show major ytick marker
                tick.tick1line.set_visible(False)

            ax.grid(True, which="minor", axis="y")
            ax.grid(True, which="major", axis="x")
        else:
            ax.set_ylabel("Timeseries (-)")
            ax.grid(True, which="both")

        return ax

    def cumulative_hist(
        self,
        statistic="rsq",
        modelnames=None,
        extend=False,
        ax=None,
        figsize=(6, 6),
        label=None,
        legend=True,
        progressbar=True,
    ):
        """Plot a cumulative step histogram for a model statistic.

        Parameters
        ----------
        statistic: str
            name of the statistic, e.g. "evp" or "rmse", by default "rsq"
        modelnames: list[str], optional
            modelnames to plot statistic for, by default None, which
            uses all models in the store
        extend: bool, optional
            force extend the stats Series with a dummy value to move the
            horizontal line outside figure bounds. If True the results
            are skewed a bit, especially if number of models is low.
            Has no effect for "aic" and "bic".
        ax: matplotlib.Axes, optional
            axes to plot histogram, by default None which creates an Axes
        figsize: tuple, optional
            figure size, by default (6,6)
        label: str, optional
            label for the legend, by default None, which shows the number
            of models
        legend: bool, optional
            show legend, by default True
        progressbar: bool, optional
            show progressbar, default is True.

        Returns
        -------
        ax : matplotlib Axes
            The axes in which the cumulative histogram is plotted
        """
        statsdf = self.pstore.get_statistics(
            [statistic], modelnames=modelnames, progressbar=progressbar
        )
        # Remember the real number of models before a dummy may be appended.
        n_models = len(statsdf)

        def _with_dummy(stats, value):
            # Series.append was removed in pandas 2.0; concat is equivalent.
            return pd.concat([stats, pd.Series(float(value), index=["dummy"])])

        if ax is None:
            _, ax = plt.subplots(1, 1, figsize=figsize)
            ax.set_xticks(np.linspace(0, 1, 11))
            ax.set_xlim(0, 1)
            ax.set_ylabel(statistic)
            ax.set_xlabel("Density")
            ax.set_title("Cumulative Step Histogram")

        if statistic == "evp":
            ax.set_yticks(np.linspace(0, 100, 11))
            if extend:
                statsdf = _with_dummy(statsdf, 100)
                ax.set_ylim(0, 100)
            else:
                ax.set_ylim(0, statsdf.max())
        elif statistic in ("rsq", "nse", "kge_2012"):
            ax.set_yticks(np.linspace(0, 1, 11))
            if extend:
                statsdf = _with_dummy(statsdf, 1)
                statsdf[statsdf < 0] = 0
                ax.set_ylim(0, 1)
            else:
                ax.set_ylim(0, statsdf.max())
        elif statistic in ("aic", "bic"):
            ax.set_ylim(statsdf.min(), statsdf.max())
        else:
            if extend:
                statsdf = _with_dummy(statsdf, 0)
            ax.set_ylim(0, statsdf.max())

        if label is None:
            label = f"No. Models = {n_models}"

        statsdf.hist(
            ax=ax,
            bins=len(statsdf),
            density=True,
            cumulative=True,
            histtype="step",
            orientation="horizontal",
            label=label,
        )

        if legend:
            ax.legend(loc=4)

        return ax

    def compare_models(self, modelnames, ax=None, **kwargs):
        """Compare multiple models and plot the results.

        Parameters
        ----------
        modelnames : list
            A list of model names to compare.
        ax : matplotlib.axes.Axes, optional
            The axes on which to plot the comparison. If not provided, a new figure
            and axes will be created.
        **kwargs : dict
            Additional keyword arguments to pass to the plot function. A ``names``
            entry, if present and non-empty, is used as the model labels instead
            of the automatically derived ones.

        Returns
        -------
        cm : pastas.CompareModels
            The CompareModels object containing the comparison results.
        """
        models = self.pstore.get_models(modelnames)

        names = kwargs.pop("names", None)
        if not names:
            onames = {iml.oseries.name for iml in models}
            if len(onames) == 1:
                # All models share one oseries: strip its name from the model
                # names to get short labels, but never produce an empty label.
                oname = next(iter(onames))
                names = [
                    modelname.replace(oname, "") or modelname
                    for modelname in modelnames
                ]
            else:
                names = list(modelnames)

        cm = ps.CompareModels(models, names=names)
        if ax is not None:
            # NOTE(review): assumes CompareModels.plot accepts ``ax`` - confirm
            # against the installed pastas version.
            kwargs.setdefault("ax", ax)
        cm.plot(**kwargs)
        return cm