Source code for pyam.compute

import itertools
import math

import pandas as pd
import wquantiles

from pyam._debiasing import _compute_bias
from pyam.index import replace_index_values
from pyam.timeseries import growth_rate
from pyam.utils import remove_from_list

class IamComputeAccessor:
    """Perform computations on the timeseries data of an IamDataFrame

    An :class:`IamDataFrame` has a module for computation of (advanced) indicators
    from the timeseries data.

    The methods in this module can be accessed via

    .. code-block:: python

        IamDataFrame.compute.<method>(*args, **kwargs)
    """

    def __init__(self, df):
        # Reference to the owning object; every method reads its data from here.
        self._df = df

    def quantiles(
        self, quantiles, weights=None, level=["model", "scenario"], append=False
    ):
        """Compute the optionally weighted quantiles of data grouped by `level`.

        For example, the following will provide the interquartile range and median
        value of CO2 emissions across all models and scenarios in a given dataset:

        .. code-block:: python

            df.filter(variable="Emissions|CO2").compute.quantiles([0.25, 0.5, 0.75])

        Parameters
        ----------
        quantiles : collection
            Group of quantile values to compute
        weights : pd.Series, optional
            Series indexed by `level`; must be named "weight"
        level : collection, optional
            The index columns to compute quantiles over
        append : bool, optional
            Whether to append computed timeseries data to this instance.

        Returns
        -------
        :class:`IamDataFrame` or **None**
            Computed data or None if `append=True`.

        Raises
        ------
        ValueError
            If more than one variable provided or if argument `weights` is malformed.
        """
        # NOTE: the list default for `level` is only read, never mutated, so sharing
        # it across calls is harmless; it is kept as a list for interface stability.
        self_df = self._df

        # Validate the arguments up front, before any data is touched.
        n_variables = len(self_df.variable)
        if n_variables > 1:
            raise ValueError(
                "quantiles() currently supports only 1 variable, and this "
                f"dataframe has {n_variables}"
            )
        # The join below relies on the series name to create the "weight" column.
        if weights is not None and getattr(weights, "name", None) != "weight":
            raise ValueError("weights pd.Series must have name 'weight'")

        # Function-local import (presumably to avoid a circular import with
        # pyam.core); deferred until the arguments are known to be valid.
        from pyam.core import IamDataFrame, concat

        df = self_df.timeseries()
        # can make this a kwarg
        model = "Quantiles" if weights is None else "Weighted Quantiles"

        # get weights aligned with model/scenario in data
        if weights is None:
            df["weight"] = 1.0
        else:
            df = df.join(weights, how="inner")
        w = df["weight"]
        df = df.drop(columns="weight")

        # prep data for processing: remove the levels that are aggregated over
        df = df.reset_index(level=level).drop(columns=level)

        dfs = []
        # indexed over region, variable, and unit
        idxs = df.index.drop_duplicates()
        # the time axis of every computed series is the same, so build it once
        year_index = pd.Series(df.columns, name="year")
        # NOTE(review): `w` carries one weight per row of the full table while
        # `df.loc[idx]` selects only the rows of one group - presumably the data
        # holds a single (region, variable, unit) group; confirm.
        for idx, q in itertools.product(idxs, quantiles):
            data = pd.Series(
                wquantiles.quantile(df.loc[idx].values.T, w.values, q),
                index=year_index,
                name="value",
            )
            # map each remaining index level name to its value for this group
            kwargs = dict(zip(idxs.names, idx))
            dfs.append(
                IamDataFrame(
                    data,
                    model=model,
                    scenario=str(q),  # can make this a kwarg
                    **kwargs,
                )
            )

        # append to `self` or return as `IamDataFrame`
        return self_df._finalize(concat(dfs), append=append)

    def growth_rate(self, mapping, append=False):
        """Compute the annualized growth rate of a timeseries along the time dimension

        The growth rate parameter in period *t* is computed based on the changes
        to the subsequent period, i.e., from period *t* to period *t+1*.

        Parameters
        ----------
        mapping : dict
            Mapping of *variable* item(s) to the name(s) of the computed data, e.g.,

            .. code-block:: python

               {"variable": "name of growth-rate variable", ...}

        append : bool, optional
            Whether to append computed timeseries data to this instance.

        Returns
        -------
        :class:`IamDataFrame` or **None**
            Computed timeseries data or None if `append=True`.

        Raises
        ------
        ValueError
            Math domain error when timeseries crosses 0.

        See Also
        --------
        pyam.timeseries.growth_rate
        """
        # Inside this method body, `growth_rate` resolves to the module-level
        # function imported from pyam.timeseries, not to this method.
        selected = self._df._data[self._df._apply_filters(variable=mapping)]
        value = selected.groupby(
            remove_from_list(self._df.dimensions, ["year"]), group_keys=False
        ).apply(growth_rate)

        if value.empty:
            value = empty_series(remove_from_list(self._df.dimensions, "unit"))
        else:
            # drop level "unit" and reinsert below, replace "variable"
            value.index = replace_index_values(
                value.index.droplevel("unit"), "variable", mapping
            )

        return self._df._finalize(value, append=append, unit="")

    def learning_rate(self, name, performance, experience, append=False):
        """Compute the implicit learning rate from timeseries data

        Experience curves are based on the concept that a technology's performance
        improves as experience with this technology grows.

        The "learning rate" indicates the performance improvement (e.g., cost
        reduction) for each doubling of the accumulated experience (e.g.,
        cumulative capacity).

        The experience curve parameter *b* is equivalent to the (linear) slope when
        plotting performance and experience timeseries on double-logarithmic scales.
        The learning rate can be computed from the experience curve parameter as
        :math:`1 - 2^{b}`.

        The learning rate parameter in period *t* is computed based on the changes
        to the subsequent period, i.e., from period *t* to period *t+1*.

        Parameters
        ----------
        name : str
            Variable name of the computed timeseries data.
        performance : str
            Variable of the "performance" timeseries (e.g., specific investment
            costs).
        experience : str
            Variable of the "experience" timeseries (e.g., installed capacity).
        append : bool, optional
            Whether to append computed timeseries data to this instance.

        Returns
        -------
        :class:`IamDataFrame` or **None**
            Computed timeseries data or None if `append=True`.
        """
        selected = self._df._data[
            self._df._apply_filters(variable=[performance, experience])
        ]
        # group by every dimension except those combined or dropped in the result
        value = selected.groupby(
            remove_from_list(self._df.dimensions, ["variable", "year", "unit"])
        ).apply(_compute_learning_rate, performance, experience)

        return self._df._finalize(value, append=append, variable=name, unit="")

    def bias(self, name, method, axis):
        """Compute the bias weights and add to 'meta'

        Parameters
        ----------
        name : str
            Column name in the 'meta' dataframe
        method : str
            Method to compute the bias weights, see the notes
        axis : str
            Index dimensions on which to apply the `method`

        Notes
        -----
        The following methods are implemented:

        - "count": use the inverse of the number of scenarios grouped by `axis`
          names.

          Using the following method on an IamDataFrame with three scenarios

          .. code-block:: python

              df.compute.bias(name="bias-weight", method="count", axis="scenario")

          results in the following column to be added to *df.meta*:

          .. list-table::
             :header-rows: 1

             * - model
               - scenario
               - bias-weight
             * - model_a
               - scen_a
               - 0.5
             * - model_a
               - scen_b
               - 1
             * - model_b
               - scen_a
               - 0.5
        """
        # The helper's result (if any) is not returned; this method returns None.
        _compute_bias(self._df, name, method, axis)
def _compute_learning_rate(x, performance, experience):
    """Internal implementation for computing implicit learning rate from timeseries data

    Parameters
    ----------
    x : :class:`pandas.Series`
        Timeseries data of the *performance* and *experience* variables
        indexed over the time domain.
    performance : str
        Variable of the "performance" timeseries (e.g., specific investment costs).
    experience : str
        Variable of the "experience" timeseries (e.g., cumulative installed capacity).

    Returns
    -------
    Indexed :class:`pandas.Series` of implicit learning rates
    """
    # drop all index dimensions other than "variable" and "year"; use the
    # non-mutating `droplevel` so the caller's object is left untouched
    # (this function is used as a `groupby.apply` callback)
    x = x.droplevel([i for i in x.index.names if i not in ["variable", "year"]])

    # apply log, dropping all values that are zero or negative
    x = x[x > 0].apply(math.log10)

    # return empty pd.Series if not all relevant variables exist
    if not all(v in x.index for v in (performance, experience)):
        return empty_series(remove_from_list(x.index.names, "variable"))

    # compute the "experience parameter" (slope of experience curve on double-log
    # scale) from each period to the subsequent one; the last period has no
    # successor and therefore yields NaN
    perf = x[performance]
    exp = x[experience]
    b = (perf - perf.shift(periods=-1)) / (exp - exp.shift(periods=-1))

    # translate to "learning rate" (e.g., cost reduction per doubling of capacity)
    return b.apply(lambda y: 1 - math.pow(2, y))


def empty_series(names):
    """Return an empty pd.Series with correct index names

    Parameters
    ----------
    names : list of str
        Names of the levels of the (empty) index.

    Returns
    -------
    :class:`pandas.Series`
        Empty series of dtype float64 with a MultiIndex named by `names`.
    """
    # one independent empty list per level (avoids aliasing a single list)
    levels = [[] for _ in names]
    codes = [[] for _ in names]
    return pd.Series(
        index=pd.MultiIndex(levels=levels, codes=codes, names=names),
        dtype="float64",
    )