# Source code for pyam.compute

import itertools
import math

import pandas as pd
import wquantiles

from pyam._debiasing import _compute_bias
from pyam.index import replace_index_values
from pyam.operations import apply_ops
from pyam.timeseries import growth_rate
from pyam.utils import remove_from_list



[docs]
class IamComputeAccessor:
    """Perform computations on the timeseries data of an IamDataFrame

    An :class:`IamDataFrame` has a module for computation of (advanced) indicators
    from the timeseries data.

    The methods in this module can be accessed via

    .. code-block:: python

        IamDataFrame.compute.<method>(*args, **kwargs)
    """

    def __init__(self, df):
        self._df = df


[docs]
    def share(self, a, b, name, axis="variable", append=False):
        """Compute the share of timeseries data `a` relative to `b` along an `axis`

        This function computes `(a / b) * 100` and assigns the unit '%'. If `a` or `b`
        are lists, the method applies
        :meth:`pandas.groupby().sum() <pandas.core.groupby.GroupBy.sum>` on each group.
        If either `a` or `b` are not defined for a row and `fillna` is not specified,
        no value is computed for that row.

        Parameters
        ----------
        a, b : str or list of str
            Numerator and denominator for the computation.
        name : str
            Name of the computed timeseries data on the `axis`.
        axis : str, optional
            Axis along which to compute.
        append : bool, optional
            Whether to append aggregated timeseries data to this instance.

        Returns
        -------
        :class:`IamDataFrame` or **None**
            Computed timeseries data or None if `append=True`.
        """
        # check that units are valid to compute shares
        a_unit = self._df.filter(**{axis: a}).unit
        b_unit = self._df.filter(**{axis: b}).unit
        for arg, _unit in (("a", a_unit), ("b", b_unit)):
            if len(_unit) > 1:
                raise ValueError(f"Units of `{arg}` not unique: {', '.join(_unit)}")
        if a_unit != b_unit:
            raise ValueError(f"Mismatching units: '{a_unit[0]}' != '{b_unit[0]}'")

        # compute the share by dividing "a / b" and multiplying by 100
        _value = apply_ops(
            self._df,
            name,
            "divide",
            a=a,
            b=b,
            axis=axis,
            ignore_units="%",
        )
        _value = _value * 100

        # append to `self` or return as `IamDataFrame`
        return self._df._finalize(_value, append=append)



[docs]
    def quantiles(
        self, quantiles, weights=None, level=["model", "scenario"], append=False
    ):
        """Compute the optionally weighted quantiles of data grouped by `level`.

        For example, the following will provide the interquartile range and median value
        of CO2 emissions across all models and scenarios in a given dataset:

        .. code-block:: python

            df.filter(variable="Emissions|CO2").compute.quantiles([0.25, 0.5, 0.75])

        Parameters
        ----------
        quantiles : collection
            Group of quantile values to compute
        weights : pd.Series, optional
            Series indexed by `level`
        level : collection, optional
            The index columns to compute quantiles over
        append : bool, optional
            Whether to append computed timeseries data to this instance.

        Returns
        -------
        :class:`IamDataFrame` or **None**
            Computed data or None if `append=True`.

        Raises
        ------
        ValueError
            If more than one variable provided or if argument `weights` is malformed.
        """
        from pyam.core import (
            IamDataFrame,
            concat,
        )

        self_df = self._df
        if len(self_df.variable) > 1:
            raise ValueError(
                "quantiles() currently supports only 1 variable, and this"
                f"dataframe has {len(self_df.variable)}"
            )
        if weights is not None and weights.name != "weight":
            raise ValueError("weights pd.Series must have name 'weight'")

        df = self_df.timeseries()
        model = (
            "Quantiles" if weights is None else "Weighted Quantiles"
        )  # can make this a kwarg

        # get weights aligned with model/scenario in data
        if weights is None:
            df["weight"] = 1.0
        else:
            df = df.join(weights, how="inner")
        w = df["weight"]
        df.drop("weight", axis="columns", inplace=True)

        # prep data for processing
        df = df.reset_index(level=level).drop(columns=level)

        dfs = []
        # indexed over region, variable, and unit
        idxs = df.index.drop_duplicates()
        for idx, q in itertools.product(idxs, quantiles):
            data = pd.Series(
                wquantiles.quantile(df.loc[idx].values.T, w.values, q),
                index=pd.Series(df.columns, name="year"),
                name="value",
            )
            kwargs = {idxs.names[i]: idx[i] for i in range(len(idx))}
            dfs.append(
                IamDataFrame(
                    data,
                    model=model,
                    scenario=str(q),  # can make this a kwarg
                    **kwargs,
                )
            )

        # append to `self` or return as `IamDataFrame`
        return self_df._finalize(concat(dfs), append=append)



[docs]
    def growth_rate(self, mapping, append=False):
        """Compute the annualized growth rate of a timeseries along the time dimension

        The growth rate assigned to period *t* describes the change from
        period *t* to the subsequent period *t+1*.

        Parameters
        ----------
        mapping : dict
            Mapping of *variable* item(s) to the name(s) of the computed data,
            e.g.,

            .. code-block:: python

               {"variable": "name of growth-rate variable", ...}

        append : bool, optional
            Whether to append computed timeseries data to this instance.

        Returns
        -------
        :class:`IamDataFrame` or **None**
            Computed timeseries data or None if `append=True`.

        Raises
        ------
        ValueError
            Math domain error when timeseries crosses 0.

        See Also
        --------
        pyam.timeseries.growth_rate

        """
        frame = self._df

        # select the variables given as keys of `mapping`, then compute the growth
        # rate per timeseries (i.e., grouped by all dimensions except "year")
        selected = frame._data[frame._apply_filters(variable=mapping)]
        group_dims = remove_from_list(frame.dimensions, ["year"])
        rates = selected.groupby(group_dims, group_keys=False).apply(growth_rate)

        if not rates.empty:
            # the unit level is removed here and set again when finalizing;
            # variable names are translated according to `mapping`
            unitless_index = rates.index.droplevel("unit")
            rates.index = replace_index_values(unitless_index, "variable", mapping)
        else:
            rates = empty_series(remove_from_list(frame.dimensions, "unit"))

        return frame._finalize(rates, append=append, unit="")



[docs]
    def learning_rate(self, name, performance, experience, append=False):
        """Compute the implicit learning rate from timeseries data

        Experience curves rest on the idea that the performance of a technology
        improves while experience with that technology accumulates.

        The "learning rate" is the performance improvement (e.g., cost reduction)
        per doubling of the accumulated experience (e.g., cumulative capacity).

        The experience curve parameter *b* is the (linear) slope obtained when
        plotting performance against experience on double-logarithmic scales.
        The learning rate follows from that parameter as :math:`1 - 2^{b}`.

        The learning rate assigned to period *t* describes the change from
        period *t* to the subsequent period *t+1*.

        Parameters
        ----------
        name : str
            Variable name of the computed timeseries data.
        performance : str
            Variable of the "performance" timeseries (e.g., specific investment costs).
        experience : str
            Variable of the "experience" timeseries (e.g., installed capacity).
        append : bool, optional
            Whether to append computed timeseries data to this instance.

        Returns
        -------
        :class:`IamDataFrame` or **None**
            Computed timeseries data or None if `append=True`.
        """
        frame = self._df

        # restrict the data to the two variables of interest
        selected = frame._data[
            frame._apply_filters(variable=[performance, experience])
        ]

        # one group per remaining index combination; each group holds both variables
        group_dims = remove_from_list(frame.dimensions, ["variable", "year", "unit"])
        rates = selected.groupby(group_dims).apply(
            _compute_learning_rate, performance, experience
        )

        return frame._finalize(rates, append=append, variable=name, unit="")



[docs]
    def bias(self, name, method, axis):
        """Compute the bias weights and add to 'meta'

        This method forwards its arguments, together with the underlying
        :class:`IamDataFrame`, to the internal helper ``_compute_bias`` and does
        not return a value itself.

        Parameters
        ----------
        name : str
           Column name in the 'meta' dataframe
        method : str
            Method to compute the bias weights, see the notes
        axis : str
            Index dimensions on which to apply the `method`

        Returns
        -------
        **None**

        Notes
        -----

        The following methods are implemented:

        - "count": use the inverse of the number of scenarios grouped by `axis` names.

          Using the following method on an IamDataFrame with three scenarios

          .. code-block:: python

              df.compute.bias(name="bias-weight", method="count", axis="scenario")

          results in the following column to be added to *df.meta*:

          .. list-table::
             :header-rows: 1

             * - model
               - scenario
               - bias-weight
             * - model_a
               - scen_a
               - 0.5
             * - model_a
               - scen_b
               - 1
             * - model_b
               - scen_a
               - 0.5

        """
        # all work happens in the helper; nothing is returned from this method
        _compute_bias(self._df, name, method, axis)




def _compute_learning_rate(x, performance, experience):
    """Internal implementation for computing implicit learning rate from timeseries data

    The input series is not modified; all index manipulation happens on a new object.

    Parameters
    ----------
    x : :class:`pandas.Series`
        Timeseries data of the *performance* and *experience* variables
        indexed over the time domain.
    performance : str
        Variable of the "performance" timeseries (e.g., specific investment costs).
    experience : str
        Variable of the "experience" timeseries (e.g., cumulative installed capacity).

    Returns
    -------
    Indexed :class:`pandas.Series` of implicit learning rates

    Notes
    -----
    If the experience value does not change between two periods, or changes so little
    that the slope exceeds the floating-point range, the result for that period is
    infinite (or NaN) rather than an exception.
    """
    # drop all index dimensions other than "variable" and "year"; `droplevel` returns
    # a new series, so the caller's object (e.g., a groupby chunk) stays untouched
    x = x.droplevel([i for i in x.index.names if i not in ["variable", "year"]])

    # apply log, dropping all values that are zero or negative
    x = x[x > 0].apply(math.log10)

    # return empty pd.Series if not all relevant variables exist
    if not all(v in x.index for v in (performance, experience)):
        return empty_series(remove_from_list(x.index.names, "variable"))

    # compute the "experience parameter" (slope of experience curve on double-log scale)
    log_performance = x[performance]
    log_experience = x[experience]
    b = (log_performance - log_performance.shift(periods=-1)) / (
        log_experience - log_experience.shift(periods=-1)
    )

    # translate to "learning rate" (e.g., cost reduction per doubling of capacity);
    # the vectorized power overflows to inf instead of raising like `math.pow`
    return 1 - 2.0**b


def empty_series(names):
    """Build a :class:`pandas.Series` of dtype float64 that holds no entries

    The series carries a :class:`pandas.MultiIndex` with one (empty) level per
    item of `names`, so that the index names are correct even without any data.
    """
    n_levels = len(names)
    index = pd.MultiIndex(
        levels=[[] for _ in range(n_levels)],
        codes=[[] for _ in range(n_levels)],
        names=names,
    )
    return pd.Series(index=index, dtype="float64")