# Source code for pyam.timeseries

import logging
import math

import numpy as np
import pandas as pd

from pyam.str import is_str
from pyam.utils import raise_data_error, to_int

logger = logging.getLogger(__name__)



[docs]
def fill_series(x, time):
    """Return the value of a timeseries at `time`, interpolating linearly if needed

    Missing values (nan) are dropped before the lookup. If `time` is not in the
    remaining index, the value is interpolated linearly between the closest
    earlier and the closest later index entries.

    Parameters
    ----------
    x : pandas.Series
        a timeseries to be interpolated
    time : int or pandas.datetime
        year or datetime to interpolate

    Returns
    -------
    The value of `x` at `time`, or nan if `time` does not lie between two
    non-nan data points (i.e., no extrapolation is performed).
    """
    series = x.dropna()

    # an existing (non-nan) data point is returned as is
    if time in series.index and not np.isnan(series[time]):
        return series[time]

    earlier = [t for t in series.index if t < time]
    later = [t for t in series.index if t > time]

    # interpolation requires a data point on both sides of `time`
    if not (earlier and later):
        return np.nan

    lower, upper = max(earlier), min(later)
    # weight each neighbour by its distance to the *other* neighbour
    weighted = (upper - time) * series[lower] + (time - lower) * series[upper]
    return weighted / (upper - lower)




[docs]
def cumulative(x, first_year, last_year):
    """Returns the cumulative sum of a timeseries

    This function implements linear interpolation between years
    and ignores nan's in the range.
    The function includes the last-year value of the series, and
    raises a warning if first_year or last_year is outside of
    the timeseries range and returns nan.

    The series passed as `x` is not modified; all work is done on a copy.

    Parameters
    ----------
    x : pandas.Series
        a timeseries to be summed over time
    first_year : int
        first year of the sum
    last_year : int
        last year of the sum (inclusive)

    Returns
    -------
    The cumulative sum over `[first_year, last_year]`, or nan if the
    timeseries does not cover that range or if the value at either
    boundary cannot be determined by interpolation.
    """
    # if the timeseries does not cover the range `[first_year, last_year]`,
    # return nan to avoid erroneous aggregation
    if min(x.index) > first_year:
        logger.warning(f"Start of period {first_year} outside of range.")
        return np.nan
    if max(x.index) < last_year:
        logger.warning(f"End of period {last_year} outside of range.")
        return np.nan

    # work on a copy so that neither the index cast nor the boundary values
    # inserted below leak into the series owned by the caller
    x = x.copy()

    # make sure we're using integers
    # NOTE(review): the return value is unused, so `to_int` presumably casts
    # the index of `x` in place - confirm against `pyam.utils.to_int`
    to_int(x, index=True)

    # make sure that both boundaries of the range are explicit data points
    x[first_year] = fill_series(x, first_year)
    x[last_year] = fill_series(x, last_year)

    # a boundary that cannot be interpolated (e.g., only nan's before
    # `first_year`) makes the sum undefined; return nan explicitly
    # rather than falling through and implicitly returning None
    if np.isnan(x[first_year]) or np.isnan(x[last_year]):
        return np.nan

    # all years in the range that have a (non-nan) value, in ascending order
    years = sorted(
        i for i in x.index if first_year <= i <= last_year and not np.isnan(x[i])
    )

    # loop over pairs of consecutive years
    value = 0
    for yr, next_yr in zip(years, years[1:]):
        # the summation is shifted to include the first year fully in sum,
        # otherwise, would return a weighted average of `yr` and `next_yr`
        value += ((next_yr - yr - 1) * x[next_yr] + (next_yr - yr + 1) * x[yr]) / 2

    # the loop above does not include the last element in range
    # (`last_year`), therefore added explicitly
    value += x[last_year]

    return value




[docs]
def cross_threshold(
    x, threshold=0, direction=["from above", "from below"], return_type=int
):
    """Returns a list of the years in which a timeseries crosses a threshold

    The crossing point is determined by linear interpolation between the last
    data point before and the first data point after the crossing. A value
    exactly equal to the threshold is treated as being above the threshold.

    Parameters
    ----------
    x : :class:`pandas.Series`
        A timeseries indexed over years (as integers)
    threshold : float, optional
        The threshold that the timeseries is checked against
    direction : str, optional
        Whether to return all years where the threshold is crossed
        or only where threshold is crossed in a specific direction
    return_type : type, optional
        Whether to cast the returned values to integer (years)

    Returns
    -------
    list of int or :class:`numpy.ndarray`
        If `return_type` is `int`, a list of the first full years after each
        crossing; otherwise, the interpolated crossing points as an array
        with one row per crossing.

    Raises
    ------
    ValueError
        If `direction` contains a value other than "from above" or "from below".
    """
    # the (list) default is never mutated, it is copied into a new list here
    direction = [direction] if is_str(direction) else list(direction)
    if not set(direction).issubset({"from above", "from below"}):
        raise ValueError(f"Invalid direction: {direction}")

    # get the values and time-domain index
    x = x.dropna()
    values, index = x.values - threshold, x.index.to_numpy()
    positive, negative = (values >= 0), (values < 0)

    # determine all indices before crossing the threshold
    pre = [False] * (len(x) - 1)
    if "from above" in direction:
        pre |= positive[:-1] & negative[1:]
    if "from below" in direction:
        pre |= positive[1:] & negative[:-1]
    # `argwhere` yields one row per match, so all arrays below are column-shaped
    pre = np.argwhere(pre)
    # determine all indices after crossing the threshold
    post = pre + 1

    # compute the index value where the threshold is crossed
    change = (values[post] - values[pre]) / (index[post] - index[pre])
    years = index[pre] - values[pre] / change

    # if year (as int) is returned, add one because int() rounds down
    if return_type == int:
        # flatten first: calling int() on a 1-element array (instead of a
        # scalar) is deprecated in recent NumPy versions
        return [int(y) + 1 for y in years.ravel()]
    return years




[docs]
def growth_rate(x):
    """Compute the annualized growth rate from timeseries data

    For each period *t*, the relative change to the subsequent period *t+1* is
    converted to an annualized rate assuming exponential growth over the length
    of that period. The last period has no successor and is therefore dropped.

    Parameters
    ----------
    x : :class:`pandas.Series`
        Timeseries data indexed over the time domain.

    Returns
    -------
    Indexed :class:`pandas.Series` of annualized growth rates

    Raises
    ------
    ValueError
        Math domain error when timeseries crosses 0.

    See Also
    --------
    pyam.IamComputeAccessor.growth_rate

    """
    # the growth rate is only defined if all values have the same sign
    all_positive = all(v > 0 for v in x.values)
    if not (all_positive or all(v < 0 for v in x.values)):
        raise_data_error("Cannot compute growth rate when timeseries crosses 0", x)

    ordered = x.sort_index()

    # relative change from each period to the next one; the entry for the
    # latest period is nan (no successor) and is cut off
    forward_change = -ordered.diff(periods=-1)
    relative_change = (forward_change / ordered).values[:-1]

    # the time domain is the "year" level if the index has several levels
    time_index = ordered.index
    if isinstance(time_index, pd.MultiIndex):
        time_index = time_index.get_level_values("year")
    durations = -pd.Series(time_index).diff(periods=-1).values[:-1]

    # convert the change over each period to an annualized rate
    annualized = []
    for change, duration in zip(relative_change, durations):
        annualized.append(math.pow(1 + change, 1 / duration) - 1)

    return pd.Series(annualized, index=ordered.index[:-1])