Source code for pyam.netcdf

import datetime as dt

import numpy as np
import pandas as pd

from pyam.index import get_index_levels

try:
    import xarray as xr

    HAS_XARRAY = True
except ModuleNotFoundError:
    xr = None
    HAS_XARRAY = False
from pyam.utils import IAMC_IDX, META_IDX

NETCDF_IDX = ["time", "model", "scenario", "region"]


[docs] def read_netcdf(path): """Read timeseries data and meta indicators from a netCDF file Parameters ---------- path : :class:`pathlib.Path` or file-like object Scenario data file in netCDF format. Returns ------- :class:`IamDataFrame` See Also -------- pyam.IamDataFrame.to_netcdf Notes ----- Read the `pyam-netcdf docs <https://pyam-iamc.readthedocs.io/en/stable/api/io.html>`_ for more information on the expected file format structure. """ from pyam import IamDataFrame if not HAS_XARRAY: raise ModuleNotFoundError("Reading netcdf files requires 'xarray'.") _ds = xr.open_dataset(path) _list_variables = [i for i in _ds.to_dict()["data_vars"].keys()] # Check if the time coordinate is years (integers) or date time-format is_year_based = all( isinstance(x, (int, np.integer)) for x in _ds.coords["time"].values ) is_datetime = all( isinstance(x, (dt.date, dt.time, np.datetime64)) for x in _ds.coords["time"].values ) # Check if the xarray dataset has the correct coordinates, then get column names if is_year_based: _list_cols = IAMC_IDX + ["year", "value"] elif is_datetime: _list_cols = IAMC_IDX + ["time", "value"] else: raise TypeError( "Time coordinates can year (integer) or datetime format, found: " + _ds.coords["time"] ) # read `data` table _data = [] _meta = [] for _var in _list_variables: # Check dimensions, if exactly as in META_IDX is a meta indicator # if exactly as in IAMC_IDX is a variable if set(_ds[_var].dims) == set(META_IDX): _meta.append(_var) elif set(_ds[_var].dims) == set(NETCDF_IDX): # convert the data into the IamDataframe format _tmp = ( _ds[_var] .to_dataframe() .rename(columns={_var: "value"}) .reset_index(drop=False) ) _tmp["variable"] = _ds[_var].long_name _tmp["unit"] = _ds[_var].unit _data.append(_tmp) else: raise TypeError( f"Cannot define {_var}, different indices from META_IDX and IAMC_IDX." ) data = pd.concat(_data).reset_index(drop=True) # if year-based data, get the time coordinate as "year" # if timeseries, keep the time coordinate as "time" if is_year_based: data = data.rename(columns={"time": "year"}) return IamDataFrame( data, meta=_ds[_meta].to_dataframe().replace("nan", np.nan) if _meta else None, )
def to_xarray(data_series: pd.Series, meta: pd.DataFrame): """Convert timeseries data and meta indicators to an xarray Dataset Returns ------- :class:`xarray.Dataset` """ if not HAS_XARRAY: raise ModuleNotFoundError("Converting to xarray requires 'xarray'.") dataset = xr.Dataset() # add timeseries data-variables for variable, _variable_data in data_series.groupby("variable"): unit = get_index_levels(_variable_data, "unit") if len(unit) > 1: raise ValueError( "Cannot write to xarray for non-unique units in '" + variable + "'." ) dataset[variable] = xr.DataArray( _variable_data.droplevel(["variable", "unit"]).to_xarray(), ) dataset[variable].attrs = { "unit": unit[0], "long_name": variable, } # add meta indicators as data-variables for meta_indicator, meta_data in meta.items(): meta_data = meta_data.replace(np.nan, "nan") dataset[meta_indicator] = xr.DataArray( meta_data.to_xarray(), dims=META_IDX, name=meta_indicator, ) return dataset