# -*- coding: utf-8 -*-
"""
Created on Mon Mar 20 15:41:32 2023
@author: pratama
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from message_ix import make_df
from message_ix.models import MESSAGE
def generate_df(scenario, tech_data): # noqa: C901
"""
This function generate parameter dataframe, matching the data input
in yaml file and parameter's dimension
Parameters
----------
scenario : message_ix.Scenario()
MESSAGEix Scenario where the data will be included
tech_data : dictionary, technology data read using load_package_data
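
    Examples
    --------
    A minimal, illustrative call; ``scenario`` is an existing
    message_ix.Scenario and ``tech_data`` is the dictionary returned by
    load_package_data (the technology and parameter names below are
    placeholders, not part of this module)::

        data = generate_df(scenario, tech_data)
        # one DataFrame per technology and per parameter entry
        df_inv = data["my_tech"]["inv_cost"]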
"""
# Set up dictionary of parameter indices list
par_idx = {}
data = {}
message_items = {k: v.to_dict() for k, v in MESSAGE.items.items()}
    # Create dictionary of parameter indices and data
for tech in set(tech_data):
# add vintage and active years and update tech_data for each tech
years_vtg_act = scenario.vintage_and_active_years()
years_vtg_act = years_vtg_act[
years_vtg_act["year_vtg"] >= tech_data[tech]["year_init"]
]
tech_data[tech]["year_vtg"] = years_vtg_act["year_vtg"].to_list()
tech_data[tech]["year_act"] = years_vtg_act["year_act"].to_list()
# collect parameter indices and update data
par_idx.update(
{
tech: {
name: {
idx: []
for idx in list(
message_items[tech_data[tech][name]["par_name"]].get(
"idx_names"
)
)
}
for name in set(tech_data[tech])
- set(["year_init", "year_vtg", "year_act"])
}
}
)
data.update({tech: {name: [] for name in list(par_idx[tech].keys())}})
    # Default set elements: if an index's values are not provided in the data
    # input, they are retrieved from the scenario's sets instead
regions = []
emissions = []
times = []
modes = []
commodities = []
levels = []
relations = []
set_elements_dict = {
"node_loc": {"data": regions, "name": "node"},
"emission": {"data": emissions, "name": "emission"},
"mode": {"data": modes, "name": "mode"},
"time": {"data": times, "name": "time"},
"commodity": {"data": commodities, "name": "commodity"},
"level": {"data": levels, "name": "level"},
"time_origin": {"data": times, "name": "time"},
"time_dest": {"data": times, "name": "time"},
"relation": {"data": relations, "name": "relation"},
"node_rel": {"data": regions, "name": "node"},
}
# Create DataFrame for all parameters
for tec, val in data.items():
for name in val.keys():
if tec not in scenario.set("technology"):
scenario.add_set("technology", tec)
if "commodity" in par_idx[tec][name]:
commodity = list(tech_data[tec][name]["commodity"].keys())[0]
if commodity not in scenario.set("commodity"):
scenario.add_set("commodity", commodity)
kwargs = {}
if all(idx in par_idx[tec][name] for idx in ["year_vtg", "year_act"]):
kwargs = {
"year_vtg": tech_data[tec]["year_vtg"],
"year_act": tech_data[tec]["year_act"],
}
elif "year_vtg" in par_idx[tec][name]:
kwargs = {"year_vtg": sorted(set(tech_data[tec]["year_vtg"]))}
else:
kwargs = {"year_act": sorted(set(tech_data[tec]["year_act"]))}
            # if 'year_rel' is present, its values are taken from 'year_act'
if "year_rel" in par_idx[tec][name]:
kwargs.update({"year_rel": sorted(set(tech_data[tec]["year_act"]))})
df = make_df(
tech_data[tec][name]["par_name"],
technology=tec,
value=tech_data[tec][name]["value"],
unit=tech_data[tec][name]["unit"],
**kwargs,
)
            # expand the DataFrame over the remaining index dimensions
idx_exp = [
e
for e in par_idx[tec][name]
if e
not in [
"technology",
"year_vtg",
"year_act",
"year_rel",
"node_origin",
"node_dest",
"node_rel",
"time_origin",
"time_dest",
]
]
for idx in idx_exp:
default = (
{
e: 1
for e in list(scenario.set(set_elements_dict[idx]["name"]))[1:]
}
if idx in ["node_loc", "mode"]
else {
e: 1 for e in list(scenario.set(set_elements_dict[idx]["name"]))
}
)
listidx = list(tech_data[tec][name].get(idx, default))
listdfidx = []
for e in listidx:
df1 = df.copy()
df1[idx] = [e] * len(df)
listdfidx.append(df1)
df = pd.concat(listdfidx, ignore_index=True)
# assigning values for node and time related indices
for idx in df.columns:
if idx in ["node_origin", "node_dest", "node_rel"]:
df[idx] = df["node_loc"]
if idx in ["time_origin", "time_dest"]:
df[idx] = df["time"]
# Calculate values of row-by-row multipliers
mult = []
for i in range(len(df)):
# node_loc factor
_node_loc = (
tech_data[tec]
.get(name, {})
.get("node_loc", {})
.get(df.get("node_loc", {}).get(i), 1)
)
# year_vtg factor
# _year_vtg = (1+rate)**delta_years
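                # e.g. with rate = 0.05 and delta_years = 10 the vintage factor is
                # (1 + 0.05) ** 10 ≈ 1.63, further scaled by any per-year multiplier
                # (usf_year_vtg) given for that vintage year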
if "year_vtg" in df.columns:
usf_year_vtg = (
tech_data[tec]
.get(name, {})
.get("year_vtg", {})
.get(df["year_vtg"][i], 1)
)
                    exp_year_vtg = df["year_vtg"][i] - tech_data[tec]["year_init"]
_year_vtg = (
np.power(
(
1
+ tech_data[tec]
.get(name, {})
.get("year_vtg", {})
.get("rate", 0.0)
),
exp_year_vtg,
)
* usf_year_vtg
)
else:
_year_vtg = 1
# year_act factor
# _year_act = ((1+rate)**(year_act-year_vtg))*usf_year_act
# if both years present
# _year_act = ((1+rate)**(year_act-first_active_year))*usf_year_act
# if no year_vtg
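                # e.g. with rate = -0.02 and 20 years between year_act and year_vtg
                # the factor is (1 - 0.02) ** 20 ≈ 0.67, a gradual decline after
                # installation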
if "year_act" in df.columns:
usf_year_act = (
tech_data[tec]
.get(name, {})
.get("year_act", {})
.get(df["year_act"][i], 1)
)
exp_year_act = df["year_act"][i] - (
df["year_vtg"][i]
if "year_vtg" in df.columns
                        else tech_data[tec]["year_init"]
)
_year_act = (
np.power(
(
1
+ tech_data[tec]
.get(name, {})
.get("year_act", {})
.get("rate", 0)
),
exp_year_act,
)
* usf_year_act
)
else:
_year_act = 1
                # get mode multiplier from tech_data
_mode = (
tech_data[tec]
.get(name, {})
.get("mode", {})
.get(df.get("mode", {}).get(i), 1)
)
mult.append(
np.prod(
[
_node_loc,
_year_vtg,
_year_act,
_mode,
]
)
)
            # apply the combined multipliers to the parameter values
            df["value"] = df["value"] * mult
data[tec][name] = df
return data
def print_df(scenario, tech_data):
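    """
    Write the parameter DataFrames produced by generate_df to one Excel file
    per technology (named after the technology), with one sheet per parameter
    entry.

    Parameters
    ----------
    scenario : message_ix.Scenario()
        MESSAGEix Scenario where the data will be included
    tech_data : dictionary, technology data read using load_package_data
    """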
data = generate_df(scenario, tech_data)
for tec, val in data.items():
with pd.ExcelWriter(f"{tec}.xlsx", engine="xlsxwriter", mode="w") as writer:
for sheet_name, sheet_data in val.items():
sheet_data.to_excel(writer, sheet_name=sheet_name, index=False)
def add_tech(scenario, tech_data):
"""
Parameters
----------
scenario : message_ix.Scenario()
MESSAGEix Scenario where the data will be included
filepath : string, path of the input file
the default is in the module's folder
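
    Examples
    --------
    Illustrative usage; the scenario is checked out before editing and
    committed afterwards (the standard ixmp workflow)::

        scenario.check_out()
        add_tech(scenario, tech_data)
        scenario.commit("add technologies from YAML input")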
"""
# check if all required sets already in scenario
# TODO: this must not be hardcoded here
# if "CO2_storage" not in scenario.set("emission"):
# scenario.add_set("emission", "CO2_storage")
# if "co2_storage_pot" not in scenario.set("type_emission"):
# scenario.add_set("type_emission", "co2_storage_pot")
# if "co2_potential" not in scenario.set("type_tec"):
# scenario.add_set("type_tec", "co2_potential")
# if "co2_stor" not in scenario.set("technology"):
# scenario.add_set("technology", "co2_stor")
# scenario.add_set("cat_emission", ["co2_storage_pot", "CO2_storage"])
# scenario.add_set("cat_tec", ["co2_potential", "co2_stor"])
# Reading new technology database
data = generate_df(scenario, tech_data)
# TODO: @ywpratama, bring in the set information here from the YAML file
# Adding parameters by technology and name
for tec, val in data.items():
if tec not in set(scenario.set("technology")):
scenario.add_set("technology", tec)
for name in val.keys():
if tech_data[tec][name]["par_name"] == "relation_activity":
for rel in tech_data[tec][name]["relation"]:
if rel not in set(scenario.set("relation")):
scenario.add_set("relation", rel)
# if tech_data[tec][name]["relation"][0] not in set(
# scenario.set("relation")
# ):
# scenario.add_set("relation", tech_data[tec][name]["relation"][0])
scenario.add_par(tech_data[tec][name]["par_name"], data[tec][name])
    # TODO: @ywpratama, add in the relation_activity for emissions in the YAML file
# Specific for daccs setup in the global model,
# "CO2_Emission_Global_Total" relation should be added via
# yaml file referencing this method below
# n_nodes = np.int32(len(scenario.set("node")) - 2)
# excluding 'World' and 'RXX_GLB'
# reg_exception = ["World", f"R{n_nodes}_GLB"]
# node_loc = [e for e in scenario.set("node") if e not in reg_exception]
# year_act = [e for e in scenario.set("year") if e >= 2025]
# Creating dataframe for CO2_Emission_Global_Total relation
# CO2_global_par = []
# for reg in node_loc:
# CO2_global_par.append(
# make_df(
# "relation_activity",
# relation="CO2_Emission_Global_Total",
# node_rel=f"R{n_nodes}_GLB",
# year_rel=year_act,
# node_loc=reg,
# technology="co2_stor",
# year_act=year_act,
# mode="M1",
# value=-1,
# unit="-",
# )
# )
# CO2_global_par = pd.concat(CO2_global_par)
# relation lower and upper bounds
# rel_lower_upper = []
# for rel in ["co2_trans", "bco2_trans"]:
# for reg in node_loc:
# rel_lower_upper.append(
# make_df(
# "relation_lower",
# relation=rel,
# node_rel=reg,
# year_rel=year_act,
# value=0,
# unit="-",
# )
# )
# rel_lower_upper = pd.concat(rel_lower_upper)
# Adding the dataframe to the scenario
# scenario.add_par("relation_activity", CO2_global_par)
# scenario.add_par("relation_lower", rel_lower_upper)
# scenario.add_par("relation_upper", rel_lower_upper)
def get_values(
scenario,
variable="",
valuetype="lvl",
# filters = {}
):
    # filters must use 'cat_tec' to aggregate technologies
    # don't forget to also check the reported unit
"""
Parameters
----------
scenario : message_ix.Scenario()
MESSAGEix Scenario where the data will be included
variable : string
name of variable to report
valuetype : string, 'lvl' or 'mrg'
type of values reported to report,
either level or marginal.
default is 'lvl'
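
    Examples
    --------
    Retrieve the solved levels of the ``CAP`` variable, indexed by its
    dimensions::

        cap = get_values(scenario, variable="CAP", valuetype="lvl")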
"""
    var = scenario.var(variable)
    if isinstance(var, pd.DataFrame):
        dimensions = [col for col in var.columns if col not in ["lvl", "mrg"]]
        return var.set_index(dimensions)[[valuetype]]
    else:
        return var[valuetype]
def get_report(
scenario,
technologies=[],
):
"""
Parameters
----------
scenario : message_ix.Scenario()
MESSAGEix Scenario where the data will be included
technologies : string or list
name of technology to be reported
variable : string or list
name of variable to report
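
    Examples
    --------
    Illustrative call; the technology names are placeholders::

        report = get_report(scenario, technologies=["dac_lt", "dac_ht"])
        report["CAP"]  # installed capacity by node and year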
"""
var_dict = {var: [] for var in ["CAP", "CAP_NEW", "INVESTMENT", "REMOVAL"]}
# listing model years to be reported
years_rep = sorted(
scenario.set("cat_year")
.set_index("type_year")
.loc["cumulative", "year"]
.to_list()
)
# Create dataframe
for var in var_dict.keys():
# primary variables
if var in ["CAP", "CAP_NEW"]:
df = (
get_values(scenario, var)["lvl"]
.unstack()
.loc[:, technologies, :]
.groupby(["node_loc"])
.sum()
)[years_rep]
# investment
elif var == "INVESTMENT":
depl = (
get_values(scenario, "CAP_NEW")["lvl"].unstack().loc[:, technologies, :]
)[years_rep]
dfic = scenario.par("inv_cost")
inv = (
dfic.loc[dfic["technology"].isin(technologies)]
.set_index(["node_loc", "technology", "year_vtg"])["value"]
.unstack()
)
df = depl.mul(inv).groupby(["node_loc"]).sum()
# removal
elif var == "REMOVAL":
acts = get_values(scenario, "ACT").droplevel(["mode", "time"])
df = (
acts.loc[:, technologies, :, :]["lvl"]
.unstack()
.groupby(["node_loc"])
.sum()
)
df.loc["World"] = df.sum(axis=0)
var_dict[var] = df
# Create dictionary for variable dataframes and write variables to excel
with pd.ExcelWriter("get_report_output.xlsx", engine="openpyxl") as writer:
for var in var_dict.keys():
var_dict[var].to_excel(writer, sheet_name=var)
frame_count = 0
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(10, 6))
for k, v in var_dict.items():
        r, c = divmod(frame_count, 2)
frame_count += 1
for reg in range(len(v)):
kwargs = {"marker": "o"} if reg == 11 else {}
axs[r, c].plot(v.columns, v.iloc[reg], label=v.index[reg], **kwargs)
axs[r, c].set_title(k)
axs[0, 0].legend(ncols=2)
plt.tight_layout()
plt.show()
return var_dict