Source code for amici.pandas

"""
Pandas Wrappers
---------------
This module contains convenience wrappers that allow for easy interconversion
between C++ objects from :mod:`amici.amici` and pandas DataFrames
"""

import copy
import math
from typing import Optional, SupportsFloat, Union

import amici
import numpy as np
import pandas as pd

from .numpy import ExpDataView

__all__ = [
    "get_expressions_as_dataframe",
    "getEdataFromDataFrame",
    "getDataObservablesAsDataFrame",
    "getSimulationObservablesAsDataFrame",
    "getSimulationStatesAsDataFrame",
    "getResidualsAsDataFrame",
]

# Accepted forms of an "experimental data" argument: a single ExpData /
# ExpDataPtr instance, or a list of such instances.
ExpDatas = Union[
    list[amici.amici.ExpData],
    list[amici.ExpDataPtr],
    amici.amici.ExpData,
    amici.ExpDataPtr,
]
# Accepted forms of a "simulation result" argument: a single ReturnDataView
# or a list of them.
ReturnDatas = Union[list[amici.ReturnDataView], amici.ReturnDataView]

# A model may be passed either as amici.ModelPtr or as amici.Model.
AmiciModel = Union[amici.ModelPtr, amici.Model]


def _process_edata_list(edata_list: ExpDatas) -> list[amici.amici.ExpData]:
    """
    Normalize an experimental-data argument to a list.

    :param edata_list:
        a single :class:`amici.amici.ExpData` / ``amici.ExpDataPtr`` instance
        or a list of such instances

    :return:
        a one-element list wrapping the argument if a single instance was
        passed, otherwise the argument itself (not copied)
    """
    single_instance_types = (amici.amici.ExpData, amici.ExpDataPtr)
    is_single = isinstance(edata_list, single_instance_types)
    return [edata_list] if is_single else edata_list


def _process_rdata_list(rdata_list: ReturnDatas) -> list[amici.ReturnDataView]:
    """
    Normalize a simulation-result argument to a list.

    :param rdata_list:
        a single :class:`amici.ReturnDataView` instance or a list of such
        instances

    :return:
        a one-element list wrapping the argument if a single instance was
        passed, otherwise the argument itself (not copied)
    """
    is_single = isinstance(rdata_list, amici.ReturnDataView)
    return [rdata_list] if is_single else rdata_list



[docs]
def getDataObservablesAsDataFrame(
    model: AmiciModel, edata_list: ExpDatas, by_id: Optional[bool] = False
) -> pd.DataFrame:
    """
    Write Observables from experimental data as DataFrame.

    One row is generated per timepoint of every ExpData instance. Each row
    holds the measured value and the measurement standard deviation
    (``<observable>_std``) of every observable, the constant ``"data"`` in the
    ``datatype`` column, and the condition columns produced by
    ``_fill_conditions_dict``.

    :param model:
        Model instance.

    :param edata_list:
        list of ExpData instances with experimental data.
        May also be a single ExpData instance.

    :param by_id:
        If True, uses observable ids as column names in the generated
        DataFrame, otherwise the possibly more descriptive observable names
        are used.

    :return:
        pandas DataFrame with conditions/timepoints as rows and observables as
        columns.
    """
    edata_list = _process_edata_list(edata_list)

    # list of all column names using either ids or names
    cols = _get_extended_observable_cols(model, by_id=by_id)
    # observable labels depend on the model only, so look them up once
    # instead of once per timepoint
    observables = _get_names_or_ids(model, "Observable", by_id=by_id)

    # aggregate records
    records = []
    for edata in edata_list:
        npdata = ExpDataView(edata)
        # condition columns depend on the ExpData instance only, not on the
        # timepoint, so they are computed once per instance
        conditions = _fill_conditions_dict({}, model, edata, by_id=by_id)
        for i_time, timepoint in enumerate(edata.getTimepoints()):
            record = {"time": timepoint, "datatype": "data"}
            # add observables and noises
            for i_obs, obs in enumerate(observables):
                record[obs] = npdata["observedData"][i_time, i_obs]
                record[obs + "_std"] = npdata["observedDataStdDev"][
                    i_time, i_obs
                ]

            # conditions are merged last so that, as before, they take
            # precedence over any equally named entry
            record.update(conditions)
            records.append(record)

    return pd.DataFrame.from_records(records, columns=cols)




[docs]
def getSimulationObservablesAsDataFrame(
    model: amici.Model,
    edata_list: ExpDatas,
    rdata_list: ReturnDatas,
    by_id: Optional[bool] = False,
) -> pd.DataFrame:
    """
    Write Observables from simulation results as DataFrame.

    ExpData and ReturnData instances are paired positionally. One row is
    generated per entry of ``rdata["t"]``; it holds the simulated value
    (``rdata["y"]``) and standard deviation (``rdata["sigmay"]``, stored as
    ``<observable>_std``) of every observable, the constant ``"simulation"``
    in the ``datatype`` column, and the condition columns derived from the
    paired ExpData instance.

    :param model:
        Model instance.

    :param edata_list:
        list of ExpData instances with experimental data.
        May also be a single ExpData instance.

    :param rdata_list:
        list of ReturnData instances corresponding to ExpData.
        May also be a single ReturnData instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        pandas DataFrame with conditions/timepoints as rows and observables as
        columns.
    """
    edata_list = _process_edata_list(edata_list)
    rdata_list = _process_rdata_list(rdata_list)

    # list of all column names using either names or ids
    cols = _get_extended_observable_cols(model, by_id=by_id)
    # observable labels depend on the model only, so look them up once
    # instead of once per timepoint
    observables = _get_names_or_ids(model, "Observable", by_id=by_id)

    # aggregate records
    records = []
    for edata, rdata in zip(edata_list, rdata_list):
        # condition columns come from edata and are identical for all
        # timepoints of this simulation
        conditions = _fill_conditions_dict({}, model, edata, by_id=by_id)
        for i_time, timepoint in enumerate(rdata["t"]):
            record = {
                "time": timepoint,
                "datatype": "simulation",
            }
            # append simulations
            for i_obs, obs in enumerate(observables):
                record[obs] = rdata["y"][i_time, i_obs]
                record[obs + "_std"] = rdata["sigmay"][i_time, i_obs]

            # conditions are merged last so that, as before, they take
            # precedence over any equally named entry
            record.update(conditions)
            records.append(record)

    return pd.DataFrame.from_records(records, columns=cols)




[docs]
def getSimulationStatesAsDataFrame(
    model: amici.Model,
    edata_list: ExpDatas,
    rdata_list: ReturnDatas,
    by_id: Optional[bool] = False,
) -> pd.DataFrame:
    """
    Get model state according to lists of ReturnData and ExpData.

    ExpData and ReturnData instances are paired positionally. One row is
    generated per entry of ``rdata["t"]``; it holds the state values taken
    from ``rdata["x"]`` and the condition columns derived from the paired
    ExpData instance.

    :param model:
        Model instance.

    :param edata_list:
        list of ExpData instances with experimental data.
        May also be a single ExpData instance.

    :param rdata_list:
        list of ReturnData instances corresponding to ExpData.
        May also be a single ReturnData instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return: pandas DataFrame with conditions/timepoints as rows and
        state variables as columns.
    """
    edata_list = _process_edata_list(edata_list)
    rdata_list = _process_rdata_list(rdata_list)

    # get conditions and state column names by name or id
    cols = _get_state_cols(model, by_id=by_id)
    # state labels depend on the model only, so look them up once instead of
    # once per timepoint
    states = _get_names_or_ids(model, "State", by_id=by_id)

    # aggregate records
    records = []
    for edata, rdata in zip(edata_list, rdata_list):
        # condition columns come from edata and are identical for all
        # timepoints of this simulation
        conditions = _fill_conditions_dict({}, model, edata, by_id=by_id)
        for i_time, timepoint in enumerate(rdata["t"]):
            record = {
                "time": timepoint,
            }

            # append states
            for i_state, state in enumerate(states):
                record[state] = rdata["x"][i_time, i_state]

            # conditions are merged last so that, as before, they take
            # precedence over any equally named entry
            record.update(conditions)
            records.append(record)

    return pd.DataFrame.from_records(records, columns=cols)




[docs]
def get_expressions_as_dataframe(
    model: amici.Model,
    edata_list: ExpDatas,
    rdata_list: ReturnDatas,
    by_id: Optional[bool] = False,
) -> pd.DataFrame:
    """
    Get values of model expressions from lists of ReturnData as DataFrame.

    ExpData and ReturnData instances are paired positionally. One row is
    generated per entry of ``rdata["t"]``; it holds the expression values
    taken from ``rdata["w"]`` and the condition columns derived from the
    paired ExpData instance.

    :param model:
        Model instance.

    :param edata_list:
        list of ExpData instances with experimental data.
        May also be a single ExpData instance.

    :param rdata_list:
        list of ReturnData instances corresponding to ExpData.
        May also be a single ReturnData instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return: pandas DataFrame with conditions/timepoints as rows and
        model expressions as columns.
    """
    edata_list = _process_edata_list(edata_list)
    rdata_list = _process_rdata_list(rdata_list)

    # get conditions and expression column names by name or id
    cols = _get_expression_cols(model, by_id=by_id)
    # expression labels depend on the model only, so look them up once
    # instead of once per timepoint
    expressions = _get_names_or_ids(model, "Expression", by_id=by_id)

    # aggregate records
    records = []
    for edata, rdata in zip(edata_list, rdata_list):
        # condition columns come from edata and are identical for all
        # timepoints of this simulation
        conditions = _fill_conditions_dict({}, model, edata, by_id=by_id)
        for i_time, timepoint in enumerate(rdata["t"]):
            record = {
                "time": timepoint,
            }

            # append expressions
            for i_expr, expr in enumerate(expressions):
                record[expr] = rdata["w"][i_time, i_expr]

            # conditions are merged last so that, as before, they take
            # precedence over any equally named entry
            record.update(conditions)
            records.append(record)

    return pd.DataFrame.from_records(records, columns=cols)




[docs]
def getResidualsAsDataFrame(
    model: amici.Model,
    edata_list: ExpDatas,
    rdata_list: ReturnDatas,
    by_id: Optional[bool] = False,
) -> pd.DataFrame:
    """
    Convert a list of ReturnData and ExpData to pandas DataFrame with
    residuals.

    For every row of the simulation dataframe and every observable, the
    residual is computed as
    ``abs((data - simulation) / simulation_std)``, where ``simulation_std``
    is the ``<observable>_std`` column of the simulation dataframe. Data and
    simulation rows are matched by row label.

    :param model:
        Model instance.

    :param edata_list:
        list of ExpData instances with experimental data. May also be a
        single ExpData instance.

    :param rdata_list:
        list of ReturnData instances corresponding to ExpData. May also be a
        single ReturnData instance.

    :param by_id: bool, optional (default = False)
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        pandas DataFrame with conditions and residuals.
    """
    edata_list = _process_edata_list(edata_list)
    rdata_list = _process_rdata_list(rdata_list)

    # create observable and simulation dataframes
    df_edata = getDataObservablesAsDataFrame(model, edata_list, by_id=by_id)
    df_rdata = getSimulationObservablesAsDataFrame(
        model, edata_list, rdata_list, by_id=by_id
    )

    # get all column names using names or ids
    cols = _get_observable_cols(model, by_id=by_id)

    # labels depend on the model only; look them up once
    observables = _get_names_or_ids(model, "Observable", by_id=by_id)
    fixed_parameters = _get_names_or_ids(model, "FixedParameter", by_id=by_id)
    parameters = _get_names_or_ids(model, "Parameter", by_id=by_id)

    # Columns that are part of the output header but were previously never
    # filled (and hence ended up empty): condition id, parameters and scales.
    passthrough_cols = (
        ["condition_id"] + parameters + [par + "_scale" for par in parameters]
    )
    fixed_parameter_cols = [
        par + suffix
        for par in fixed_parameters
        for suffix in ("", "_preeq", "_presim")
    ]

    # aggregate records
    dicts = []
    for row in df_rdata.index:
        # passthrough columns go first so that every entry written below
        # keeps the precedence it had before
        datadict = {col: df_rdata.loc[row, col] for col in passthrough_cols}
        datadict["time"] = df_rdata.loc[row, "time"]
        datadict["t_presim"] = df_rdata.loc[row, "t_presim"]

        # iterate over observables
        for obs in observables:
            # compute residual and append to dict
            datadict[obs] = abs(
                (df_edata.loc[row, obs] - df_rdata.loc[row, obs])
                / df_rdata.loc[row, obs + "_std"]
            )

        # fill in conditions (fixed parameters incl. preeq/presim variants)
        for col in fixed_parameter_cols:
            datadict[col] = df_rdata.loc[row, col]

        # append to dataframe
        dicts.append(datadict)

    return pd.DataFrame.from_records(dicts, columns=cols)



def _fill_conditions_dict(
    datadict: dict[str, float],
    model: AmiciModel,
    edata: amici.amici.ExpData,
    by_id: bool,
) -> dict[str, float]:
    """
    Insert the condition description of ``edata`` into ``datadict``.

    Written entries: ``condition_id``, ``t_presim``, one entry per fixed
    parameter plus its ``_preeq`` and ``_presim`` variants, and one entry per
    parameter plus its ``_scale``. Values missing in ``edata`` (empty
    vectors) are taken from ``model`` for fixed parameters, parameters and
    scales, and set to NaN for the ``_preeq`` / ``_presim`` variants.

    :param datadict:
        dictionary in which condition parameters will be inserted
        as key value pairs; modified in place.

    :param model:
        Model instance.

    :param edata:
        ExpData instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        the same dictionary, with condition parameters filled in.

    """
    datadict["condition_id"] = edata.id
    datadict["t_presim"] = edata.t_presim

    fixed_labels = _get_names_or_ids(model, "FixedParameter", by_id=by_id)
    for idx, label in enumerate(fixed_labels):
        # main simulation value: edata overrides the model default
        datadict[label] = (
            edata.fixedParameters[idx]
            if len(edata.fixedParameters)
            else model.getFixedParameters()[idx]
        )
        # preequilibration / presimulation values have no model fallback
        datadict[label + "_preeq"] = (
            edata.fixedParametersPreequilibration[idx]
            if len(edata.fixedParametersPreequilibration)
            else np.nan
        )
        datadict[label + "_presim"] = (
            edata.fixedParametersPresimulation[idx]
            if len(edata.fixedParametersPresimulation)
            else np.nan
        )

    parameter_labels = _get_names_or_ids(model, "Parameter", by_id=by_id)
    for idx, label in enumerate(parameter_labels):
        datadict[label] = (
            edata.parameters[idx]
            if len(edata.parameters)
            else model.getParameters()[idx]
        )
        datadict[label + "_scale"] = (
            edata.pscale[idx]
            if len(edata.pscale)
            else model.getParameterScale()[idx]
        )

    return datadict


def _get_extended_observable_cols(model: AmiciModel, by_id: bool) -> list[str]:
    """
    Build the header of the extended observable dataframes.

    The header consists of ``condition_id``, ``time``, ``datatype`` and
    ``t_presim``, followed by the fixed parameters, their ``_preeq`` and
    ``_presim`` variants, the parameters, their ``_scale`` variants, the
    observables and their ``_std`` variants.

    :param model:
        Model instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        column names as list.
    """
    fixed_labels = _get_names_or_ids(model, "FixedParameter", by_id=by_id)
    parameter_labels = _get_names_or_ids(model, "Parameter", by_id=by_id)
    observable_labels = _get_names_or_ids(model, "Observable", by_id=by_id)

    header = ["condition_id", "time", "datatype", "t_presim"]
    header.extend(fixed_labels)
    header.extend(label + "_preeq" for label in fixed_labels)
    header.extend(label + "_presim" for label in fixed_labels)
    header.extend(parameter_labels)
    header.extend(label + "_scale" for label in parameter_labels)
    header.extend(observable_labels)
    header.extend(label + "_std" for label in observable_labels)
    return header


def _get_observable_cols(model: AmiciModel, by_id: bool) -> list[str]:
    """
    Build the header of the observable dataframes.

    The header consists of ``condition_id``, ``time`` and ``t_presim``,
    followed by the fixed parameters, their ``_preeq`` and ``_presim``
    variants, the parameters, their ``_scale`` variants and the observables.

    :param model:
        Model instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        column names as list.
    """
    fixed_labels = _get_names_or_ids(model, "FixedParameter", by_id=by_id)
    parameter_labels = _get_names_or_ids(model, "Parameter", by_id=by_id)
    observable_labels = _get_names_or_ids(model, "Observable", by_id=by_id)

    header = ["condition_id", "time", "t_presim"]
    header.extend(fixed_labels)
    header.extend(label + "_preeq" for label in fixed_labels)
    header.extend(label + "_presim" for label in fixed_labels)
    header.extend(parameter_labels)
    header.extend(label + "_scale" for label in parameter_labels)
    header.extend(observable_labels)
    return header


def _get_state_cols(model: AmiciModel, by_id: bool) -> list[str]:
    """
    Build the header of the state dataframes.

    The header consists of ``condition_id``, ``time`` and ``t_presim``,
    followed by the fixed parameters, their ``_preeq`` and ``_presim``
    variants, the parameters, their ``_scale`` variants and the states.

    :param model:
        Model instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        column names as list.
    """
    fixed_labels = _get_names_or_ids(model, "FixedParameter", by_id=by_id)
    parameter_labels = _get_names_or_ids(model, "Parameter", by_id=by_id)
    state_labels = _get_names_or_ids(model, "State", by_id=by_id)

    header = ["condition_id", "time", "t_presim"]
    header.extend(fixed_labels)
    header.extend(label + "_preeq" for label in fixed_labels)
    header.extend(label + "_presim" for label in fixed_labels)
    header.extend(parameter_labels)
    header.extend(label + "_scale" for label in parameter_labels)
    header.extend(state_labels)
    return header


def _get_expression_cols(model: AmiciModel, by_id: bool) -> list[str]:
    """
    Build the header of the expression dataframes.

    The header consists of ``condition_id``, ``time`` and ``t_presim``,
    followed by the fixed parameters, their ``_preeq`` and ``_presim``
    variants, the parameters, their ``_scale`` variants and the expressions.

    :param model:
        Model instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        column names as list.
    """
    fixed_labels = _get_names_or_ids(model, "FixedParameter", by_id=by_id)
    parameter_labels = _get_names_or_ids(model, "Parameter", by_id=by_id)
    expression_labels = _get_names_or_ids(model, "Expression", by_id=by_id)

    header = ["condition_id", "time", "t_presim"]
    header.extend(fixed_labels)
    header.extend(label + "_preeq" for label in fixed_labels)
    header.extend(label + "_presim" for label in fixed_labels)
    header.extend(parameter_labels)
    header.extend(label + "_scale" for label in parameter_labels)
    header.extend(expression_labels)
    return header


def _get_names_or_ids(
    model: AmiciModel, variable: str, by_id: bool
) -> list[str]:
    """
    Return the labels used for the given kind of model variable.

    Names are returned if ``by_id`` is False, the model has names for this
    kind of variable and those names are unique. Otherwise the ids are
    returned if the model has them.

    :param model:
        Model instance.

    :param variable:
        kind of variable; one of ``Parameter``, ``FixedParameter``,
        ``Observable``, ``State`` or ``Expression``.

    :param by_id:
        If True, ids are used as identifiers, otherwise first the possibly
        more descriptive names are used.

    :return:
        column names as list.

    :raises ValueError:
        if ``variable`` is not one of the permitted kinds, or if neither
        usable names nor ids are available.
    """
    # check whether variable type permitted
    variable_options = [
        "Parameter",
        "FixedParameter",
        "Observable",
        "State",
        "Expression",
    ]
    if variable not in variable_options:
        raise ValueError("Variable must be in " + str(variable_options))

    def query(prefix: str, suffix: str):
        # call e.g. model.getStateNames() / model.hasStateIds()
        return getattr(model, f"{prefix}{variable}{suffix}")()

    names = list(query("get", "Names"))
    ids = list(query("get", "Ids"))
    has_names = query("has", "Names")
    has_ids = query("has", "Ids")

    # names are only usable as labels if they are unique
    names_usable = has_names and len(set(names)) == len(names)
    if not by_id and names_usable:
        return names
    if has_ids:
        return ids

    # unable to create unique labels
    if by_id:
        raise ValueError(f"Model {variable} ids are not set.")
    raise ValueError(
        f"Model {variable} names are not unique and "
        f"{variable} ids are not set."
    )


def _get_specialized_fixed_parameters(
    model: AmiciModel,
    condition: Union[dict[str, SupportsFloat], pd.Series],
    overwrite: Union[dict[str, SupportsFloat], pd.Series],
    by_id: bool,
) -> list[float]:
    """
    Copies values in condition and overwrites them according to key
    value pairs specified in overwrite.

    ``condition`` itself is left untouched; the replacement is applied to a
    deep copy.

    :param model:
        Model instance.
    :param condition:
        fixedParameter values.
    :param overwrite:
        dict or Series specifying which values in condition are to be
        replaced.
    :param by_id:
        bool
            If True, ids are used as identifiers, otherwise the possibly more
            descriptive names.

    :return:
        overwritten FixedParameter as list, ordered like the fixed
        parameters of the model.
    """
    cond = copy.deepcopy(condition)
    # .items() yields (label, value) pairs for dicts and for pd.Series
    # alike; plain iteration over a Series would yield its values instead of
    # its labels.
    for field, value in overwrite.items():
        cond[field] = value
    return [
        float(cond[name])
        for name in _get_names_or_ids(model, "FixedParameter", by_id=by_id)
    ]


def constructEdataFromDataFrame(
    df: pd.DataFrame,
    model: AmiciModel,
    condition: pd.Series,
    by_id: Optional[bool] = False,
) -> amici.amici.ExpData:
    """
    Constructs an ExpData instance according to the provided Model
    and DataFrame.

    :param df:
        pd.DataFrame with Observable Names/Ids as columns.
        Standard deviations may be specified by appending '_std' as suffix.
        Must contain a ``time`` column; rows are sorted by it.

    :param model:
        Model instance.

    :param condition:
        pd.Series with (Fixed)Parameter Names/Ids as columns.
        Preequilibration conditions may be specified by appending
        '_preeq' as suffix. Presimulation conditions may be specified by
        appending '_presim' as suffix. Parameter scales may be specified by
        appending '_scale' as suffix.

    :param by_id:
        Indicate whether in the arguments, column headers are based on ids or
        names. This should correspond to the way `df` and `condition` was
        created in the first place.

    :return:
        ExpData instance.
    """
    # initialize edata
    edata = amici.ExpData(model.get())

    # timepoints
    df = df.sort_values(by="time", ascending=True)
    edata.setTimepoints(df["time"].values.astype(float))

    # labels depend on the model only; look them up once
    fixed_parameters = _get_names_or_ids(model, "FixedParameter", by_id=by_id)
    parameters = _get_names_or_ids(model, "Parameter", by_id=by_id)
    observables = _get_names_or_ids(model, "Observable", by_id=by_id)

    # get fixed parameters from condition
    overwrite_preeq = {}
    overwrite_presim = {}
    for par in fixed_parameters:
        for suffix, overwrite in (
            ("_preeq", overwrite_preeq),
            ("_presim", overwrite_presim),
        ):
            key = par + suffix
            if key not in condition.keys():
                continue
            # float() instead of scalar.astype(float): entries of an
            # object-dtype Series are plain Python numbers without .astype
            value = float(condition[key])
            if not math.isnan(value):
                overwrite[par] = value

    # fill in fixed parameters
    edata.fixedParameters = condition[fixed_parameters].astype(float).values

    # fill in parameters
    edata.parameters = condition[parameters].astype(float).values

    edata.pscale = amici.parameterScalingFromIntVector(
        [
            amici.ParameterScaling(int(condition[par + "_scale"]))
            for par in parameters
        ]
    )

    # fill in preequilibration parameters
    if any(
        overwrite_preeq[key] != condition[key] for key in overwrite_preeq
    ):
        edata.fixedParametersPreequilibration = (
            _get_specialized_fixed_parameters(
                model, condition, overwrite_preeq, by_id=by_id
            )
        )
    elif overwrite_preeq:
        # preeq values were given but all equal the main condition
        edata.fixedParametersPreequilibration = copy.deepcopy(
            edata.fixedParameters
        )

    # fill in presimulation parameters
    if any(
        overwrite_presim[key] != condition[key] for key in overwrite_presim
    ):
        edata.fixedParametersPresimulation = _get_specialized_fixed_parameters(
            model, condition, overwrite_presim, by_id=by_id
        )
    elif overwrite_presim:
        # presim values were given but all equal the main condition
        edata.fixedParametersPresimulation = copy.deepcopy(
            edata.fixedParameters
        )

    # fill in presimulation time
    if "t_presim" in condition.keys():
        edata.t_presim = float(condition["t_presim"])

    # fill in data and stds
    for obs_index, obs in enumerate(observables):
        if obs in df.keys():
            edata.setObservedData(df[obs].values.astype(float), obs_index)
        if obs + "_std" in df.keys():
            edata.setObservedDataStdDev(
                df[obs + "_std"].values.astype(float), obs_index
            )

    return edata



[docs]
def getEdataFromDataFrame(
    model: AmiciModel, df: pd.DataFrame, by_id: Optional[bool] = False
) -> list[amici.amici.ExpData]:
    """
    Construct ExpData instances according to the provided Model and
    DataFrame.

    Rows of ``df`` are grouped by their condition, i.e. by the unique
    combinations of the condition-defining columns (fixed parameters, their
    '_preeq' / '_presim' variants if present, parameters, parameter scales
    and 't_presim' if present); one ExpData instance is created per group.

    :param df:
        dataframe with Observable Names/Ids, FixedParameter Names/Ids
        and time as columns. Standard deviations may be specified by
        appending '_std' as suffix. Preequilibration fixedParameters may be
        specified by appending '_preeq' as suffix. Presimulation
        fixedParameters may be specified by appending '_presim' as suffix.
        Presimulation time may be specified as 't_presim' column.

    :param model:
        Model instance.

    :param by_id:
        Whether the column names in `df` are based on ids or names,
        corresponding to how the dataframe was created in the first place.

    :return:
        list of ExpData instances.
    """
    # collect the columns that together define a condition
    fixed_labels = _get_names_or_ids(model, "FixedParameter", by_id=by_id)
    condition_cols = list(fixed_labels)
    # optional preeq and presim variants of the fixed parameters
    for label in fixed_labels:
        for candidate in (label + "_preeq", label + "_presim"):
            if candidate in df.columns:
                condition_cols.append(candidate)
    # parameters & scales
    for label in _get_names_or_ids(model, "Parameter", by_id=by_id):
        condition_cols += [label, label + "_scale"]
    # presimulation time
    if "t_presim" in df.columns:
        condition_cols.append("t_presim")

    # every distinct combination of values is one condition
    unique_conditions = df[condition_cols].drop_duplicates()

    result = []
    for _, condition in unique_conditions.iterrows():
        # boolean mask of the rows of df belonging to this condition
        mask = np.ones((len(df),), dtype=bool)
        for column, value in condition.items():
            if math.isnan(value):
                # NaN never compares equal, so match NaN entries explicitly
                mask = mask & np.isnan(df[column].astype(float).values)
            else:
                mask = mask & (df[column] == value)

        result.append(
            constructEdataFromDataFrame(
                df[mask], model, condition, by_id=by_id
            )
        )

    return result