"""
Pandas Wrappers
---------------
This module contains convenience wrappers that allow for easy interconversion
between C++ objects from :mod:`amici.amici` and pandas DataFrames
"""
import copy
import math
from typing import Optional, SupportsFloat, Union
import amici
import numpy as np
import pandas as pd
from .numpy import ExpDataView
# Public API of this module: the names exported by a star import.
# Helpers prefixed with an underscore, as well as
# ``constructEdataFromDataFrame``, are intentionally not listed here.
__all__ = [
"get_expressions_as_dataframe",
"getEdataFromDataFrame",
"getDataObservablesAsDataFrame",
"getSimulationObservablesAsDataFrame",
"getSimulationStatesAsDataFrame",
"getResidualsAsDataFrame",
]
# Type aliases used in the signatures below.
# ``ExpDatas``: experimental data given either as a single ExpData object
# (plain or pointer-wrapped) or as a list of such objects.
ExpDatas = Union[
list[amici.amici.ExpData],
list[amici.ExpDataPtr],
amici.amici.ExpData,
amici.ExpDataPtr,
]
# ``ReturnDatas``: simulation results given either as a single
# ReturnDataView or as a list of views.
ReturnDatas = Union[list[amici.ReturnDataView], amici.ReturnDataView]
# ``AmiciModel``: a model given either as a pointer wrapper or as a plain
# model object.
AmiciModel = Union[amici.ModelPtr, amici.Model]
def _process_edata_list(edata_list: ExpDatas) -> list[amici.amici.ExpData]:
    """
    Normalize the ``edata_list`` argument of the public functions.

    A bare :class:`amici.amici.ExpData` or :class:`amici.ExpDataPtr`
    instance is wrapped in a one-element list; every other value is handed
    back unchanged.

    :param edata_list:
        list of instances or single instance

    :return:
        list of instance(s)
    """
    single_types = (amici.amici.ExpData, amici.ExpDataPtr)
    return [edata_list] if isinstance(edata_list, single_types) else edata_list
def _process_rdata_list(rdata_list: ReturnDatas) -> list[amici.ReturnDataView]:
    """
    Normalize the ``rdata_list`` argument of the public functions.

    A bare :class:`amici.ReturnDataView` instance is wrapped in a
    one-element list; every other value is handed back unchanged.

    :param rdata_list:
        list of instances or single instance

    :return:
        list of instance(s)
    """
    if not isinstance(rdata_list, amici.ReturnDataView):
        # not a single view: pass the argument through untouched
        return rdata_list
    return [rdata_list]
[docs]
def getDataObservablesAsDataFrame(
    model: AmiciModel, edata_list: ExpDatas, by_id: Optional[bool] = False
) -> pd.DataFrame:
    """
    Write Observables from experimental data as DataFrame.

    One row is produced per timepoint of every ExpData instance. Each row
    holds the measured value and its standard deviation (``<label>_std``)
    for every observable, plus the condition columns written by
    ``_fill_conditions_dict``.

    :param model:
        Model instance.

    :param edata_list:
        list of ExpData instances with experimental data.
        May also be a single ExpData instance.

    :param by_id:
        If True, uses observable ids as column names in the generated
        DataFrame, otherwise the possibly more descriptive observable names
        are used.

    :return:
        pandas DataFrame with conditions/timepoints as rows and observables as
        columns.
    """
    edata_list = _process_edata_list(edata_list)

    # list of all column names using either ids or names
    cols = _get_extended_observable_cols(model, by_id=by_id)
    # the observable labels only depend on the model, so they are looked up
    # once instead of once per timepoint
    observables = _get_names_or_ids(model, "Observable", by_id=by_id)

    # aggregate records
    dicts = []
    for edata in edata_list:
        npdata = ExpDataView(edata)
        # the condition entries are the same for every timepoint of one
        # ExpData instance, so they are collected once per instance
        conditions = _fill_conditions_dict({}, model, edata, by_id=by_id)
        for i_time, timepoint in enumerate(edata.getTimepoints()):
            datadict = {"time": timepoint, "datatype": "data"}
            # add observables and noises
            for i_obs, obs in enumerate(observables):
                datadict[obs] = npdata["observedData"][i_time, i_obs]
                datadict[obs + "_std"] = npdata["observedDataStdDev"][
                    i_time, i_obs
                ]
            # add conditions last so that they keep precedence over any
            # equally named key written above
            datadict.update(conditions)
            dicts.append(datadict)

    return pd.DataFrame.from_records(dicts, columns=cols)
[docs]
def getSimulationObservablesAsDataFrame(
    model: amici.Model,
    edata_list: ExpDatas,
    rdata_list: ReturnDatas,
    by_id: Optional[bool] = False,
) -> pd.DataFrame:
    """
    Write Observables from simulation results as DataFrame.

    ``edata_list`` and ``rdata_list`` are paired positionally. If they
    differ in length, the surplus entries of the longer one are ignored.
    One row is produced per entry of ``rdata["t"]``.

    :param model:
        Model instance.

    :param edata_list:
        list of ExpData instances with experimental data.
        May also be a single ExpData instance.

    :param rdata_list:
        list of ReturnData instances corresponding to ExpData.
        May also be a single ReturnData instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        pandas DataFrame with conditions/timepoints as rows and observables as
        columns.
    """
    edata_list = _process_edata_list(edata_list)
    rdata_list = _process_rdata_list(rdata_list)

    # list of all column names using either names or ids
    cols = _get_extended_observable_cols(model, by_id=by_id)
    # the observable labels only depend on the model, so they are looked up
    # once instead of once per timepoint
    observables = _get_names_or_ids(model, "Observable", by_id=by_id)

    # aggregate records
    dicts = []
    for edata, rdata in zip(edata_list, rdata_list):
        # use edata to fill the condition columns; they are the same for
        # every timepoint, so they are collected once per instance
        conditions = _fill_conditions_dict({}, model, edata, by_id=by_id)
        for i_time, timepoint in enumerate(rdata["t"]):
            datadict = {
                "time": timepoint,
                "datatype": "simulation",
            }
            # append simulations
            for i_obs, obs in enumerate(observables):
                datadict[obs] = rdata["y"][i_time, i_obs]
                datadict[obs + "_std"] = rdata["sigmay"][i_time, i_obs]
            # add conditions last so that they keep precedence over any
            # equally named key written above
            datadict.update(conditions)
            dicts.append(datadict)

    return pd.DataFrame.from_records(dicts, columns=cols)
[docs]
def getSimulationStatesAsDataFrame(
    model: amici.Model,
    edata_list: ExpDatas,
    rdata_list: ReturnDatas,
    by_id: Optional[bool] = False,
) -> pd.DataFrame:
    """
    Get model state according to lists of ReturnData and ExpData.

    ``edata_list`` and ``rdata_list`` are paired positionally. If they
    differ in length, the surplus entries of the longer one are ignored.
    One row is produced per entry of ``rdata["t"]``.

    :param model:
        Model instance.

    :param edata_list:
        list of ExpData instances with experimental data.
        May also be a single ExpData instance.

    :param rdata_list:
        list of ReturnData instances corresponding to ExpData.
        May also be a single ReturnData instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return: pandas DataFrame with conditions/timepoints as rows and
        state variables as columns.
    """
    edata_list = _process_edata_list(edata_list)
    rdata_list = _process_rdata_list(rdata_list)

    # get conditions and state column names by name or id
    cols = _get_state_cols(model, by_id=by_id)
    # the state labels only depend on the model, so they are looked up once
    # instead of once per timepoint
    states = _get_names_or_ids(model, "State", by_id=by_id)

    # aggregate records
    dicts = []
    for edata, rdata in zip(edata_list, rdata_list):
        # use edata to fill the condition columns; they are the same for
        # every timepoint, so they are collected once per instance
        conditions = _fill_conditions_dict({}, model, edata, by_id=by_id)
        for i_time, timepoint in enumerate(rdata["t"]):
            datadict = {
                "time": timepoint,
            }
            # append states
            for i_state, state in enumerate(states):
                datadict[state] = rdata["x"][i_time, i_state]
            # add conditions last so that they keep precedence over any
            # equally named key written above
            datadict.update(conditions)
            dicts.append(datadict)

    return pd.DataFrame.from_records(dicts, columns=cols)
[docs]
def get_expressions_as_dataframe(
    model: amici.Model,
    edata_list: ExpDatas,
    rdata_list: ReturnDatas,
    by_id: Optional[bool] = False,
) -> pd.DataFrame:
    """
    Get values of model expressions from lists of ReturnData as DataFrame.

    ``edata_list`` and ``rdata_list`` are paired positionally. If they
    differ in length, the surplus entries of the longer one are ignored.
    One row is produced per entry of ``rdata["t"]``.

    :param model:
        Model instance.

    :param edata_list:
        list of ExpData instances with experimental data.
        May also be a single ExpData instance.

    :param rdata_list:
        list of ReturnData instances corresponding to ExpData.
        May also be a single ReturnData instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return: pandas DataFrame with conditions/timepoints as rows and
        model expressions as columns.
    """
    edata_list = _process_edata_list(edata_list)
    rdata_list = _process_rdata_list(rdata_list)

    # get conditions and expression column names by name or id
    cols = _get_expression_cols(model, by_id=by_id)
    # the expression labels only depend on the model, so they are looked up
    # once instead of once per timepoint
    expressions = _get_names_or_ids(model, "Expression", by_id=by_id)

    # aggregate records
    dicts = []
    for edata, rdata in zip(edata_list, rdata_list):
        # use edata to fill the condition columns; they are the same for
        # every timepoint, so they are collected once per instance
        conditions = _fill_conditions_dict({}, model, edata, by_id=by_id)
        for i_time, timepoint in enumerate(rdata["t"]):
            datadict = {
                "time": timepoint,
            }
            # append expressions
            for i_expr, expr in enumerate(expressions):
                datadict[expr] = rdata["w"][i_time, i_expr]
            # add conditions last so that they keep precedence over any
            # equally named key written above
            datadict.update(conditions)
            dicts.append(datadict)

    return pd.DataFrame.from_records(dicts, columns=cols)
[docs]
def getResidualsAsDataFrame(
    model: amici.Model,
    edata_list: ExpDatas,
    rdata_list: ReturnDatas,
    by_id: Optional[bool] = False,
) -> pd.DataFrame:
    """
    Convert a list of ReturnData and ExpData to pandas DataFrame with
    residuals.

    For every row of the simulation frame and every observable, the
    residual is ``abs((data - simulation) / simulation_std)``. All
    condition columns (condition id, presimulation time, fixed parameters
    including their ``_preeq``/``_presim`` variants, parameters and
    parameter scales) are copied from the simulation frame.

    :param model:
        Model instance.

    :param edata_list:
        list of ExpData instances with experimental data. May also be a
        single ExpData instance.

    :param rdata_list:
        list of ReturnData instances corresponding to ExpData. May also be a
        single ReturnData instance.

    :param by_id: bool, optional (default = False)
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        pandas DataFrame with conditions and residuals.
    """
    edata_list = _process_edata_list(edata_list)
    rdata_list = _process_rdata_list(rdata_list)

    # create observable and simulation dataframes
    df_edata = getDataObservablesAsDataFrame(model, edata_list, by_id=by_id)
    df_rdata = getSimulationObservablesAsDataFrame(
        model, edata_list, rdata_list, by_id=by_id
    )

    # get all column names using names or ids
    cols = _get_observable_cols(model, by_id=by_id)

    # labels only depend on the model: look them up once, not once per row
    observables = _get_names_or_ids(model, "Observable", by_id=by_id)
    fixed_parameters = _get_names_or_ids(
        model, "FixedParameter", by_id=by_id
    )
    parameters = _get_names_or_ids(model, "Parameter", by_id=by_id)

    # aggregate records
    dicts = []
    for row in df_rdata.index:
        # fetch each row once instead of re-indexing the frame per value
        sim_row = df_rdata.loc[row]
        data_row = df_edata.loc[row]

        datadict = {
            "condition_id": sim_row["condition_id"],
            "time": sim_row["time"],
            "t_presim": sim_row["t_presim"],
        }

        # Parameter values and scales are part of ``cols`` and would be
        # left empty in the result if they were not copied here. They are
        # written first so that residuals and fixed parameters keep
        # precedence for any equally named key.
        for par in parameters:
            datadict[par] = sim_row[par]
            datadict[par + "_scale"] = sim_row[par + "_scale"]

        # iterate over observables
        for obs in observables:
            # compute residual and append to dict
            datadict[obs] = abs(
                (data_row[obs] - sim_row[obs]) / sim_row[obs + "_std"]
            )

        # iterate over fixed parameters
        for par in fixed_parameters:
            # fill in conditions
            datadict[par] = sim_row[par]
            datadict[par + "_preeq"] = sim_row[par + "_preeq"]
            datadict[par + "_presim"] = sim_row[par + "_presim"]

        # append to dataframe
        dicts.append(datadict)

    return pd.DataFrame.from_records(dicts, columns=cols)
def _fill_conditions_dict(
    datadict: dict[str, float],
    model: AmiciModel,
    edata: amici.amici.ExpData,
    by_id: bool,
) -> dict[str, float]:
    """
    Helper function that fills in condition parameters from model and
    edata.

    Writes ``condition_id`` and ``t_presim``, then for every fixed
    parameter its value plus ``_preeq`` and ``_presim`` variants, and for
    every parameter its value plus ``_scale``. Values that are not set on
    ``edata`` are taken from ``model``; missing preequilibration and
    presimulation values are written as NaN.

    :param datadict:
        dictionary in which condition parameters will be inserted
        as key value pairs. It is modified in place.

    :param model:
        Model instance.

    :param edata:
        ExpData instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        dictionary with filled condition parameters (the same object that
        was passed in as ``datadict``).
    """
    datadict["condition_id"] = edata.id
    datadict["t_presim"] = edata.t_presim

    fixed_ids = _get_names_or_ids(model, "FixedParameter", by_id=by_id)
    if fixed_ids:
        # read every vector once instead of once per parameter; fall back
        # to the model values if edata does not carry its own
        fixed = edata.fixedParameters
        if not len(fixed):
            fixed = model.getFixedParameters()
        preeq = edata.fixedParametersPreequilibration
        presim = edata.fixedParametersPresimulation
        has_preeq = len(preeq) > 0
        has_presim = len(presim) > 0

        for i_par, par in enumerate(fixed_ids):
            datadict[par] = fixed[i_par]
            datadict[par + "_preeq"] = preeq[i_par] if has_preeq else np.nan
            datadict[par + "_presim"] = (
                presim[i_par] if has_presim else np.nan
            )

    parameter_ids = _get_names_or_ids(model, "Parameter", by_id=by_id)
    if parameter_ids:
        # same pattern: edata values win, model values are the fallback
        parameters = edata.parameters
        if not len(parameters):
            parameters = model.getParameters()
        scales = edata.pscale
        if not len(scales):
            scales = model.getParameterScale()

        for i_par, par in enumerate(parameter_ids):
            datadict[par] = parameters[i_par]
            datadict[par + "_scale"] = scales[i_par]

    return datadict
def _get_extended_observable_cols(model: AmiciModel, by_id: bool) -> list[str]:
    """
    Build the header of the extended observable dataframes.

    The header consists of the bookkeeping columns, the fixed parameters
    with their ``_preeq`` and ``_presim`` variants, the parameters with
    their ``_scale`` variants, and the observables with their ``_std``
    variants.

    :param model:
        Model instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        column names as list.
    """
    fixed = _get_names_or_ids(model, "FixedParameter", by_id=by_id)
    free = _get_names_or_ids(model, "Parameter", by_id=by_id)
    observed = _get_names_or_ids(model, "Observable", by_id=by_id)

    return [
        "condition_id",
        "time",
        "datatype",
        "t_presim",
        *fixed,
        *(label + "_preeq" for label in fixed),
        *(label + "_presim" for label in fixed),
        *free,
        *(label + "_scale" for label in free),
        *observed,
        *(label + "_std" for label in observed),
    ]
def _get_observable_cols(model: AmiciModel, by_id: bool) -> list[str]:
    """
    Build the header of the observable dataframes.

    The header consists of the bookkeeping columns, the fixed parameters
    with their ``_preeq`` and ``_presim`` variants, the parameters with
    their ``_scale`` variants, and the observables.

    :param model:
        Model instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        column names as list.
    """
    fixed = _get_names_or_ids(model, "FixedParameter", by_id=by_id)
    free = _get_names_or_ids(model, "Parameter", by_id=by_id)
    observed = _get_names_or_ids(model, "Observable", by_id=by_id)

    columns = ["condition_id", "time", "t_presim"]
    columns.extend(fixed)
    columns.extend(label + "_preeq" for label in fixed)
    columns.extend(label + "_presim" for label in fixed)
    columns.extend(free)
    columns.extend(label + "_scale" for label in free)
    columns.extend(observed)
    return columns
def _get_state_cols(model: AmiciModel, by_id: bool) -> list[str]:
    """
    Build the header of the state dataframes.

    The header consists of the bookkeeping columns, the fixed parameters
    with their ``_preeq`` and ``_presim`` variants, the parameters with
    their ``_scale`` variants, and the state variables.

    :param model:
        Model instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        column names as list.
    """
    fixed = _get_names_or_ids(model, "FixedParameter", by_id=by_id)
    free = _get_names_or_ids(model, "Parameter", by_id=by_id)
    states = _get_names_or_ids(model, "State", by_id=by_id)

    columns = ["condition_id", "time", "t_presim"]
    columns += fixed
    columns += [label + "_preeq" for label in fixed]
    columns += [label + "_presim" for label in fixed]
    columns += free
    columns += [label + "_scale" for label in free]
    columns += states
    return columns
def _get_expression_cols(model: AmiciModel, by_id: bool) -> list[str]:
    """
    Build the header of the expression dataframes.

    The header consists of the bookkeeping columns, the fixed parameters
    with their ``_preeq`` and ``_presim`` variants, the parameters with
    their ``_scale`` variants, and the model expressions.

    :param model:
        Model instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        column names as list.
    """
    fixed = _get_names_or_ids(model, "FixedParameter", by_id=by_id)
    free = _get_names_or_ids(model, "Parameter", by_id=by_id)
    expressions = _get_names_or_ids(model, "Expression", by_id=by_id)

    # the outer loop runs over the suffixes, so all plain labels come
    # first, followed by one complete group per suffix
    fixed_columns = [
        label + suffix
        for suffix in ("", "_preeq", "_presim")
        for label in fixed
    ]
    free_columns = [
        label + suffix for suffix in ("", "_scale") for label in free
    ]

    leading = ["condition_id", "time", "t_presim"]
    return leading + fixed_columns + free_columns + expressions
def _get_names_or_ids(
    model: AmiciModel, variable: str, by_id: bool
) -> list[str]:
    """
    Obtains a unique list of identifiers for the specified variable.

    Names are returned when ``by_id`` is False, the model has names for
    the variable, and those names are free of duplicates. Otherwise ids
    are returned if the model has them.

    :param model:
        Model instance.

    :param variable:
        variable name; one of ``Parameter``, ``FixedParameter``,
        ``Observable``, ``State`` or ``Expression``.

    :param by_id:
        If True, ids are used as identifiers, otherwise first the possibly
        more descriptive names are used.

    :return:
        column names as list.

    :raises ValueError:
        if ``variable`` is not a permitted variable type, or if neither
        usable names nor ids are available.
    """
    # check whether variable type permitted
    variable_options = [
        "Parameter",
        "FixedParameter",
        "Observable",
        "State",
        "Expression",
    ]
    if variable not in variable_options:
        raise ValueError("Variable must be in " + str(variable_options))

    # query the model for labels and for their availability
    names = list(getattr(model, f"get{variable}Names")())
    ids = list(getattr(model, f"get{variable}Ids")())
    has_names = getattr(model, f"has{variable}Names")()
    has_ids = getattr(model, f"has{variable}Ids")()

    names_are_unique = len(set(names)) == len(names)

    # names are preferred unless ids were requested explicitly
    if not by_id and has_names and names_are_unique:
        return names

    if has_ids:
        return ids

    # unable to create unique labels
    if by_id:
        raise ValueError(f"Model {variable} ids are not set.")
    raise ValueError(
        f"Model {variable} names are not unique and "
        f"{variable} ids are not set."
    )
def _get_specialized_fixed_parameters(
    model: AmiciModel,
    condition: Union[dict[str, SupportsFloat], pd.Series],
    overwrite: Union[dict[str, SupportsFloat], pd.Series],
    by_id: bool,
) -> list[float]:
    """
    Copies values in condition and overwrites them according to key
    value pairs specified in overwrite.

    ``condition`` itself is left untouched; the replacement is applied to
    a deep copy.

    :param model:
        Model instance.

    :param condition:
        fixedParameter values.

    :param overwrite:
        dict specifying which values in condition are to be replaced.

    :param by_id:
        bool
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        overwritten FixedParameter as list.
    """
    cond = copy.deepcopy(condition)
    # ``.items()`` yields (label, value) pairs for dicts and for
    # pandas Series alike. Iterating a Series directly would yield its
    # values rather than its labels.
    for field, value in overwrite.items():
        cond[field] = value
    return [
        float(cond[name])
        for name in _get_names_or_ids(model, "FixedParameter", by_id=by_id)
    ]
def constructEdataFromDataFrame(
    df: pd.DataFrame,
    model: AmiciModel,
    condition: pd.Series,
    by_id: Optional[bool] = False,
) -> amici.amici.ExpData:
    """
    Constructs an ExpData instance according to the provided Model
    and DataFrame.

    :param df:
        pd.DataFrame with Observable Names/Ids as columns and a ``time``
        column. Standard deviations may be specified by appending '_std'
        as suffix. Rows are sorted by ``time`` before use.

    :param model:
        Model instance.

    :param condition:
        pd.Series with (Fixed)Parameter Names/Ids as index labels.
        An entry is read for every fixed parameter, every parameter and,
        with '_scale' appended as suffix, for every parameter scale.
        Preequilibration conditions may be specified by appending
        '_preeq' as suffix. Presimulation conditions may be specified by
        appending '_presim' as suffix. The presimulation time may be
        specified as 't_presim'.

    :param by_id:
        Indicate whether in the arguments, column headers are based on ids or
        names. This should correspond to the way `df` and `condition` was
        created in the first place.

    :return:
        ExpData instance.
    """
    # initialize edata
    edata = amici.ExpData(model.get())

    # timepoints
    df = df.sort_values(by="time", ascending=True)
    edata.setTimepoints(df["time"].values.astype(float))

    # labels only depend on the model: look them up once
    fixed_parameter_ids = _get_names_or_ids(
        model, "FixedParameter", by_id=by_id
    )
    parameter_ids = _get_names_or_ids(model, "Parameter", by_id=by_id)

    # get fixed parameters from condition
    overwrite_preeq = {}
    overwrite_presim = {}
    for par in fixed_parameter_ids:
        for suffix, overwrite in (
            ("_preeq", overwrite_preeq),
            ("_presim", overwrite_presim),
        ):
            key = par + suffix
            if key not in condition.keys():
                continue
            # float() accepts numpy scalars as well as plain Python
            # numbers, which have no ``astype`` method
            value = float(condition[key])
            if not math.isnan(value):
                overwrite[par] = value

    # fill in fixed parameters
    edata.fixedParameters = (
        condition[fixed_parameter_ids].astype(float).values
    )

    # fill in parameters
    edata.parameters = condition[parameter_ids].astype(float).values

    edata.pscale = amici.parameterScalingFromIntVector(
        [
            amici.ParameterScaling(int(condition[par + "_scale"]))
            for par in parameter_ids
        ]
    )

    # fill in preequilibration parameters
    if any(
        value != condition[key] for key, value in overwrite_preeq.items()
    ):
        edata.fixedParametersPreequilibration = (
            _get_specialized_fixed_parameters(
                model, condition, overwrite_preeq, by_id=by_id
            )
        )
    elif overwrite_preeq:
        # preequilibration values were given but all equal the regular
        # fixed parameters
        edata.fixedParametersPreequilibration = copy.deepcopy(
            edata.fixedParameters
        )

    # fill in presimulation parameters
    if any(
        value != condition[key] for key, value in overwrite_presim.items()
    ):
        edata.fixedParametersPresimulation = _get_specialized_fixed_parameters(
            model, condition, overwrite_presim, by_id=by_id
        )
    elif overwrite_presim:
        # presimulation values were given but all equal the regular
        # fixed parameters
        edata.fixedParametersPresimulation = copy.deepcopy(
            edata.fixedParameters
        )

    # fill in presimulation time
    if "t_presim" in condition.keys():
        edata.t_presim = float(condition["t_presim"])

    # fill in data and stds
    for obs_index, obs in enumerate(
        _get_names_or_ids(model, "Observable", by_id=by_id)
    ):
        if obs in df.keys():
            edata.setObservedData(df[obs].values.astype(float), obs_index)
        if obs + "_std" in df.keys():
            edata.setObservedDataStdDev(
                df[obs + "_std"].values.astype(float), obs_index
            )

    return edata
[docs]
def getEdataFromDataFrame(
    model: AmiciModel, df: pd.DataFrame, by_id: Optional[bool] = False
) -> list[amici.amici.ExpData]:
    """
    Constructs ExpData instances according to the provided Model and
    DataFrame.

    The rows of ``df`` are grouped by their distinct combinations of
    condition columns, and one ExpData instance is built per combination.

    :param df:
        dataframe with Observable Names/Ids, FixedParameter Names/Ids
        and time as columns. Standard deviations may be specified by
        appending '_std' as suffix. Preequilibration fixedParameters may be
        specified by appending '_preeq' as suffix. Presimulation
        fixedParameters may be specified by appending '_presim' as suffix.
        Presimulation time may be specified as 't_presim' column.

    :param model:
        Model instance.

    :param by_id:
        Whether the column names in `df` are based on ids or names,
        corresponding to how the dataframe was created in the first place.

    :return:
        list of ExpData instances.
    """
    # collect the columns that together define a condition
    fixed_labels = _get_names_or_ids(model, "FixedParameter", by_id=by_id)

    # fixed parameters themselves
    condition_parameters = list(fixed_labels)

    # their preeq and presim variants, where present in the dataframe
    for label in fixed_labels:
        preeq_label = label + "_preeq"
        presim_label = label + "_presim"
        if preeq_label in df.columns:
            condition_parameters.append(preeq_label)
        if presim_label in df.columns:
            condition_parameters.append(presim_label)

    # parameters & scales
    for label in _get_names_or_ids(model, "Parameter", by_id=by_id):
        condition_parameters += [label, label + "_scale"]

    # presimulation time
    if "t_presim" in df.columns:
        condition_parameters.append("t_presim")

    # every distinct combination of values is one condition
    conditions = df[condition_parameters].drop_duplicates()

    edata_list = []
    for _, row in conditions.iterrows():
        # build a mask of the rows that belong to this condition
        selected = np.ones((len(df),), dtype=bool)
        for par_label, par in row.items():
            if math.isnan(par):
                # NaN never compares equal, so match NaN entries explicitly
                matches = np.isnan(df[par_label].astype(float).values)
            else:
                matches = df[par_label] == par
            selected = selected & matches

        edata_list.append(
            constructEdataFromDataFrame(df[selected], model, row, by_id=by_id)
        )

    return edata_list