Source code for amici.pandas

"""
Pandas Wrappers
---------------
This module contains convenience wrappers that allow for easy interconversion
between C++ objects from :mod:`amici.amici` and pandas DataFrames
"""

import pandas as pd
import numpy as np
import math
import copy

from typing import List, Union, Optional, Dict, SupportsFloat
from .numpy import ExpDataView
import amici

__all__ = [
    'get_expressions_as_dataframe',
    'getEdataFromDataFrame',
    'getDataObservablesAsDataFrame',
    'getSimulationObservablesAsDataFrame',
    'getSimulationStatesAsDataFrame',
    'getResidualsAsDataFrame'
]

ExpDatas = Union[
    List[amici.amici.ExpData], List[amici.ExpDataPtr],
    amici.amici.ExpData, amici.ExpDataPtr
]
ReturnDatas = Union[
    List[amici.ReturnDataView], amici.ReturnDataView
]

AmiciModel = Union[amici.ModelPtr, amici.Model]


def _process_edata_list(edata_list: ExpDatas) -> List[amici.amici.ExpData]:
    """
    Maps single instances of :class:`amici.amici.ExpData` to lists of
    :class:`amici.amici.ExpData`

    :param edata_list:
        list of instances or single instance

    :return:
        list of instance(s)
    """
    if isinstance(edata_list, (amici.amici.ExpData, amici.ExpDataPtr)):
        return [edata_list]
    else:
        return edata_list


def _process_rdata_list(rdata_list: ReturnDatas) -> List[amici.ReturnDataView]:
    """
    Maps single instances of :class:`amici.ReturnData` to lists of
    :class:`amici.ReturnData`

    :param rdata_list:
        list of instances or single instance

    :return:
        list of instance(s)
    """
    if isinstance(rdata_list, amici.ReturnDataView):
        return [rdata_list]
    else:
        return rdata_list


[docs]def getDataObservablesAsDataFrame( model: AmiciModel, edata_list: ExpDatas, by_id: Optional[bool] = False) -> pd.DataFrame: """ Write Observables from experimental data as DataFrame. :param model: Model instance. :param edata_list: list of ExpData instances with experimental data. May also be a single ExpData instance. :param by_id: If True, uses observable ids as column names in the generated DataFrame, otherwise the possibly more descriptive observable names are used. :return: pandas DataFrame with conditions/timepoints as rows and observables as columns. """ edata_list = _process_edata_list(edata_list) # list of all column names using either ids or names cols = _get_extended_observable_cols(model, by_id=by_id) # aggregate records dicts = [] for edata in edata_list: npdata = ExpDataView(edata) for i_time, timepoint in enumerate(edata.getTimepoints()): datadict = { 'time': timepoint, 'datatype': 'data' } # add observables and noises for i_obs, obs in enumerate(_get_names_or_ids( model, 'Observable', by_id=by_id)): datadict[obs] = npdata['observedData'][i_time, i_obs] datadict[obs + '_std'] = \ npdata['observedDataStdDev'][i_time, i_obs] # add conditions _fill_conditions_dict(datadict, model, edata, by_id=by_id) dicts.append(datadict) return pd.DataFrame.from_records(dicts, columns=cols)
[docs]def getSimulationObservablesAsDataFrame( model: amici.Model, edata_list: ExpDatas, rdata_list: ReturnDatas, by_id: Optional[bool] = False ) -> pd.DataFrame: """ Write Observables from simulation results as DataFrame. :param model: Model instance. :param edata_list: list of ExpData instances with experimental data. May also be a single ExpData instance. :param rdata_list: list of ReturnData instances corresponding to ExpData. May also be a single ReturnData instance. :param by_id: If True, ids are used as identifiers, otherwise the possibly more descriptive names. :return: pandas DataFrame with conditions/timepoints as rows and observables as columns. """ edata_list = _process_edata_list(edata_list) rdata_list = _process_rdata_list(rdata_list) # list of all column names using either names or ids cols = _get_extended_observable_cols(model, by_id=by_id) # aggregate records dicts = [] for edata, rdata in zip(edata_list, rdata_list): for i_time, timepoint in enumerate(rdata['t']): datadict = { 'time': timepoint, 'datatype': 'simulation', } # append simulations for i_obs, obs in enumerate(_get_names_or_ids( model, 'Observable', by_id=by_id)): datadict[obs] = rdata['y'][i_time, i_obs] datadict[obs + '_std'] = rdata['sigmay'][i_time, i_obs] # use edata to fill conditions columns _fill_conditions_dict(datadict, model, edata, by_id=by_id) # append to dataframe dicts.append(datadict) return pd.DataFrame.from_records(dicts, columns=cols)
[docs]def getSimulationStatesAsDataFrame( model: amici.Model, edata_list: ExpDatas, rdata_list: ReturnDatas, by_id: Optional[bool] = False) -> pd.DataFrame: """ Get model state according to lists of ReturnData and ExpData. :param model: Model instance. :param edata_list: list of ExpData instances with experimental data. May also be a single ExpData instance. :param rdata_list: list of ReturnData instances corresponding to ExpData. May also be a single ReturnData instance. :param by_id: If True, ids are used as identifiers, otherwise the possibly more descriptive names. :return: pandas DataFrame with conditions/timepoints as rows and state variables as columns. """ edata_list = _process_edata_list(edata_list) rdata_list = _process_rdata_list(rdata_list) # get conditions and state column names by name or id cols = _get_state_cols(model, by_id=by_id) # aggregate records dicts = [] for edata, rdata in zip(edata_list, rdata_list): for i_time, timepoint in enumerate(rdata['t']): datadict = { 'time': timepoint, } # append states for i_state, state in enumerate( _get_names_or_ids(model, 'State', by_id=by_id)): datadict[state] = rdata['x'][i_time, i_state] # use data to fill condition columns _fill_conditions_dict(datadict, model, edata, by_id=by_id) # append to dataframe dicts.append(datadict) return pd.DataFrame.from_records(dicts, columns=cols)
[docs]def get_expressions_as_dataframe( model: amici.Model, edata_list: ExpDatas, rdata_list: ReturnDatas, by_id: Optional[bool] = False) -> pd.DataFrame: """ Get values of model expressions from lists of ReturnData as DataFrame. :param model: Model instance. :param edata_list: list of ExpData instances with experimental data. May also be a single ExpData instance. :param rdata_list: list of ReturnData instances corresponding to ExpData. May also be a single ReturnData instance. :param by_id: If True, ids are used as identifiers, otherwise the possibly more descriptive names. :return: pandas DataFrame with conditions/timepoints as rows and model expressions as columns. """ edata_list = _process_edata_list(edata_list) rdata_list = _process_rdata_list(rdata_list) # get conditions and state column names by name or id cols = _get_expression_cols(model, by_id=by_id) # aggregate records dicts = [] for edata, rdata in zip(edata_list, rdata_list): for i_time, timepoint in enumerate(rdata['t']): datadict = { 'time': timepoint, } # append expressions for i_expr, expr in enumerate( _get_names_or_ids(model, 'Expression', by_id=by_id)): datadict[expr] = rdata['w'][i_time, i_expr] # use data to fill condition columns _fill_conditions_dict(datadict, model, edata, by_id=by_id) # append to dataframe dicts.append(datadict) return pd.DataFrame.from_records(dicts, columns=cols)
[docs]def getResidualsAsDataFrame(model: amici.Model, edata_list: ExpDatas, rdata_list: ReturnDatas, by_id: Optional[bool] = False) -> pd.DataFrame: """ Convert a list of ReturnData and ExpData to pandas DataFrame with residuals. :param model: Model instance. :param edata_list: list of ExpData instances with experimental data. May also be a single ExpData instance. :param rdata_list: list of ReturnData instances corresponding to ExpData. May also be a single ReturnData instance. :param by_id: bool, optional (default = False) If True, ids are used as identifiers, otherwise the possibly more descriptive names. :return: pandas DataFrame with conditions and residuals. """ edata_list = _process_edata_list(edata_list) rdata_list = _process_rdata_list(rdata_list) # create observable and simulation dataframes df_edata = getDataObservablesAsDataFrame( model, edata_list, by_id=by_id) df_rdata = getSimulationObservablesAsDataFrame( model, edata_list, rdata_list, by_id=by_id) # get all column names using names or ids cols = _get_observable_cols(model, by_id=by_id) # aggregate records dicts = [] for row in df_rdata.index: datadict = { 'time': df_rdata.loc[row]['time'], 't_presim': df_rdata.loc[row]['t_presim'] } # iterate over observables for obs in _get_names_or_ids(model, 'Observable', by_id=by_id): # compute residual and append to dict datadict[obs] = abs( (df_edata.loc[row][obs] - df_rdata.loc[row][obs]) / df_rdata.loc[row][obs + '_std']) # iterate over fixed parameters for par in _get_names_or_ids(model, 'FixedParameter', by_id=by_id): # fill in conditions datadict[par] = df_rdata.loc[row][par] datadict[par + '_preeq'] = df_rdata.loc[row][par + '_preeq'] datadict[par + '_presim'] = df_rdata.loc[row][par + '_presim'] # append to dataframe dicts.append(datadict) return pd.DataFrame.from_records(dicts, columns=cols)
def _fill_conditions_dict(datadict: Dict[str, float], model: AmiciModel, edata: amici.amici.ExpData, by_id: bool) -> Dict[str, float]: """ Helper function that fills in condition parameters from model and edata. :param datadict: dictionary in which condition parameters will be inserted as key value pairs. :param model: Model instance. :param edata: ExpData instance. :param by_id: If True, ids are used as identifiers, otherwise the possibly more descriptive names. :return: dictionary with filled condition parameters. """ datadict['condition_id'] = edata.id datadict['t_presim'] = edata.t_presim for i_par, par in enumerate( _get_names_or_ids(model, 'FixedParameter', by_id=by_id)): if len(edata.fixedParameters): datadict[par] = edata.fixedParameters[i_par] else: datadict[par] = model.getFixedParameters()[i_par] if len(edata.fixedParametersPreequilibration): datadict[par + '_preeq'] = \ edata.fixedParametersPreequilibration[i_par] else: datadict[par + '_preeq'] = np.nan if len(edata.fixedParametersPresimulation): datadict[par + '_presim'] = \ edata.fixedParametersPresimulation[i_par] else: datadict[par + '_presim'] = np.nan return datadict def _get_extended_observable_cols(model: AmiciModel, by_id: bool) -> List[str]: """ Construction helper for extended observable dataframe headers. :param model: Model instance. :param by_id: If True, ids are used as identifiers, otherwise the possibly more descriptive names. :return: column names as list. """ return \ ['condition_id', 'time', 'datatype', 't_presim'] + \ _get_names_or_ids(model, 'FixedParameter', by_id=by_id) + \ [name + '_preeq' for name in _get_names_or_ids(model, 'FixedParameter', by_id=by_id)] + \ [name + '_presim' for name in _get_names_or_ids(model, 'FixedParameter', by_id=by_id)] + \ _get_names_or_ids(model, 'Observable', by_id=by_id) + \ [name + '_std' for name in _get_names_or_ids(model, 'Observable', by_id=by_id)] def _get_observable_cols(model: AmiciModel, by_id: bool) -> List[str]: """ Construction helper for observable dataframe headers. :param model: Model instance. :param by_id: If True, ids are used as identifiers, otherwise the possibly more descriptive names. :return: column names as list. """ return \ ['condition_id', 'time', 't_presim'] + \ _get_names_or_ids(model, 'FixedParameter', by_id=by_id) + \ [name + '_preeq' for name in _get_names_or_ids(model, 'FixedParameter', by_id=by_id)] + \ [name + '_presim' for name in _get_names_or_ids(model, 'FixedParameter', by_id=by_id)] + \ _get_names_or_ids(model, 'Observable', by_id=by_id) def _get_state_cols(model: AmiciModel, by_id: bool) -> List[str]: """ Construction helper for state dataframe headers. :param model: Model instance. :param by_id: If True, ids are used as identifiers, otherwise the possibly more descriptive names. :return: column names as list. """ return \ ['condition_id', 'time', 't_presim'] + \ _get_names_or_ids(model, 'FixedParameter', by_id=by_id) + \ [name + '_preeq' for name in _get_names_or_ids(model, 'FixedParameter', by_id=by_id)] + \ [name + '_presim' for name in _get_names_or_ids(model, 'FixedParameter', by_id=by_id)] + \ _get_names_or_ids(model, 'State', by_id=by_id) def _get_expression_cols(model: AmiciModel, by_id: bool) -> List[str]: """Construction helper for expression dataframe headers. :param model: Model instance. :param by_id: If True, ids are used as identifiers, otherwise the possibly more descriptive names. :return: column names as list. """ return \ ['condition_id', 'time', 't_presim'] + \ _get_names_or_ids(model, 'FixedParameter', by_id=by_id) + \ [name + '_preeq' for name in _get_names_or_ids(model, 'FixedParameter', by_id=by_id)] + \ [name + '_presim' for name in _get_names_or_ids(model, 'FixedParameter', by_id=by_id)] + \ _get_names_or_ids(model, 'Expression', by_id=by_id) def _get_names_or_ids(model: AmiciModel, variable: str, by_id: bool) -> List[str]: """ Obtains a unique list of identifiers for the specified variable. First tries model.getVariableNames and then uses model.getVariableIds. :param model: Model instance. :param variable: variable name. :param by_id: If True, ids are used as identifiers, otherwise first the possibly more descriptive names are used. :return: column names as list. """ # check whether variable type permitted variable_options = [ 'Parameter', 'FixedParameter', 'Observable', 'State', 'Expression' ] if variable not in variable_options: raise ValueError('Variable must be in ' + str(variable_options)) # extract attributes names = list(getattr(model, f'get{variable}Names')()) ids = list(getattr(model, f'get{variable}Ids')()) # find out if model has names and ids has_names = getattr(model, f'has{variable}Names')() has_ids = getattr(model, f'has{variable}Ids')() # extract labels if not by_id and has_names and len(set(names)) == len(names): # use variable names return names elif has_ids: # use variable ids return ids else: # unable to create unique labels if by_id: msg = f"Model {variable} ids are not set." else: msg = f"Model {variable} names are not unique and " \ f"{variable} ids are not set." raise ValueError(msg) def _get_specialized_fixed_parameters( model: AmiciModel, condition: Union[Dict[str, SupportsFloat], pd.Series], overwrite: Union[Dict[str, SupportsFloat], pd.Series], by_id: bool ) -> List[float]: """ Copies values in condition and overwrites them according to key value pairs specified in overwrite. :param model: Model instance. :param condition: fixedParameter values. :param overwrite: dict specifying which values in condition are to be replaced. :param by_id: bool If True, ids are used as identifiers, otherwise the possibly more descriptive names. :return: overwritten FixedParameter as list. """ cond = copy.deepcopy(condition) for field in overwrite: cond[field] = overwrite[field] return [float(cond[name]) for name in _get_names_or_ids( model, 'FixedParameter', by_id=by_id)]
[docs]def constructEdataFromDataFrame( df: pd.DataFrame, model: AmiciModel, condition: pd.Series, by_id: Optional[bool] = False ) -> amici.amici.ExpData: """ Constructs an ExpData instance according to the provided Model and DataFrame. :param df: pd.DataFrame with Observable Names/Ids as columns. Standard deviations may be specified by appending '_std' as suffix. :param model: Model instance. :param condition: pd.Series with FixedParameter Names/Ids as columns. Preequilibration conditions may be specified by appending '_preeq' as suffix. Presimulation conditions may be specified by appending '_presim' as suffix. :param by_id: Indicate whether in the arguments, column headers are based on ids or names. This should correspond to the way `df` and `condition` was created in the first place. :return: ExpData instance. """ # initialize edata edata = amici.ExpData(model.get()) # timepoints df = df.sort_values(by='time', ascending=True) edata.setTimepoints(df['time'].values.astype(float)) # get fixed parameters from condition overwrite_preeq = {} overwrite_presim = {} for par in list(_get_names_or_ids(model, 'FixedParameter', by_id=by_id)): if par + '_preeq' in condition.keys() \ and not math.isnan(condition[par + '_preeq'].astype(float)): overwrite_preeq[par] = condition[par + '_preeq'].astype(float) if par + '_presim' in condition.keys() \ and not math.isnan(condition[par + '_presim'].astype(float)): overwrite_presim[par] = condition[par + '_presim'].astype(float) # fill in fixed parameters edata.fixedParameters = condition[ _get_names_or_ids(model, 'FixedParameter', by_id=by_id) ].astype(float).values # fill in preequilibration parameters if any([overwrite_preeq[key] != condition[key] for key in overwrite_preeq]): edata.fixedParametersPreequilibration = \ _get_specialized_fixed_parameters( model, condition, overwrite_preeq, by_id=by_id) elif len(overwrite_preeq): edata.fixedParametersPreequilibration = copy.deepcopy( edata.fixedParameters ) # fill in presimulation parameters if any([overwrite_presim[key] != condition[key] for key in overwrite_presim.keys()]): edata.fixedParametersPresimulation = _get_specialized_fixed_parameters( model, condition, overwrite_presim, by_id=by_id ) elif len(overwrite_presim.keys()): edata.fixedParametersPresimulation = copy.deepcopy( edata.fixedParameters ) # fill in presimulation time if 't_presim' in condition.keys(): edata.t_presim = float(condition['t_presim']) # fill in data and stds for obs_index, obs in enumerate( _get_names_or_ids(model, 'Observable', by_id=by_id)): if obs in df.keys(): edata.setObservedData(df[obs].values.astype(float), obs_index) if obs + '_std' in df.keys(): edata.setObservedDataStdDev( df[obs + '_std'].values.astype(float), obs_index ) return edata
[docs]def getEdataFromDataFrame( model: AmiciModel, df: pd.DataFrame, by_id: Optional[bool] = False ) -> List[amici.amici.ExpData]: """ Constructs a ExpData instances according to the provided Model and DataFrame. :param df: dataframe with Observable Names/Ids, FixedParameter Names/Ids and time as columns. Standard deviations may be specified by appending '_std' as suffix. Preequilibration fixedParameters may be specified by appending '_preeq' as suffix. Presimulation fixedParameters may be specified by appending '_presim' as suffix. Presimulation time may be specified as 't_presim' column. :param model: Model instance. :param by_id: Whether the column names in `df` are based on ids or names, corresponding to how the dataframe was created in the first place. :return: list of ExpData instances. """ edata_list = [] # aggregate features that define a condition # fixed parameters condition_parameters = _get_names_or_ids(model, 'FixedParameter', by_id=by_id) # preeq and presim parameters for par in _get_names_or_ids(model, 'FixedParameter', by_id=by_id): if par + '_preeq' in df.columns: condition_parameters.append(par + '_preeq') if par + '_presim' in df.columns: condition_parameters.append(par + '_presim') # presimulation time if 't_presim' in df.columns: condition_parameters.append('t_presim') # drop duplicates to create final conditions conditions = df[condition_parameters].drop_duplicates() # iterate over conditions for ir, row in conditions.iterrows(): # subselect rows that match condition selected = np.ones((len(df),), dtype=bool) for par_label, par in row.iteritems(): if math.isnan(par): selected = selected & np.isnan( df[par_label].astype(float).values ) else: selected = selected & (df[par_label] == par) edata_df = df[selected] edata_list.append( constructEdataFromDataFrame(edata_df, model, row, by_id=by_id) ) return edata_list