# Source code for amici.pandas

"""
Pandas Wrappers
---------------
This module contains convenience wrappers that allow for easy interconversion
between C++ objects from :mod:`amici.amici` and pandas DataFrames.
"""

import pandas as pd
import numpy as np
import math
import copy

from typing import List, Union, Optional, Dict, SupportsFloat
from .numpy import ExpDataView
import amici

# Accepted forms of experimental data: a single ExpData / ExpDataPtr instance
# or a list of either.
ExpDatas = Union[
    List[amici.amici.ExpData], List[amici.ExpDataPtr],
    amici.amici.ExpData, amici.ExpDataPtr
]
# Accepted forms of simulation results: a single ReturnDataView or a list of
# them.
ReturnDatas = Union[
    List[amici.ReturnDataView], amici.ReturnDataView
]

# A model may be passed either as a ModelPtr or as a Model instance.
AmiciModel = Union[amici.ModelPtr, amici.Model]


def _process_edata_list(edata_list: ExpDatas) -> List[amici.amici.ExpData]:
    """
    Normalize experimental data input to a list.

    :param edata_list:
        a single :class:`amici.amici.ExpData` / ``ExpDataPtr`` instance or a
        list of such instances

    :return:
        a one-element list wrapping ``edata_list`` if it is a single
        instance, otherwise ``edata_list`` itself (unchanged, not copied)
    """
    single_instance_types = (amici.amici.ExpData, amici.ExpDataPtr)
    is_single = isinstance(edata_list, single_instance_types)
    return [edata_list] if is_single else edata_list


def _process_rdata_list(rdata_list: ReturnDatas) -> List[amici.ReturnDataView]:
    """
    Normalize simulation result input to a list.

    :param rdata_list:
        a single :class:`amici.ReturnDataView` instance or a list of such
        instances

    :return:
        a one-element list wrapping ``rdata_list`` if it is a single
        instance, otherwise ``rdata_list`` itself (unchanged, not copied)
    """
    # anything that is not a single view is passed through untouched
    if not isinstance(rdata_list, amici.ReturnDataView):
        return rdata_list
    return [rdata_list]


def getDataObservablesAsDataFrame(
        model: AmiciModel,
        edata_list: ExpDatas,
        by_id: Optional[bool] = False) -> pd.DataFrame:
    """
    Write Observables from experimental data as DataFrame.

    One row is generated per timepoint of every ExpData instance. Each row
    holds the time, the constant datatype ``'data'``, the condition columns
    (fixed parameters, preequilibration/presimulation parameters and
    ``t_presim``) as well as the measured value and standard deviation of
    every observable.

    :param model:
        Model instance.

    :param edata_list:
        list of ExpData instances with experimental data.
        May also be a single ExpData instance.

    :param by_id:
        If True, uses observable ids as column names in the generated
        DataFrame, otherwise the possibly more descriptive observable names
        are used.

    :return:
        pandas DataFrame with conditions/timepoints as rows and observables as
        columns.
    """
    edata_list = _process_edata_list(edata_list)

    # list of all column names using either ids or names
    cols = _get_extended_observable_cols(model, by_id=by_id)

    # observable labels are the same for every record, so resolve them once
    # instead of querying the model for each timepoint
    observables = _get_names_or_ids(model, 'Observable', by_id=by_id)

    # aggregate records
    dicts = []
    for edata in edata_list:
        npdata = ExpDataView(edata)
        for i_time, timepoint in enumerate(edata.getTimepoints()):
            datadict = {
                'time': timepoint,
                'datatype': 'data'
            }
            # add observables and noises
            for i_obs, obs in enumerate(observables):
                datadict[obs] = npdata['observedData'][i_time, i_obs]
                datadict[obs + '_std'] = \
                    npdata['observedDataStdDev'][i_time, i_obs]

            # add conditions
            _fill_conditions_dict(datadict, model, edata, by_id=by_id)

            dicts.append(datadict)

    return pd.DataFrame.from_records(dicts, columns=cols)
def getSimulationObservablesAsDataFrame(
        model: AmiciModel,
        edata_list: ExpDatas,
        rdata_list: ReturnDatas,
        by_id: Optional[bool] = False
) -> pd.DataFrame:
    """
    Write Observables from simulation results as DataFrame.

    One row is generated per entry of ``rdata['t']`` for every
    (ExpData, ReturnData) pair. Each row holds the time, the constant
    datatype ``'simulation'``, the condition columns taken from the ExpData
    instance, and the simulated value (``rdata['y']``) and standard
    deviation (``rdata['sigmay']``) of every observable.

    :param model:
        Model instance.

    :param edata_list:
        list of ExpData instances with experimental data.
        May also be a single ExpData instance.

    :param rdata_list:
        list of ReturnData instances corresponding to ExpData.
        May also be a single ReturnData instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        pandas DataFrame with conditions/timepoints as rows and observables as
        columns.
    """
    edata_list = _process_edata_list(edata_list)
    rdata_list = _process_rdata_list(rdata_list)

    # list of all column names using either names or ids
    cols = _get_extended_observable_cols(model, by_id=by_id)

    # observable labels are the same for every record, so resolve them once
    observables = _get_names_or_ids(model, 'Observable', by_id=by_id)

    # aggregate records
    dicts = []
    # NOTE: zip pairs entries positionally; surplus entries of the longer
    # list are ignored
    for edata, rdata in zip(edata_list, rdata_list):
        for i_time, timepoint in enumerate(rdata['t']):
            datadict = {
                'time': timepoint,
                'datatype': 'simulation',
            }
            # append simulations
            for i_obs, obs in enumerate(observables):
                datadict[obs] = rdata['y'][i_time, i_obs]
                datadict[obs + '_std'] = rdata['sigmay'][i_time, i_obs]

            # use edata to fill conditions columns
            _fill_conditions_dict(datadict, model, edata, by_id=by_id)

            # append to dataframe
            dicts.append(datadict)

    return pd.DataFrame.from_records(dicts, columns=cols)
def getSimulationStatesAsDataFrame(
        model: AmiciModel,
        edata_list: ExpDatas,
        rdata_list: ReturnDatas,
        by_id: Optional[bool] = False) -> pd.DataFrame:
    """
    Write States from simulation results as DataFrame.

    One row is generated per entry of ``rdata['t']`` for every
    (ExpData, ReturnData) pair. Each row holds the time, the condition
    columns taken from the ExpData instance, and the simulated value
    (``rdata['x']``) of every state.

    :param model:
        Model instance.

    :param edata_list:
        list of ExpData instances with experimental data.
        May also be a single ExpData instance.

    :param rdata_list:
        list of ReturnData instances corresponding to ExpData.
        May also be a single ReturnData instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        pandas DataFrame with conditions/timepoints as rows and state
        variables as columns.
    """
    edata_list = _process_edata_list(edata_list)
    rdata_list = _process_rdata_list(rdata_list)

    # get conditions and state column names by name or id
    cols = _get_state_cols(model, by_id=by_id)

    # state labels are the same for every record, so resolve them once
    states = _get_names_or_ids(model, 'State', by_id=by_id)

    # aggregate records
    dicts = []
    # NOTE: zip pairs entries positionally; surplus entries of the longer
    # list are ignored
    for edata, rdata in zip(edata_list, rdata_list):
        for i_time, timepoint in enumerate(rdata['t']):
            datadict = {
                'time': timepoint,
            }

            # append states
            for i_state, state in enumerate(states):
                datadict[state] = rdata['x'][i_time, i_state]

            # use data to fill condition columns
            _fill_conditions_dict(datadict, model, edata, by_id=by_id)

            # append to dataframe
            dicts.append(datadict)

    return pd.DataFrame.from_records(dicts, columns=cols)
def getResidualsAsDataFrame(
        model: AmiciModel,
        edata_list: ExpDatas,
        rdata_list: ReturnDatas,
        by_id: Optional[bool] = False) -> pd.DataFrame:
    """
    Compute model residuals according to lists of ReturnData and ExpData.

    The data and simulation DataFrames are built with
    :func:`getDataObservablesAsDataFrame` and
    :func:`getSimulationObservablesAsDataFrame` and matched row by row via
    their index. For every observable the residual is
    ``abs((data - simulation) / simulation_std)``.

    :param model:
        Model instance.

    :param edata_list:
        list of ExpData instances with experimental data.
        May also be a single ExpData instance.

    :param rdata_list:
        list of ReturnData instances corresponding to ExpData.
        May also be a single ReturnData instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        pandas DataFrame with conditions/timepoints as rows and the residual
        of each observable as columns.
    """
    edata_list = _process_edata_list(edata_list)
    rdata_list = _process_rdata_list(rdata_list)

    # create observable and simulation dataframes
    df_edata = getDataObservablesAsDataFrame(
        model, edata_list, by_id=by_id)
    df_rdata = getSimulationObservablesAsDataFrame(
        model, edata_list, rdata_list, by_id=by_id)

    # get all column names using names or ids
    cols = _get_observable_cols(model, by_id=by_id)

    # labels are the same for every row, so resolve them once
    observables = _get_names_or_ids(model, 'Observable', by_id=by_id)
    fixed_parameters = _get_names_or_ids(
        model, 'FixedParameter', by_id=by_id)

    # aggregate records
    dicts = []
    for row in df_rdata.index:
        # materialize the simulation row once instead of once per column
        sim_row = df_rdata.loc[row]
        datadict = {
            'time': sim_row['time'],
            't_presim': sim_row['t_presim']
        }

        # the data row is only needed (and only looked up) when there is at
        # least one observable to compare
        if observables:
            data_row = df_edata.loc[row]
            for obs in observables:
                # compute residual and append to dict
                datadict[obs] = abs(
                    (data_row[obs] - sim_row[obs]) / sim_row[obs + '_std'])

        # copy the condition columns from the simulation row
        for par in fixed_parameters:
            datadict[par] = sim_row[par]
            datadict[par + '_preeq'] = sim_row[par + '_preeq']
            datadict[par + '_presim'] = sim_row[par + '_presim']

        # append to dataframe
        dicts.append(datadict)

    return pd.DataFrame.from_records(dicts, columns=cols)
def _fill_conditions_dict(datadict: Dict[str, float],
                          model: AmiciModel,
                          edata: amici.amici.ExpData,
                          by_id: bool) -> Dict[str, float]:
    """
    Helper function that fills in condition parameters from model and
    edata.

    Sets ``'t_presim'`` and, for every fixed parameter label ``p``, the keys
    ``p``, ``p + '_preeq'`` and ``p + '_presim'``. Fixed parameter values are
    taken from ``edata`` if it provides any, otherwise from the model.
    Preequilibration/presimulation values are NaN if ``edata`` provides
    none.

    :param datadict:
        dictionary in which condition parameters will be inserted
        as key value pairs (modified in place).

    :param model:
        Model instance.

    :param edata:
        ExpData instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        dictionary with filled condition parameters (the same object as
        ``datadict``).
    """
    datadict['t_presim'] = edata.t_presim

    par_labels = _get_names_or_ids(model, 'FixedParameter', by_id=by_id)
    if not par_labels:
        return datadict

    # fetch each parameter vector once instead of once per parameter;
    # len() is used (not truthiness) because the vectors may be array-like
    fixed = edata.fixedParameters
    if not len(fixed):
        fixed = model.getFixedParameters()
    preeq = edata.fixedParametersPreequilibration
    presim = edata.fixedParametersPresimulation
    has_preeq = len(preeq) > 0
    has_presim = len(presim) > 0

    for i_par, par in enumerate(par_labels):
        datadict[par] = fixed[i_par]
        datadict[par + '_preeq'] = preeq[i_par] if has_preeq else np.nan
        datadict[par + '_presim'] = presim[i_par] if has_presim else np.nan

    return datadict


def _get_condition_cols(model: AmiciModel, by_id: bool) -> List[str]:
    """
    Construction helper for the condition part of dataframe headers.

    :param model:
        Model instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        fixed parameter labels, followed by the same labels suffixed with
        ``'_preeq'`` and then with ``'_presim'``.
    """
    fixed = _get_names_or_ids(model, 'FixedParameter', by_id=by_id)
    return (
        fixed
        + [name + '_preeq' for name in fixed]
        + [name + '_presim' for name in fixed]
    )


def _get_extended_observable_cols(model: AmiciModel,
                                  by_id: bool) -> List[str]:
    """
    Construction helper for extended observable dataframe headers.

    :param model:
        Model instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        column names as list: time, datatype, t_presim, condition columns,
        observables and observables suffixed with ``'_std'``.
    """
    condition_cols = _get_condition_cols(model, by_id=by_id)
    observables = _get_names_or_ids(model, 'Observable', by_id=by_id)
    return (
        ['time', 'datatype', 't_presim']
        + condition_cols
        + observables
        + [name + '_std' for name in observables]
    )


def _get_observable_cols(model: AmiciModel, by_id: bool) -> List[str]:
    """
    Construction helper for observable dataframe headers.

    :param model:
        Model instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        column names as list: time, t_presim, condition columns and
        observables.
    """
    condition_cols = _get_condition_cols(model, by_id=by_id)
    observables = _get_names_or_ids(model, 'Observable', by_id=by_id)
    return ['time', 't_presim'] + condition_cols + observables


def _get_state_cols(model: AmiciModel, by_id: bool) -> List[str]:
    """
    Construction helper for state dataframe headers.

    :param model:
        Model instance.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        column names as list: time, t_presim, condition columns and states.
    """
    condition_cols = _get_condition_cols(model, by_id=by_id)
    states = _get_names_or_ids(model, 'State', by_id=by_id)
    return ['time', 't_presim'] + condition_cols + states


def _get_names_or_ids(model: AmiciModel,
                      variable: str,
                      by_id: bool) -> List[str]:
    """
    Obtains a unique list of identifiers for the specified variable.

    Names are returned if ``by_id`` is False, the model has names for the
    variable and these names are unique. Otherwise ids are returned if the
    model has them.

    :param model:
        Model instance.

    :param variable:
        variable type, one of ``'Parameter'``, ``'FixedParameter'``,
        ``'Observable'`` or ``'State'``.

    :param by_id:
        If True, ids are used as identifiers, otherwise first the possibly
        more descriptive names are used.

    :return:
        column names as a new list.

    :raises ValueError:
        if ``variable`` is not a permitted variable type, or if neither
        unique names (when permitted by ``by_id``) nor ids are available.
    """
    # check whether variable type permitted
    variable_options = ['Parameter', 'FixedParameter', 'Observable', 'State']
    if variable not in variable_options:
        raise ValueError('Variable must be in ' + str(variable_options))

    # extract attributes
    names = list(getattr(model, f'get{variable}Names')())
    ids = list(getattr(model, f'get{variable}Ids')())

    # find out if model has names and ids
    has_names = getattr(model, f'has{variable}Names')()
    has_ids = getattr(model, f'has{variable}Ids')()

    # names are only usable as labels if they are unique
    if not by_id and has_names and len(set(names)) == len(names):
        return names

    if has_ids:
        return ids

    # unable to create unique labels
    if by_id:
        msg = f"Model {variable} ids are not set."
    else:
        msg = f"Model {variable} names are not unique and " \
              f"{variable} ids are not set."
    raise ValueError(msg)


def _get_specialized_fixed_parameters(
        model: AmiciModel,
        condition: Union[Dict[str, SupportsFloat], pd.Series],
        overwrite: Union[Dict[str, SupportsFloat], pd.Series],
        by_id: bool
) -> List[float]:
    """
    Copies values in condition and overwrites them according to key
    value pairs specified in overwrite.

    ``condition`` itself is left untouched; the overwrite is applied to a
    deep copy.

    :param model:
        Model instance.

    :param condition:
        fixedParameter values.

    :param overwrite:
        dict specifying which values in condition are to be replaced.

    :param by_id:
        If True, ids are used as identifiers, otherwise the possibly more
        descriptive names.

    :return:
        overwritten FixedParameter values as list of floats, ordered like the
        model's fixed parameter labels.
    """
    cond = copy.deepcopy(condition)
    for field in overwrite:
        cond[field] = overwrite[field]
    return [float(cond[name]) for name in _get_names_or_ids(
        model, 'FixedParameter', by_id=by_id)]
def constructEdataFromDataFrame(
        df: pd.DataFrame,
        model: AmiciModel,
        condition: pd.Series,
        by_id: Optional[bool] = False
) -> amici.amici.ExpData:
    """
    Constructs an ExpData instance according to the provided Model
    and DataFrame.

    :param df:
        pd.DataFrame with Observable Names/Ids as columns.
        Standard deviations may be specified by appending '_std' as suffix.
        Must contain a 'time' column; rows are sorted by it before use.

    :param model:
        Model instance.

    :param condition:
        pd.Series with FixedParameter Names/Ids as columns.
        Preequilibration conditions may be specified by appending
        '_preeq' as suffix. Presimulation conditions may be specified by
        appending '_presim' as suffix.

    :param by_id:
        Indicate whether in the arguments, column headers are based on ids or
        names. This should correspond to the way `df` and `condition` was
        created in the first place.

    :return:
        ExpData instance.
    """
    # initialize edata
    edata = amici.ExpData(model.get())

    # timepoints
    df = df.sort_values(by='time', ascending=True)
    edata.setTimepoints(df['time'].values.astype(float))

    fixed_parameters = _get_names_or_ids(
        model, 'FixedParameter', by_id=by_id)

    # collect non-NaN preequilibration/presimulation values from condition.
    # float() is used for the conversion because the elements of `condition`
    # may be plain Python numbers, which have no `.astype`
    overwrite_preeq = {}
    overwrite_presim = {}
    for par in fixed_parameters:
        for suffix, overwrite in (('_preeq', overwrite_preeq),
                                  ('_presim', overwrite_presim)):
            key = par + suffix
            if key not in condition.keys():
                continue
            value = float(condition[key])
            if not math.isnan(value):
                overwrite[par] = value

    # fill in fixed parameters
    edata.fixedParameters = \
        condition[fixed_parameters].astype(float).values

    # fill in preequilibration parameters: build a specialized vector if any
    # value differs from the plain condition, otherwise reuse the plain one
    if any(overwrite_preeq[key] != condition[key]
           for key in overwrite_preeq):
        edata.fixedParametersPreequilibration = \
            _get_specialized_fixed_parameters(
                model, condition, overwrite_preeq, by_id=by_id
            )
    elif len(overwrite_preeq):
        edata.fixedParametersPreequilibration = copy.deepcopy(
            edata.fixedParameters
        )

    # fill in presimulation parameters (same scheme as preequilibration)
    if any(overwrite_presim[key] != condition[key]
           for key in overwrite_presim):
        edata.fixedParametersPresimulation = \
            _get_specialized_fixed_parameters(
                model, condition, overwrite_presim, by_id=by_id
            )
    elif len(overwrite_presim):
        edata.fixedParametersPresimulation = copy.deepcopy(
            edata.fixedParameters
        )

    # fill in presimulation time
    if 't_presim' in condition.keys():
        edata.t_presim = float(condition['t_presim'])

    # fill in data and stds
    for obs_index, obs in enumerate(
            _get_names_or_ids(model, 'Observable', by_id=by_id)):
        if obs in df.keys():
            edata.setObservedData(df[obs].values.astype(float), obs_index)
        if obs + '_std' in df.keys():
            edata.setObservedDataStdDev(
                df[obs + '_std'].values.astype(float), obs_index
            )

    return edata
def getEdataFromDataFrame(
        model: AmiciModel,
        df: pd.DataFrame,
        by_id: Optional[bool] = False
) -> List[amici.amici.ExpData]:
    """
    Constructs ExpData instances according to the provided Model and
    DataFrame.

    The rows of ``df`` are grouped by their unique combinations of condition
    columns; one ExpData instance is created per combination.

    :param df:
        dataframe with Observable Names/Ids, FixedParameter Names/Ids
        and time as columns. Standard deviations may be specified by
        appending '_std' as suffix. Preequilibration fixedParameters may be
        specified by appending '_preeq' as suffix. Presimulation
        fixedParameters may be specified by appending '_presim' as suffix.
        Presimulation time may be specified as 't_presim' column.

    :param model:
        Model instance.

    :param by_id:
        Whether the column names in `df` are based on ids or names,
        corresponding to how the dataframe was created in the first place.

    :return:
        list of ExpData instances.
    """
    edata_list = []

    # aggregate features that define a condition

    # fixed parameters (copied so the helper's result is not mutated below)
    fixed_parameters = _get_names_or_ids(
        model, 'FixedParameter', by_id=by_id)
    condition_parameters = list(fixed_parameters)
    # preeq and presim parameters
    for par in fixed_parameters:
        if par + '_preeq' in df.columns:
            condition_parameters.append(par + '_preeq')
        if par + '_presim' in df.columns:
            condition_parameters.append(par + '_presim')
    # presimulation time
    if 't_presim' in df.columns:
        condition_parameters.append('t_presim')
    # drop duplicates to create final conditions
    conditions = df[condition_parameters].drop_duplicates()

    # iterate over conditions
    for _, row in conditions.iterrows():
        # subselect rows that match condition
        selected = np.ones((len(df),), dtype=bool)
        # Series.items() replaces Series.iteritems(), which was removed in
        # pandas 2.0
        for par_label, par in row.items():
            if math.isnan(par):
                # NaN never compares equal, so match NaN entries explicitly
                selected = selected & np.isnan(
                    df[par_label].astype(float).values
                )
            else:
                selected = selected & (df[par_label] == par)
        edata_df = df[selected]

        edata_list.append(
            constructEdataFromDataFrame(edata_df, model, row, by_id=by_id)
        )

    return edata_list