Source code for supereeg.model

from __future__ import division
from __future__ import print_function
import time
import copy
import warnings
import six
import pandas as pd
import numpy as np
import seaborn as sns
import deepdish as dd
import matplotlib.pyplot as plt
from .helpers import _get_corrmat, _r2z, _z2r, _log_rbf, _blur_corrmat, _plot_borderless,\
    _near_neighbor, _timeseries_recon, _count_overlapping, _plot_locs_connectome, \
    _plot_locs_hyp, _gray, _nifti_to_brain,\
    _unique, _union, _empty, _to_log_complex, _to_exp_real
from .brain import Brain
from .nifti import Nifti


class Model(object):
    """
    Model data object for the supereeg package

    This class holds your supereeg model. To create an instance, pass a list of
    brain objects and the model will be generated from those brain objects. You
    can also add your own model by passing a numpy array as your matrix and the
    corresponding locations. Alternatively, you can bypass creating a new model
    by passing numerator, denominator, locations, and n_subs (see parameters
    for details). Additionally, you can include a meta dictionary with any
    other information that you want to save with the model.

    Parameters
    ----------
    data : supereeg.Brain or list of supereeg.Brain, supereeg.Nifti or list of supereeg.Nifti, or Numpy.ndarray
        A supereeg.Brain object or supereeg.Nifti object, a list of such
        objects, or a Numpy.ndarray containing your model.

    locs : pandas.DataFrame or np.ndarray
        MNI coordinate (x, y, z) by number of electrodes DataFrame containing
        electrode locations

    template : filepath
        Path to a template nifti file used to set model locations

    numerator : Numpy.ndarray
        (Optional) A locations x locations matrix comprising the sum of the log
        z-transformed correlation matrices over subjects. If used, you must
        also pass denominator, locs and n_subs. Otherwise, numerator will be
        computed from the brain object data.

    denominator : Numpy.ndarray
        (Optional) A locations x locations matrix comprising the sum of the log
        (weighted) number of subjects contributing to each matrix cell. If
        used, you must also pass numerator, locs and n_subs. Otherwise,
        denominator will be computed from the brain object data.

    n_subs : int
        The number of subjects used to create the model. Required if you pass
        numerator/denominator. Otherwise computed automatically from the data.

    rbf_width : positive scalar
        The width of the radial basis function (RBF) used as a spatial prior
        for smoothing estimates at nearby locations. (Default: 20)

    meta : dict
        Dict containing whatever you want. Initialized with a stability field,
        {'stable': True}, which is changed to {'stable': False} after a
        subtraction is performed.

    date_created : str
        Time created

    save : str or None
        Optional filename to save the created model

    Attributes
    ----------
    numerator : Numpy.ndarray
        A locations x locations matrix comprising the sum of the log
        z-transformed correlation matrices over subjects

    denominator : Numpy.ndarray
        A locations x locations matrix comprising the log sum of the (weighted)
        number of subjects contributing to each matrix cell

    n_subs : int
        Number of subjects used to create the model

    Returns
    -------
    model : supereeg.Model instance
        A model that can be used to infer timeseries from unknown locations
    """
    def __init__(self, data=None, locs=None, template=None, numerator=None, denominator=None,
                 n_subs=None, meta=None, date_created=None, rbf_width=20, save=None):
        from .load import load

        self.locs = None
        self.numerator = None
        self.denominator = None
        self.n_subs = 0
        self.meta = meta
        if self.meta is None:
            self.meta = {'stable': True}
        self.date_created = date_created
        self.rbf_width = rbf_width

        if n_subs is None:
            n_subs = 1

        if not (data is None):
            if type(data) == list:
                if len(data) == 0:
                    data = None
                else:
                    if not (locs is None):
                        if type(locs) == pd.DataFrame:
                            locs = locs.as_matrix()
                        assert type(locs) == np.ndarray, 'Locations must be either a DataFrame or a numpy array'
                        assert locs.shape[1] == 3, 'Only 3d locations are supported'

                    all_locs = locs
                    for i in range(1, len(data)):
                        if type(data[i]) in (Model, Brain, Nifti):
                            if all_locs is None:
                                all_locs = data[i].get_locs().as_matrix()
                            else:
                                all_locs = np.vstack((all_locs, data[i].get_locs().as_matrix()))
                    locs, loc_inds = _unique(all_locs)

                    self.__init__(data=data[0], locs=locs, template=template, meta=self.meta,
                                  rbf_width=self.rbf_width, n_subs=1)
                    for i in range(1, len(data)):
                        self.update(Model(data=data[i], locs=locs, template=template, meta=self.meta,
                                          rbf_width=self.rbf_width, n_subs=1))

            if isinstance(data, six.string_types):
                data = load(data)

            if isinstance(data, Nifti):
                data = Brain(data)

            if isinstance(data, Model):
                self.date_created = data.date_created
                self.denominator = data.denominator
                self.locs = data.locs
                self.meta = data.meta
                self.n_subs = data.n_subs
                self.numerator = data.numerator
                self.rbf_width = data.rbf_width
                n_subs = self.n_subs
            elif isinstance(data, Brain):
                corrmat = _get_corrmat(data)
                self.__init__(data=corrmat, locs=data.get_locs(), n_subs=1)
            elif isinstance(data, np.ndarray):
                assert not (locs is None), 'must specify model locations'
                assert locs.shape[0] == data.shape[0], 'number of locations must match the size of the given correlation matrix'
                self.locs = locs
                self.numerator = _to_log_complex(_r2z(data))
                self.denominator = np.zeros_like(self.numerator, dtype=np.float32)

        if not ((numerator is None) or (denominator is None)):
            assert numerator.shape[0] == numerator.shape[1], 'numerator must be a square matrix'
            assert denominator.shape[0] == denominator.shape[1], 'denominator must be a square matrix'
            assert numerator.shape[0] == denominator.shape[0], 'numerator and denominator must be the same shape'
            assert not (locs is None), 'must specify model locations'
            assert locs.shape[0] == numerator.shape[0], 'number of locations must match the size of the numerator ' \
                                                        'and denominator matrices'
            if (self.numerator is None) or (self.denominator is None):
                self.numerator = numerator
                self.denominator = denominator
            else:
                # numerator and denominator may already reflect inferred data; effectively
                # the user has now passed in *two* sets of data, so combine them in log space
                self._set_numerator(np.logaddexp(self.numerator.real, numerator.real),
                                    np.logaddexp(self.numerator.imag, numerator.imag))
                self.denominator = np.logaddexp(self.denominator, denominator)
            self.locs = locs
            self.n_subs += n_subs

        if not (template is None):
            # blur the correlation matrix out to the template locations
            if not (locs is None):
                warnings.warn("Argument 'locs' will be ignored in favor of the provided Nifti template")
            if isinstance(template, six.string_types):
                template = load(template)
            assert type(template) == Nifti, 'template must be a Nifti object or a path to a Nifti object'

            bo = Brain(template)
            rbf_weights = _log_rbf(bo.get_locs(), self.locs, width=self.rbf_width)
            self.numerator, self.denominator = _blur_corrmat(self.get_model(z_transform=True), rbf_weights)
            self.locs = bo.get_locs()
        elif not (locs is None):
            # blur the correlation matrix out to locs
            if isinstance(data, (Brain, Model)):
                # self.locs may now conflict with locs
                if not ((locs.shape[0] == self.locs.shape[0]) and np.allclose(locs, self.locs)):
                    rbf_weights = _log_rbf(locs, self.locs, width=self.rbf_width)
                    self.numerator, self.denominator = _blur_corrmat(self.get_model(z_transform=True), rbf_weights)
                    self.locs = locs
            elif self.locs is None:
                self.locs = locs

        assert not (self.locs is None), 'Must specify model locations directly via the locs argument, or indirectly via a Model or Brain object (or both)'

        # sort locations and force them to be unique
        self.locs, loc_inds = _unique(self.locs)
        self.numerator = self.numerator[loc_inds, :][:, loc_inds]
        self.denominator = self.denominator[loc_inds, :][:, loc_inds]

        if not type(self.locs) == pd.DataFrame:
            self.locs = pd.DataFrame(data=self.locs, columns=['x', 'y', 'z'])

        if not self.date_created:
            self.date_created = time.strftime("%c")

        self.n_locs = self.locs.shape[0]
        self.n_subs = n_subs

        if not (save is None):
            if type(save) == str:
                self.save(save)
            else:
                warnings.warn('bad filename, cannot save to disk: ' + str(save))
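    # A minimal construction sketch (illustrative, not from the original source;
    # bo1 and bo2 stand in for existing supereeg.Brain objects):
    #
    #   >>> import numpy as np
    #   >>> import supereeg as se
    #   >>> locs = np.array([[0., 0., 0.], [10., 10., 10.], [20., 20., 20.]])
    #   >>> mo = se.Model(data=[bo1, bo2], locs=locs, rbf_width=20)  # from brain objects
    #   >>> mo = se.Model(data=np.eye(3), locs=locs)                 # from a correlation matrix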
    def get_model(self, z_transform=False):
        """ Returns a copy of the model in the form of a correlation matrix """
        if (self.numerator is None) or (self.denominator is None):
            m = np.eye(self.n_locs)
        else:
            m = _recover_model(self.numerator, self.denominator, z_transform=z_transform)
        m[np.isnan(m)] = 0
        return m
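    # Usage sketch (assumes an existing Model instance `mo`): the recovered matrix
    # can be returned either as raw correlations or as Fisher z-transformed values.
    #
    #   >>> corrmat = mo.get_model()                    # correlations in [-1, 1]
    #   >>> z_corrmat = mo.get_model(z_transform=True)  # z-transformed; diagonal is inf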
    def get_locs(self):
        """ Returns the locations in the model """
        return self.locs
    def set_locs(self, new_locs, force_include_bo_locs=False):
        """
        Update self.locs to a new set of locations (and blur the correlation
        matrix accordingly). If force_include_bo_locs is True (default: False),
        the final set of locations will also include the old locations.
        """
        if force_include_bo_locs:
            new_locs = _union(self.locs, new_locs)
        else:
            new_locs, tmp = _unique(new_locs)

        if _empty(self.locs):
            self.locs = new_locs
            if not _empty(new_locs):
                self.numerator = np.log(np.zeros([new_locs.shape[0], new_locs.shape[0]], dtype=np.complex128))
                self.denominator = np.zeros_like(self.numerator, dtype=np.float64)
                self.locs = new_locs
                self.n_locs = new_locs.shape[0]
            return
        elif _empty(new_locs):
            if not force_include_bo_locs:
                self.locs = pd.DataFrame(columns=('x', 'y', 'z'))
                self.n_locs = 0
                self.numerator = np.array([], dtype=np.complex128)
                self.denominator = np.array([], dtype=np.float64)
            return

        new_locs_in_self = _count_overlapping(self.get_locs(), new_locs)
        if np.all(new_locs_in_self):
            inds = _count_overlapping(new_locs, self.get_locs())
            self.locs = self.locs.iloc[inds, :]
            self.n_locs = self.locs.shape[0]
            self.numerator = self.numerator[inds, :][:, inds]
            self.denominator = self.denominator[inds, :][:, inds]
            return
        else:
            rbf_weights = _log_rbf(new_locs, self.get_locs(), width=self.rbf_width)
            self.numerator, self.denominator = _blur_corrmat(self.get_model(z_transform=True), rbf_weights)
            self.locs = new_locs

        self.locs, loc_inds = _unique(self.locs)
        self.numerator = self.numerator[loc_inds, :][:, loc_inds]
        self.denominator = self.denominator[loc_inds, :][:, loc_inds]
        self.n_locs = self.locs.shape[0]
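    # Usage sketch (hypothetical `mo` and `new_locs`): moving a model to new
    # locations blurs the stored correlation matrix through the RBF spatial prior.
    #
    #   >>> mo.set_locs(new_locs)                              # replace locations
    #   >>> mo.set_locs(new_locs, force_include_bo_locs=True)  # union with existing locations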
    def predict(self, bo, nearest_neighbor=False, match_threshold='auto', force_update=False,
                force_include_bo_locs=True, preprocess='zscore', recon_loc_inds=None):
        """
        Takes a brain object and a 'full' covariance model, fills in all
        electrode timeseries for all missing locations, and returns the new
        brain object

        Parameters
        ----------
        bo : supereeg.Brain, supereeg.Nifti, or supereeg.Model
            An object that will be converted to a Brain object.

        nearest_neighbor : bool
            If True, finds the nearest voxel for each subject's electrode
            location and uses that as the revised electrode location matrix in
            the prediction. (Default: False)

        match_threshold : 'auto' or int
            If 'auto', ignore all electrodes whose distance from the nearest
            matching voxel is greater than the maximum voxel dimension. If a
            value greater than 0 is passed, include only electrodes that are
            within that distance of a matched voxel.

        force_update : bool
            If True, will update the model with the patient's correlation
            matrix. (Default: False)

        force_include_bo_locs : bool
            If True, and if force_update is False, update the locations in the
            model to include the locations in the given brain object prior to
            generating the predictions. If force_update is True, this parameter
            is forced to be True (force_update requires updating the locations)
            and the specified value is ignored. (Default: True)

        preprocess : 'zscore' or None
            The predict algorithm requires the data to be z-scored. However, if
            your data are already z-scored you can bypass this by setting to None.

        recon_loc_inds : int, list, or None
            Index for the estimated location in the model (location in
            unknown_inds)

        Returns
        -------
        bo_p : supereeg.Brain
            New brain data object with missing electrode locations filled in
        """
        if not isinstance(bo, Brain):
            bo = Brain(bo)
        bor = bo.apply_filter(inplace=False)

        # if match_threshold is 'auto', ignore all electrodes whose distance from the
        # nearest matching voxel is greater than the maximum voxel dimension
        if nearest_neighbor:
            bor = _near_neighbor(bor, self, match_threshold=match_threshold)

        if self.locs.shape[0] > 1000:
            warnings.warn('Model locations exceed 1000, this may take a while. Go grab a cup of coffee.')

        # if force_update is True, update the model with the subject's correlation matrix
        if force_update:
            mo = self.update(bor, inplace=False)
        else:
            mo = self

        # blur out the model to include the brain object locations
        mo.set_locs(bor.get_locs(), force_include_bo_locs=force_include_bo_locs)

        activations = _timeseries_recon(bor, mo, preprocess=preprocess, recon_loc_inds=recon_loc_inds)

        if not recon_loc_inds:
            loc_labels = np.array(['observed'] * len(mo.get_locs()))
            loc_labels[~_count_overlapping(bor.get_locs(), mo.get_locs())] = ['reconstructed']
            recon_loc = mo.locs
        else:
            recon_loc = np.atleast_2d(mo.get_locs().iloc[~_count_overlapping(bor.get_locs(), mo.get_locs())].iloc[recon_loc_inds[0]].as_matrix())
            loc_labels = np.array(['reconstructed'] * len(list(recon_loc_inds)))

        return Brain(data=activations, locs=recon_loc, sessions=bor.sessions,
                     sample_rate=bor.sample_rate, label=loc_labels.tolist(), filter=None)
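    # Usage sketch (hypothetical Brain object `bo`): predict reconstructs activity
    # at every model location and labels each location 'observed' or 'reconstructed'.
    #
    #   >>> bo_p = mo.predict(bo, nearest_neighbor=True, match_threshold='auto')
    #   >>> bo_p.label[:5]  # e.g. ['observed', 'reconstructed', ...]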
    def update(self, data, inplace=True):
        """
        Update a model with new data.

        Parameters
        ----------
        data : supereeg.Brain, supereeg.Nifti, or supereeg.Model (or a mixed list of these)
            New data

        inplace : bool
            Whether to run the update in place or return a new model
            (default: True)

        Returns
        -------
        model : supereeg.Model
            A new updated model object
        """
        if inplace:
            m1 = self
        else:
            m1 = Model(self)
        assert m1.meta['stable'] == True, 'solution unstable'

        m2 = Model(data)

        locs = _union(m1.get_locs(), m2.get_locs())
        m1.set_locs(locs)
        m2.set_locs(locs)

        m1._set_numerator(np.logaddexp(m1.numerator.real, m2.numerator.real),
                          np.logaddexp(m1.numerator.imag, m2.numerator.imag))

        # simplify to ensure that each entry of the numerator has either a non-zero
        # real part OR a non-zero imaginary part (or neither)
        n = _to_log_complex(_to_exp_real(m1.numerator))
        m1._set_numerator(n.real, n.imag)

        m1.denominator = np.logaddexp(m1.denominator, m2.denominator)
        m1.locs = locs
        m1.n_locs = locs.shape[0]
        m1.n_subs += m2.n_subs

        # combine meta info
        if not ((m1.meta is None) and (m2.meta is None)):
            if m1.meta is None:
                m1.meta = m2.meta
            elif (type(m1.meta) == dict) and (type(m2.meta) == dict):
                m1.meta.update(m2.meta)

        if not inplace:
            return m1
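    # Usage sketch: because the numerator and denominator are accumulated with
    # np.logaddexp, each update folds a new subject's contribution into the model
    # without recomputing earlier subjects (bo3 is a hypothetical Brain object).
    #
    #   >>> mo.update(bo3)                          # in place
    #   >>> bigger = mo.update(bo3, inplace=False)  # returns a new Model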
    def _set_numerator(self, n_real, n_imag):
        """ Internal function for setting the numerator (deals with size mismatches) """
        self.numerator = np.zeros_like(n_real, dtype=np.complex128)
        self.numerator.real = n_real
        self.numerator.imag = n_imag
    def info(self):
        """
        Print info about the model object

        Prints the number of locations, number of subjects, RBF width, date
        created, and any optional meta data.
        """
        print('Number of locations: ' + str(self.n_locs))
        print('Number of subjects: ' + str(self.n_subs))
        print('RBF width: ' + str(self.rbf_width))
        print('Date created: ' + str(self.date_created))
        print('Meta data: ' + str(self.meta))
    def plot_data(self, savefile=None, show=True, **kwargs):
        """
        Plot the supereeg model as a correlation matrix

        This function wraps seaborn's heatmap and accepts any inputs that
        seaborn supports for models smaller than 2000 x 2000. If the model is
        larger, the plot cannot be generated without specifying a savefile.

        Parameters
        ----------
        savefile : str or None
            Filename to save the figure to (required for models with 2000 or
            more locations)

        show : bool
            If False, the image is not rendered (default: True)

        Returns
        -------
        ax : matplotlib.Axes
            An axes object
        """
        corr_mat = self.get_model(z_transform=False)

        if np.shape(corr_mat)[0] < 2000:
            ax = sns.heatmap(corr_mat, cbar_kws={'label': 'correlation'}, **kwargs)
        else:
            if savefile is None:
                raise NotImplementedError('Cannot plot large models when savefile is None')
            else:
                ax = _plot_borderless(corr_mat, savefile=savefile, vmin=-1, vmax=1, cmap='Spectral')
        if show:
            plt.show()

        return ax
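    # Usage sketch: small models render directly through seaborn's heatmap; models
    # with 2000+ locations must be written to disk (the filename is hypothetical).
    #
    #   >>> ax = mo.plot_data(cmap='Spectral')
    #   >>> ax = big_mo.plot_data(savefile='big_model', show=False)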
    def plot_locs(self, pdfpath=None):
        """
        Plots electrode locations from the model object

        Parameters
        ----------
        pdfpath : str
            A name for the file. If the file extension (.pdf) is not specified,
            it will be appended.
        """
        locs = self.locs
        if self.locs.shape[0] <= 10000:
            _plot_locs_connectome(locs, pdfpath)
        else:
            _plot_locs_hyp(locs, pdfpath)
    def save(self, fname, compression='blosc'):
        """
        Save method for the model object

        The data will be saved as a 'mo' file, which is a dictionary containing
        the elements of a model object saved in the hd5 format using `deepdish`.

        Parameters
        ----------
        fname : str
            A name for the file. If the file extension (.mo) is not specified,
            it will be appended.

        compression : str
            The kind of compression to use. See the deepdish documentation for
            options: http://deepdish.readthedocs.io/en/latest/api_io.html#deepdish.io.save
        """
        mo = {
            'numerator': self.numerator,
            'denominator': self.denominator,
            'locs': self.locs,
            'n_subs': self.n_subs,
            'meta': self.meta,
            'date_created': self.date_created,
            'rbf_width': self.rbf_width,
        }

        if fname[-3:] != '.mo':
            fname += '.mo'

        dd.io.save(fname, mo, compression=compression)
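    # Usage sketch: models round-trip through deepdish's HDF5 format via the .mo
    # extension and can be reloaded with supereeg's load function.
    #
    #   >>> mo.save('my_model')              # writes my_model.mo
    #   >>> same_mo = se.load('my_model.mo')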
    def get_slice(self, loc_inds, inplace=False):
        """
        Indexes model object data

        Parameters
        ----------
        loc_inds : int or list
            Locations you wish to index

        inplace : bool
            If True, indexes in place.
        """
        numerator = self.numerator[loc_inds][:, loc_inds]
        denominator = self.denominator[loc_inds][:, loc_inds]
        locs = self.locs.iloc[loc_inds]
        n_subs = self.n_subs
        meta = self.meta
        date_created = time.strftime("%c")
        if inplace:
            self.numerator = numerator
            self.denominator = denominator
            self.locs = locs
            self.n_subs = n_subs
            self.meta = meta
            self.date_created = date_created
        else:
            return Model(numerator=numerator, denominator=denominator, locs=locs,
                         n_subs=n_subs, meta=meta, date_created=date_created,
                         rbf_width=self.rbf_width)
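    # Usage sketch: slicing keeps only the requested locations, along with the
    # matching rows and columns of the numerator and denominator matrices.
    #
    #   >>> sub_mo = mo.get_slice([0, 1, 2])       # new three-location Model
    #   >>> mo.get_slice([0, 1, 2], inplace=True)  # shrink the model in place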
    def __add__(self, other):
        """
        Add two model objects together. The models must have matching
        locations. Meta properties are combined across objects; if properties
        conflict, the values from the second object are preferred (they are
        merged with dict.update).

        Parameters
        ----------
        other : supereeg.Model
            Model object to be added to the current object
        """
        return self.update(other, inplace=False)

    def __sub__(self, other):
        """
        Subtract one model object from another. The meta property 'stable' is
        updated to False.

        Parameters
        ----------
        other : supereeg.Model
            Model object to be subtracted from the current object
        """
        assert type(other) == Model, 'Unsupported data type for subtraction from Model object: ' + str(type(other))

        m1 = self
        m2 = other
        assert m1.rbf_width == m2.rbf_width

        locs = _union(m1.get_locs(), m2.get_locs())
        m1.set_locs(locs)
        m2.set_locs(locs)

        m1.meta.update(m2.meta)
        meta = copy.deepcopy(m1.meta)
        assert meta['stable'] == True, 'Model is numerically unstable; cannot update model'
        meta['stable'] = False
        warnings.warn('solution unstable')

        m1_z = m1.n_subs * m1.get_model(z_transform=True)
        m2_z = m2.n_subs * m2.get_model(z_transform=True)
        m2_z[np.where(np.isnan(m2_z))] = 0
        np.fill_diagonal(m2_z, 1)

        return Model(data=_z2r(np.divide(np.subtract(m1_z, m2_z), (m1.n_subs - m2.n_subs))),
                     locs=locs, n_subs=m1.n_subs - m2.n_subs, meta=meta, rbf_width=m1.rbf_width)
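    # Usage sketch: the operators delegate to update and to the z-space subtraction
    # above; subtraction marks the result as numerically unstable.
    #
    #   >>> combined = mo_a + mo_b    # same as mo_a.update(mo_b, inplace=False)
    #   >>> removed = combined - mo_b
    #   >>> removed.meta['stable']    # False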
###################################
# helper functions for init
###################################

def _handle_superuser(self, numerator, denominator, locs, n_subs):
    """Shortcuts model building if these args are passed"""
    self.numerator = numerator
    self.denominator = denominator

    # if locs aren't already a DataFrame, turn them into one
    if isinstance(locs, pd.DataFrame):
        self.locs = locs
    else:
        self.locs = pd.DataFrame(locs, columns=['x', 'y', 'z'])
    self.n_subs = n_subs


def _create_locs(self, locs, template):
    """Get locations from the template, or from the locs arg"""
    if locs is None:
        if template is None:
            template = _gray(20)
        nii_data, nii_locs, nii_meta = _nifti_to_brain(template)
        self.locs = pd.DataFrame(nii_locs, columns=['x', 'y', 'z'])
    else:
        self.locs = pd.DataFrame(locs, columns=['x', 'y', 'z'])

    if self.locs.shape[0] > 1000:
        warnings.warn('Model locations exceed 1000, this may take a while. Go get a cup of coffee or brew some tea!')


def _bo2model(bo, locs, width=20):
    """Returns the numerator and denominator given a brain object"""
    sub_corrmat = _get_corrmat(bo)
    sub_corrmat_z = _r2z(sub_corrmat)
    sub_rbf_weights = _log_rbf(locs, bo.get_locs(), width=width)
    n, d = _blur_corrmat(sub_corrmat_z, sub_rbf_weights)
    return n, d, 1


def _mo2model(mo, locs, width=20):
    """Returns the numerator and denominator for a model object"""
    if not isinstance(locs, pd.DataFrame):
        locs = pd.DataFrame(locs, columns=['x', 'y', 'z'])

    if locs.equals(mo.locs):
        return mo.numerator.copy(), mo.denominator.copy(), mo.n_subs
    else:
        # if the locations are not equivalent, map the input model into locs space
        sub_corrmat_z = _recover_model(mo.numerator, mo.denominator, z_transform=True)
        sub_rbf_weights = _log_rbf(locs, mo.locs, width=width)
        n, d = _blur_corrmat(sub_corrmat_z, sub_rbf_weights)
        return n, d, mo.n_subs


def _force_update(mo, bo, width=20):
    # get the subject-specific correlation matrix
    sub_corrmat = _get_corrmat(bo)

    # z-score the corrmat
    sub_corrmat_z = _r2z(sub_corrmat)

    # get the RBF weights
    sub_rbf_weights = _log_rbf(mo.locs, bo.get_locs(), width=width)

    # get the subject's expanded correlation matrix
    num_corrmat_x, denom_corrmat_x = _blur_corrmat(sub_corrmat_z, sub_rbf_weights)

    # add in the new subject's data
    n = mo.numerator.copy()
    n.real = np.logaddexp(n.real, num_corrmat_x.real)
    n.imag = np.logaddexp(n.imag, num_corrmat_x.imag)
    return _recover_model(n, np.logaddexp(mo.denominator, denom_corrmat_x), z_transform=True)


def _recover_model(num, denom, z_transform=False):
    # the numerator and denominator are stored in log units
    warnings.simplefilter('ignore')
    m = np.divide(_to_exp_real(num), np.exp(denom))
    if z_transform:
        np.fill_diagonal(m, np.inf)
        return m
    else:
        m = _z2r(m)
        np.fill_diagonal(m, 1)
        return m
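# A toy, self-contained sketch (not part of supereeg) of the log-space averaging
# that _recover_model inverts: storing log-sums and exponentiating the difference
# recovers a weighted average of z-transformed correlations without overflow.
# Negative z values are handled separately via the complex-log trick
# (_to_log_complex), which is why the numerator carries real and imaginary parts.
#
#   import numpy as np
#   z1, z2 = 0.5, 1.2   # z-transformed correlations from two subjects
#   w1, w2 = 1.0, 1.0   # RBF weights (here, full weight at an observed location)
#   num = np.logaddexp(np.log(w1 * z1), np.log(w2 * z2))  # log-space numerator
#   den = np.logaddexp(np.log(w1), np.log(w2))            # log-space denominator
#   print(np.exp(num - den))  # (w1*z1 + w2*z2) / (w1 + w2) == 0.85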