Source code for supereeg.model

from __future__ import division
from __future__ import print_function
import time
import copy
import warnings
import six
import pandas as pd
import numpy as np
import seaborn as sns
import deepdish as dd
import matplotlib.pyplot as plt
from .helpers import _get_corrmat, _r2z, _z2r, _log_rbf, _blur_corrmat, _plot_borderless,\
    _near_neighbor, _timeseries_recon, _count_overlapping, _plot_locs_connectome, \
    _plot_locs_hyp, _gray, _nifti_to_brain,\
    _unique, _union, _empty, _to_log_complex, _to_exp_real
from .brain import Brain
from .nifti import Nifti


class Model(object):
    """
    Model data object for the supereeg package

    This class holds your supereeg model. To create an instance, pass a list of
    brain objects and the model will be generated from those brain objects. You
    can also add your own model by passing a numpy array as your matrix and the
    corresponding locations. Alternatively, you can bypass creating a new model
    by passing numerator, denominator, locations, and n_subs (see parameters
    for details). Additionally, you can include a meta dictionary with any
    other information that you want to save with the model.

    Parameters
    ----------
    data : supereeg.Brain or list of supereeg.Brain, supereeg.Nifti or list of supereeg.Nifti, or Numpy.ndarray
        A supereeg.Brain object or supereeg.Nifti object, a list of such
        objects, or a Numpy.ndarray containing your model.

    locs : pandas.DataFrame or np.ndarray
        MNI coordinate (x, y, z) by number of electrodes DataFrame containing
        electrode locations

    template : filepath
        Path to a template nifti file used to set model locations

    numerator : Numpy.ndarray
        (Optional) A locations x locations matrix comprising the sum of the log
        z-transformed correlation matrices over subjects. If used, you must
        also pass denominator, locs and n_subs. Otherwise, numerator will be
        computed from the brain object data.

    denominator : Numpy.ndarray
        (Optional) A locations x locations matrix comprising the sum of the log
        (weighted) number of subjects contributing to each matrix cell. If
        used, you must also pass numerator, locs and n_subs. Otherwise,
        denominator will be computed from the brain object data.

    n_subs : int
        The number of subjects used to create the model. Required if you pass
        numerator/denominator. Otherwise computed automatically from the data.

    rbf_width : positive scalar
        The width of the radial basis function (RBF) used as a spatial prior
        for smoothing estimates at nearby locations. (Default: 20)

    meta : dict
        Dict containing whatever you want. Initialized with a stability field,
        {'stable': True}, which is changed to {'stable': False} after a
        subtraction is performed.

    date_created : str
        Time created

    save : str or None
        Optional filename to save the created model

    Attributes
    ----------
    numerator : Numpy.ndarray
        A locations x locations matrix comprising the sum of the log
        z-transformed correlation matrices over subjects

    denominator : Numpy.ndarray
        A locations x locations matrix comprising the log sum of the (weighted)
        number of subjects contributing to each matrix cell

    n_subs : int
        Number of subjects used to create the model

    Returns
    -------
    model : supereeg.Model instance
        A model that can be used to infer timeseries from unknown locations
    """
    def __init__(self, data=None, locs=None, template=None, numerator=None, denominator=None,
                 n_subs=None, meta=None, date_created=None, rbf_width=20, save=None):
        from .load import load

        self.locs = None
        self.numerator = None
        self.denominator = None
        self.n_subs = 0
        self.meta = meta
        if self.meta is None:
            self.meta = {'stable': True}
        self.date_created = date_created
        self.rbf_width = rbf_width

        if n_subs is None:
            n_subs = 1

        if not (data is None):
            if type(data) == list:
                if len(data) == 0:
                    data = None
                else:
                    if not (locs is None):
                        if type(locs) == pd.DataFrame:
                            locs = locs.as_matrix()
                        assert type(locs) == np.ndarray, 'Locations must be either a DataFrame or a numpy array'
                        assert locs.shape[1] == 3, 'Only 3d locations are supported'

                    all_locs = locs
                    for i in range(1, len(data)):
                        if type(data[i]) in (Model, Brain, Nifti):
                            if all_locs is None:
                                all_locs = data[i].get_locs().as_matrix()
                            else:
                                all_locs = np.vstack((all_locs, data[i].get_locs().as_matrix()))
                    locs, loc_inds = _unique(all_locs)

                    self.__init__(data=data[0], locs=locs, template=template, meta=self.meta,
                                  rbf_width=self.rbf_width, n_subs=1)
                    for i in range(1, len(data)):
                        self.update(Model(data=data[i], locs=locs, template=template, meta=self.meta,
                                          rbf_width=self.rbf_width, n_subs=1))

            if isinstance(data, six.string_types):
                data = load(data)

            if isinstance(data, Nifti):
                data = Brain(data)

            if isinstance(data, Model):
                self.date_created = data.date_created
                self.denominator = data.denominator
                self.locs = data.locs
                self.meta = data.meta
                self.n_subs = data.n_subs
                self.numerator = data.numerator
                self.rbf_width = data.rbf_width
                n_subs = self.n_subs
            elif isinstance(data, Brain):
                corrmat = _get_corrmat(data)
                self.__init__(data=corrmat, locs=data.get_locs(), n_subs=1)
            elif isinstance(data, np.ndarray):
                assert not (locs is None), 'must specify model locations'
                assert locs.shape[0] == data.shape[0], 'number of locations must match the size of the given correlation matrix'
                self.locs = locs
                self.numerator = _to_log_complex(_r2z(data))
                self.denominator = np.zeros_like(self.numerator, dtype=np.float32)

        if not ((numerator is None) or (denominator is None)):
            assert numerator.shape[0] == numerator.shape[1], 'numerator must be a square matrix'
            assert denominator.shape[0] == denominator.shape[1], 'denominator must be a square matrix'
            assert numerator.shape[0] == denominator.shape[0], 'numerator and denominator must be the same shape'
            assert not (locs is None), 'must specify model locations'
            assert locs.shape[0] == numerator.shape[0], 'number of locations must match the size of the numerator ' \
                                                        'and denominator matrices'
            if (self.numerator is None) or (self.denominator is None):
                self.numerator = numerator
                self.denominator = denominator
            else:
                # numerator and denominator may already reflect inferred data; effectively
                # the user has now passed in *two* sets of data, so combine them in log space
                self._set_numerator(np.logaddexp(self.numerator.real, numerator.real),
                                    np.logaddexp(self.numerator.imag, numerator.imag))
                self.denominator = np.logaddexp(self.denominator, denominator)
            self.locs = locs
            self.n_subs += n_subs

        if not (template is None):
            # blur the correlation matrix out to the template locations
            if not (locs is None):
                warnings.warn("Argument 'locs' will be ignored in favor of the provided Nifti template")
            if isinstance(template, six.string_types):
                template = load(template)
            assert type(template) == Nifti, 'template must be a Nifti object or a path to a Nifti object'

            bo = Brain(template)
            rbf_weights = _log_rbf(bo.get_locs(), self.locs, width=self.rbf_width)
            self.numerator, self.denominator = _blur_corrmat(self.get_model(z_transform=True), rbf_weights)
            self.locs = bo.get_locs()
        elif not (locs is None):
            # blur the correlation matrix out to locs
            if isinstance(data, (Brain, Model)):
                # self.locs may now conflict with locs
                if not ((locs.shape[0] == self.locs.shape[0]) and np.allclose(locs, self.locs)):
                    rbf_weights = _log_rbf(locs, self.locs, width=self.rbf_width)
                    self.numerator, self.denominator = _blur_corrmat(self.get_model(z_transform=True), rbf_weights)
                    self.locs = locs
            elif self.locs is None:
                self.locs = locs

        assert not (self.locs is None), 'Must specify model locations directly via the locs argument, or indirectly via a Model or Brain object (or both)'

        # sort locations and force them to be unique
        self.locs, loc_inds = _unique(self.locs)
        self.numerator = self.numerator[loc_inds, :][:, loc_inds]
        self.denominator = self.denominator[loc_inds, :][:, loc_inds]

        if not type(self.locs) == pd.DataFrame:
            self.locs = pd.DataFrame(data=self.locs, columns=['x', 'y', 'z'])

        if not self.date_created:
            self.date_created = time.strftime("%c")

        self.n_locs = self.locs.shape[0]
        self.n_subs = n_subs

        if not (save is None):
            if type(save) == str:
                self.save(save)
            else:
                warnings.warn('bad filename, cannot save to disk: ' + str(save))
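    # A minimal construction sketch (illustrative, not from the original source;
    # bo1 and bo2 stand in for existing supereeg.Brain objects):
    #
    #   >>> import numpy as np
    #   >>> import supereeg as se
    #   >>> locs = np.array([[0., 0., 0.], [10., 10., 10.], [20., 20., 20.]])
    #   >>> mo = se.Model(data=[bo1, bo2], locs=locs, rbf_width=20)  # from brain objects
    #   >>> mo = se.Model(data=np.eye(3), locs=locs)                 # from a correlation matrix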
    def get_model(self, z_transform=False):
        """ Returns a copy of the model in the form of a correlation matrix """
        if (self.numerator is None) or (self.denominator is None):
            m = np.eye(self.n_locs)
        else:
            m = _recover_model(self.numerator, self.denominator, z_transform=z_transform)
        m[np.isnan(m)] = 0
        return m
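    # Usage sketch (assumes an existing Model instance `mo`): the recovered matrix
    # can be returned either as raw correlations or as Fisher z-transformed values.
    #
    #   >>> corrmat = mo.get_model()                    # correlations in [-1, 1]
    #   >>> z_corrmat = mo.get_model(z_transform=True)  # z-transformed; diagonal is inf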
    def get_locs(self):
        """ Returns the locations in the model """
        return self.locs
    def set_locs(self, new_locs, force_include_bo_locs=False):
        """
        Update self.locs to a new set of locations (and blur the correlation
        matrix accordingly). If force_include_bo_locs is True (default: False),
        the final set of locations will also include the old locations.
        """
        if force_include_bo_locs:
            new_locs = _union(self.locs, new_locs)
        else:
            new_locs, tmp = _unique(new_locs)

        if _empty(self.locs):
            self.locs = new_locs
            if not _empty(new_locs):
                self.numerator = np.log(np.zeros([new_locs.shape[0], new_locs.shape[0]], dtype=np.complex128))
                self.denominator = np.zeros_like(self.numerator, dtype=np.float64)
                self.locs = new_locs
                self.n_locs = new_locs.shape[0]
            return
        elif _empty(new_locs):
            if not force_include_bo_locs:
                self.locs = pd.DataFrame(columns=('x', 'y', 'z'))
                self.n_locs = 0
                self.numerator = np.array([], dtype=np.complex128)
                self.denominator = np.array([], dtype=np.float64)
            return

        new_locs_in_self = _count_overlapping(self.get_locs(), new_locs)
        if np.all(new_locs_in_self):
            inds = _count_overlapping(new_locs, self.get_locs())
            self.locs = self.locs.iloc[inds, :]
            self.n_locs = self.locs.shape[0]
            self.numerator = self.numerator[inds, :][:, inds]
            self.denominator = self.denominator[inds, :][:, inds]
            return
        else:
            rbf_weights = _log_rbf(new_locs, self.get_locs(), width=self.rbf_width)
            self.numerator, self.denominator = _blur_corrmat(self.get_model(z_transform=True), rbf_weights)
            self.locs = new_locs

        self.locs, loc_inds = _unique(self.locs)
        self.numerator = self.numerator[loc_inds, :][:, loc_inds]
        self.denominator = self.denominator[loc_inds, :][:, loc_inds]
        self.n_locs = self.locs.shape[0]
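    # Usage sketch (hypothetical `mo` and `new_locs`): moving a model to new
    # locations blurs the stored correlation matrix through the RBF spatial prior.
    #
    #   >>> mo.set_locs(new_locs)                              # replace locations
    #   >>> mo.set_locs(new_locs, force_include_bo_locs=True)  # union with existing locations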
    def predict(self, bo, nearest_neighbor=False, match_threshold='auto', force_update=False,
                force_include_bo_locs=True, preprocess='zscore', recon_loc_inds=None):
        """
        Takes a brain object and a 'full' covariance model, fills in all
        electrode timeseries for all missing locations, and returns the new
        brain object

        Parameters
        ----------
        bo : supereeg.Brain, supereeg.Nifti, or supereeg.Model
            An object that will be converted to a Brain object.

        nearest_neighbor : bool
            If True, finds the nearest voxel for each subject's electrode
            location and uses that as the revised electrode location matrix in
            the prediction. (Default: False)

        match_threshold : 'auto' or int
            If 'auto', ignore all electrodes whose distance from the nearest
            matching voxel is greater than the maximum voxel dimension. If a
            value greater than 0 is passed, include only electrodes that are
            within that distance of a matched voxel.

        force_update : bool
            If True, will update the model with the patient's correlation
            matrix. (Default: False)

        force_include_bo_locs : bool
            If True, and if force_update is False, update the locations in the
            model to include the locations in the given brain object prior to
            generating the predictions. If force_update is True, this parameter
            is forced to be True (force_update requires updating the locations)
            and the specified value is ignored. (Default: True)

        preprocess : 'zscore' or None
            The predict algorithm requires the data to be z-scored. However, if
            your data are already z-scored you can bypass this by setting to None.

        recon_loc_inds : int, list, or None
            Index for the estimated location in the model (location in
            unknown_inds)

        Returns
        -------
        bo_p : supereeg.Brain
            New brain data object with missing electrode locations filled in
        """
        if not isinstance(bo, Brain):
            bo = Brain(bo)
        bor = bo.apply_filter(inplace=False)

        # if match_threshold is 'auto', ignore all electrodes whose distance from the
        # nearest matching voxel is greater than the maximum voxel dimension
        if nearest_neighbor:
            bor = _near_neighbor(bor, self, match_threshold=match_threshold)

        if self.locs.shape[0] > 1000:
            warnings.warn('Model locations exceed 1000, this may take a while. Go grab a cup of coffee.')

        # if force_update is True, update the model with the subject's correlation matrix
        if force_update:
            mo = self.update(bor, inplace=False)
        else:
            mo = self

        # blur out the model to include the brain object locations
        mo.set_locs(bor.get_locs(), force_include_bo_locs=force_include_bo_locs)

        activations = _timeseries_recon(bor, mo, preprocess=preprocess, recon_loc_inds=recon_loc_inds)

        if not recon_loc_inds:
            loc_labels = np.array(['observed'] * len(mo.get_locs()))
            loc_labels[~_count_overlapping(bor.get_locs(), mo.get_locs())] = ['reconstructed']
            recon_loc = mo.locs
        else:
            recon_loc = np.atleast_2d(mo.get_locs().iloc[~_count_overlapping(bor.get_locs(), mo.get_locs())].iloc[recon_loc_inds[0]].as_matrix())
            loc_labels = np.array(['reconstructed'] * len(list(recon_loc_inds)))

        return Brain(data=activations, locs=recon_loc, sessions=bor.sessions,
                     sample_rate=bor.sample_rate, label=loc_labels.tolist(), filter=None)
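    # Usage sketch (hypothetical Brain object `bo`): predict reconstructs activity
    # at every model location and labels each location 'observed' or 'reconstructed'.
    #
    #   >>> bo_p = mo.predict(bo, nearest_neighbor=True, match_threshold='auto')
    #   >>> bo_p.label[:5]  # e.g. ['observed', 'reconstructed', ...]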
    def update(self, data, inplace=True):
        """
        Update a model with new data.

        Parameters
        ----------
        data : supereeg.Brain, supereeg.Nifti, or supereeg.Model (or a mixed list of these)
            New data

        inplace : bool
            Whether to run the update in place or return a new model
            (default: True)

        Returns
        -------
        model : supereeg.Model
            A new updated model object
        """
        if inplace:
            m1 = self
        else:
            m1 = Model(self)
        assert m1.meta['stable'] == True, 'solution unstable'

        m2 = Model(data)

        locs = _union(m1.get_locs(), m2.get_locs())
        m1.set_locs(locs)
        m2.set_locs(locs)

        m1._set_numerator(np.logaddexp(m1.numerator.real, m2.numerator.real),
                          np.logaddexp(m1.numerator.imag, m2.numerator.imag))

        # simplify to ensure that each entry of the numerator has either a non-zero
        # real part OR a non-zero imaginary part (or neither)
        n = _to_log_complex(_to_exp_real(m1.numerator))
        m1._set_numerator(n.real, n.imag)

        m1.denominator = np.logaddexp(m1.denominator, m2.denominator)
        m1.locs = locs
        m1.n_locs = locs.shape[0]
        m1.n_subs += m2.n_subs

        # combine meta info
        if not ((m1.meta is None) and (m2.meta is None)):
            if m1.meta is None:
                m1.meta = m2.meta
            elif (type(m1.meta) == dict) and (type(m2.meta) == dict):
                m1.meta.update(m2.meta)

        if not inplace:
            return m1
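    # Usage sketch: because the numerator and denominator are accumulated with
    # np.logaddexp, each update folds a new subject's contribution into the model
    # without recomputing earlier subjects (bo3 is a hypothetical Brain object).
    #
    #   >>> mo.update(bo3)                          # in place
    #   >>> bigger = mo.update(bo3, inplace=False)  # returns a new Model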
    def _set_numerator(self, n_real, n_imag):
        """ Internal function for setting the numerator (deals with size mismatches) """
        self.numerator = np.zeros_like(n_real, dtype=np.complex128)
        self.numerator.real = n_real
        self.numerator.imag = n_imag
    def info(self):
        """
        Print info about the model object

        Prints the number of locations, number of subjects, RBF width, date
        created, and any optional meta data.
        """
        print('Number of locations: ' + str(self.n_locs))
        print('Number of subjects: ' + str(self.n_subs))
        print('RBF width: ' + str(self.rbf_width))
        print('Date created: ' + str(self.date_created))
        print('Meta data: ' + str(self.meta))
    def plot_data(self, savefile=None, show=True, **kwargs):
        """
        Plot the supereeg model as a correlation matrix

        This function wraps seaborn's heatmap and accepts any inputs that
        seaborn supports for models smaller than 2000 x 2000. If the model is
        larger, the plot cannot be generated without specifying a savefile.

        Parameters
        ----------
        savefile : str or None
            Filename to save the figure to (required for models with 2000 or
            more locations)

        show : bool
            If False, the image is not rendered (default: True)

        Returns
        -------
        ax : matplotlib.Axes
            An axes object
        """
        corr_mat = self.get_model(z_transform=False)

        if np.shape(corr_mat)[0] < 2000:
            ax = sns.heatmap(corr_mat, cbar_kws={'label': 'correlation'}, **kwargs)
        else:
            if savefile is None:
                raise NotImplementedError('Cannot plot large models when savefile is None')
            else:
                ax = _plot_borderless(corr_mat, savefile=savefile, vmin=-1, vmax=1, cmap='Spectral')
        if show:
            plt.show()

        return ax
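    # Usage sketch: small models render directly through seaborn's heatmap; models
    # with 2000+ locations must be written to disk (the filename is hypothetical).
    #
    #   >>> ax = mo.plot_data(cmap='Spectral')
    #   >>> ax = big_mo.plot_data(savefile='big_model', show=False)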
    def plot_locs(self, pdfpath=None):
        """
        Plots electrode locations from the model object

        Parameters
        ----------
        pdfpath : str
            A name for the file. If the file extension (.pdf) is not specified,
            it will be appended.
        """
        locs = self.locs
        if self.locs.shape[0] <= 10000:
            _plot_locs_connectome(locs, pdfpath)
        else:
            _plot_locs_hyp(locs, pdfpath)
    def save(self, fname, compression='blosc'):
        """
        Save method for the model object

        The data will be saved as a 'mo' file, which is a dictionary containing
        the elements of a model object saved in the hd5 format using `deepdish`.

        Parameters
        ----------
        fname : str
            A name for the file. If the file extension (.mo) is not specified,
            it will be appended.

        compression : str
            The kind of compression to use. See the deepdish documentation for
            options: http://deepdish.readthedocs.io/en/latest/api_io.html#deepdish.io.save
        """
        mo = {
            'numerator': self.numerator,
            'denominator': self.denominator,
            'locs': self.locs,
            'n_subs': self.n_subs,
            'meta': self.meta,
            'date_created': self.date_created,
            'rbf_width': self.rbf_width,
        }

        if fname[-3:] != '.mo':
            fname += '.mo'

        dd.io.save(fname, mo, compression=compression)
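    # Usage sketch: models round-trip through deepdish's HDF5 format via the .mo
    # extension and can be reloaded with supereeg's load function.
    #
    #   >>> mo.save('my_model')              # writes my_model.mo
    #   >>> same_mo = se.load('my_model.mo')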
    def get_slice(self, loc_inds, inplace=False):
        """
        Indexes model object data

        Parameters
        ----------
        loc_inds : int or list
            Locations you wish to index

        inplace : bool
            If True, indexes in place.
        """
        numerator = self.numerator[loc_inds][:, loc_inds]
        denominator = self.denominator[loc_inds][:, loc_inds]
        locs = self.locs.iloc[loc_inds]
        n_subs = self.n_subs
        meta = self.meta
        date_created = time.strftime("%c")
        if inplace:
            self.numerator = numerator
            self.denominator = denominator
            self.locs = locs
            self.n_subs = n_subs
            self.meta = meta
            self.date_created = date_created
        else:
            return Model(numerator=numerator, denominator=denominator, locs=locs,
                         n_subs=n_subs, meta=meta, date_created=date_created,
                         rbf_width=self.rbf_width)
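    # Usage sketch: slicing keeps only the requested locations, along with the
    # matching rows and columns of the numerator and denominator matrices.
    #
    #   >>> sub_mo = mo.get_slice([0, 1, 2])       # new three-location Model
    #   >>> mo.get_slice([0, 1, 2], inplace=True)  # shrink the model in place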
    def __add__(self, other):
        """
        Add two model objects together. The models must have matching
        locations. Meta properties are combined across objects; if properties
        conflict, the values from the second object are preferred (they are
        merged with dict.update).

        Parameters
        ----------
        other : supereeg.Model
            Model object to be added to the current object
        """
        return self.update(other, inplace=False)

    def __sub__(self, other):
        """
        Subtract one model object from another. The meta property 'stable' is
        updated to False.

        Parameters
        ----------
        other : supereeg.Model
            Model object to be subtracted from the current object
        """
        assert type(other) == Model, 'Unsupported data type for subtraction from Model object: ' + str(type(other))

        m1 = self
        m2 = other
        assert m1.rbf_width == m2.rbf_width

        locs = _union(m1.get_locs(), m2.get_locs())
        m1.set_locs(locs)
        m2.set_locs(locs)

        m1.meta.update(m2.meta)
        meta = copy.deepcopy(m1.meta)
        assert meta['stable'] == True, 'Model is numerically unstable; cannot update model'
        meta['stable'] = False
        warnings.warn('solution unstable')

        m1_z = m1.n_subs * m1.get_model(z_transform=True)
        m2_z = m2.n_subs * m2.get_model(z_transform=True)
        m2_z[np.where(np.isnan(m2_z))] = 0
        np.fill_diagonal(m2_z, 1)

        return Model(data=_z2r(np.divide(np.subtract(m1_z, m2_z), (m1.n_subs - m2.n_subs))),
                     locs=locs, n_subs=m1.n_subs - m2.n_subs, meta=meta, rbf_width=m1.rbf_width)
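    # Usage sketch: the operators delegate to update and to the z-space subtraction
    # above; subtraction marks the result as numerically unstable.
    #
    #   >>> combined = mo_a + mo_b    # same as mo_a.update(mo_b, inplace=False)
    #   >>> removed = combined - mo_b
    #   >>> removed.meta['stable']    # False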
###################################
# helper functions for init
###################################

def _handle_superuser(self, numerator, denominator, locs, n_subs):
    """Shortcuts model building if these args are passed"""
    self.numerator = numerator
    self.denominator = denominator

    # if locs aren't already a DataFrame, turn them into one
    if isinstance(locs, pd.DataFrame):
        self.locs = locs
    else:
        self.locs = pd.DataFrame(locs, columns=['x', 'y', 'z'])
    self.n_subs = n_subs


def _create_locs(self, locs, template):
    """Get locations from the template, or from the locs arg"""
    if locs is None:
        if template is None:
            template = _gray(20)
        nii_data, nii_locs, nii_meta = _nifti_to_brain(template)
        self.locs = pd.DataFrame(nii_locs, columns=['x', 'y', 'z'])
    else:
        self.locs = pd.DataFrame(locs, columns=['x', 'y', 'z'])

    if self.locs.shape[0] > 1000:
        warnings.warn('Model locations exceed 1000, this may take a while. Go get a cup of coffee or brew some tea!')


def _bo2model(bo, locs, width=20):
    """Returns the numerator and denominator given a brain object"""
    sub_corrmat = _get_corrmat(bo)
    sub_corrmat_z = _r2z(sub_corrmat)
    sub_rbf_weights = _log_rbf(locs, bo.get_locs(), width=width)
    n, d = _blur_corrmat(sub_corrmat_z, sub_rbf_weights)
    return n, d, 1


def _mo2model(mo, locs, width=20):
    """Returns the numerator and denominator for a model object"""
    if not isinstance(locs, pd.DataFrame):
        locs = pd.DataFrame(locs, columns=['x', 'y', 'z'])

    if locs.equals(mo.locs):
        return mo.numerator.copy(), mo.denominator.copy(), mo.n_subs
    else:
        # if the locations are not equivalent, map the input model into locs space
        sub_corrmat_z = _recover_model(mo.numerator, mo.denominator, z_transform=True)
        sub_rbf_weights = _log_rbf(locs, mo.locs, width=width)
        n, d = _blur_corrmat(sub_corrmat_z, sub_rbf_weights)
        return n, d, mo.n_subs


def _force_update(mo, bo, width=20):
    # get the subject-specific correlation matrix
    sub_corrmat = _get_corrmat(bo)

    # z-score the corrmat
    sub_corrmat_z = _r2z(sub_corrmat)

    # get the RBF weights
    sub_rbf_weights = _log_rbf(mo.locs, bo.get_locs(), width=width)

    # get the subject's expanded correlation matrix
    num_corrmat_x, denom_corrmat_x = _blur_corrmat(sub_corrmat_z, sub_rbf_weights)

    # add in the new subject's data
    n = mo.numerator.copy()
    n.real = np.logaddexp(n.real, num_corrmat_x.real)
    n.imag = np.logaddexp(n.imag, num_corrmat_x.imag)
    return _recover_model(n, np.logaddexp(mo.denominator, denom_corrmat_x), z_transform=True)


def _recover_model(num, denom, z_transform=False):
    # the numerator and denominator are stored in log units
    warnings.simplefilter('ignore')
    m = np.divide(_to_exp_real(num), np.exp(denom))
    if z_transform:
        np.fill_diagonal(m, np.inf)
        return m
    else:
        m = _z2r(m)
        np.fill_diagonal(m, 1)
        return m
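# A toy, self-contained sketch (not part of supereeg) of the log-space averaging
# that _recover_model inverts: storing log-sums and exponentiating the difference
# recovers a weighted average of z-transformed correlations without overflow.
# Negative z values are handled separately via the complex-log trick
# (_to_log_complex), which is why the numerator carries real and imaginary parts.
#
#   import numpy as np
#   z1, z2 = 0.5, 1.2   # z-transformed correlations from two subjects
#   w1, w2 = 1.0, 1.0   # RBF weights (here, full weight at an observed location)
#   num = np.logaddexp(np.log(w1 * z1), np.log(w2 * z2))  # log-space numerator
#   den = np.logaddexp(np.log(w1), np.log(w2))            # log-space denominator
#   print(np.exp(num - den))  # (w1*z1 + w2*z2) / (w1 + w2) == 0.85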