from __future__ import division
from __future__ import print_function
#TODO: there are multiple implementations of functions like _apply_by_file_index. these should be consolidated into one
#common function that is used and called multiple times. In addition, aggregator and transform functions that are used
#across apply_by_file wrappers should be shared (rather than defined multiple times). We could also call _apply_by_file_index
#"groupby" to conform to the pandas style. e.g. bo.groupby(session) returns a generator whose products are brain objects
#each of one session. we could then use bo.groupby(session).aggregate(xform) to produce a list of objects, where each is
#comprised of the xform applied to the brain object containing one session worth of data from the original object.
import copy
import os
import numpy.matlib as mat
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import imageio
import nibabel as nib
import hypertools as hyp
import shutil
import warnings
from nilearn import plotting as ni_plt
from nilearn import image
from nilearn.input_data import NiftiMasker
from scipy.stats import kurtosis, zscore, pearsonr
from scipy.spatial.distance import pdist
from scipy.spatial.distance import cdist
from scipy.spatial.distance import squareform
from scipy.special import logsumexp
from scipy import linalg
from scipy.ndimage.interpolation import zoom
try:
from itertools import zip_longest
except:
from itertools import izip_longest as zip_longest
def _std(res=None):
    """
    Load a Nifti image of the standard MNI 152 brain at the given resolution

    Parameters
    ----------
    res : int or float or None
        Cubic voxel size (int or float) or a list/array of 3D voxel dimensions.
        If None (or falsy), the brain is returned at its native resolution.

    Returns
    ----------
    results : Nifti1Image
        Nifti image of the standard brain
    """
    from .nifti import Nifti
    from .load import load
    standard_brain = load('std')
    if not res:
        return standard_brain
    return _resample_nii(standard_brain, res)
def _gray(res=None):
    """
    Load a Nifti image of the gray matter masked MNI 152 brain at the given resolution

    Parameters
    ----------
    res : int or float or None
        Cubic voxel size (int or float) or a list/array of 3D voxel dimensions.
        If None, returns the loaded gray matter masked brain at native resolution.

    Returns
    ----------
    results : Nifti1Image
        Nifti image of gray masked brain
    """
    from .nifti import Nifti
    from .load import load
    threshold = 100
    img = load('gray')
    data = img.get_data()
    # zero out voxels that are NaN or fall below the gray-matter threshold
    data[np.isnan(data) | (data < threshold)] = 0
    masked = Nifti(data, img.affine)
    if np.iterable(res) or np.isscalar(res):
        return _resample_nii(masked, res)
    return masked
def _resample_nii(x, target_res, precision=5):
    """
    Resample a Nifti image to have the given voxel dimensions

    Parameters
    ----------
    x : Nifti1Image
        Input Nifti image (a nibabel Nifti1Image object)
    target_res : int or float or None
        Int or float (for cubic voxels) or a list or array of 3D voxel dimensions
    precision : int
        Number of decimal places in affine transformation matrix for resulting image (default: 5)

    Returns
    ----------
    results : Nifti1Image
        Re-scaled Nifti image
    """
    from .nifti import Nifti
    if np.any(np.isnan(x.get_data())):
        img = x.get_data()
        img[np.isnan(img)] = 0.0
        x = nib.nifti1.Nifti1Image(img, x.affine)
    res = x.header.get_zooms()[0:3]
    scale = np.divide(res, target_res).ravel()
    # copy the affine before modifying it; the original code scaled x.affine
    # in place, silently mutating the caller's image
    target_affine = x.affine.copy()
    target_affine[0:3, 0:3] /= scale
    target_affine = np.round(target_affine, decimals=precision)
    # correct for 1-voxel shift
    target_affine[0:3, 3] -= np.squeeze(np.multiply(np.divide(target_res, 2.0), np.sign(target_affine[0:3, 3])))
    target_affine[0:3, 3] += np.squeeze(np.sign(target_affine[0:3, 3]))
    if len(scale) < np.ndim(x.get_data()):
        assert np.ndim(x.get_data()) == 4, 'Data must be 3D or 4D'
        # leave the 4th (time) dimension unscaled
        scale = np.append(scale, x.shape[3])
    z = zoom(x.get_data(), scale)
    try:
        # mask out near-zero interpolation residue; integer-typed data cannot
        # hold NaN, which raises ValueError/TypeError and is skipped
        z[z < 1e-5] = np.nan
    except (ValueError, TypeError):
        pass
    return Nifti(z, target_affine)
def _apply_by_file_index(bo, xform, aggregator):
"""
Session dependent function application and aggregation
Parameters
----------
bo : Brain object
Contains data
xform : function
The function to apply to the data matrix from each filename
aggregator: function
Function for aggregating results across multiple iterations
Returns
----------
results : numpy ndarray
Array of aggregated results
"""
for idx, session in enumerate(bo.sessions.unique()):
session_xform = xform(bo.get_slice(sample_inds=np.where(bo.sessions == session)[0], inplace=False))
if idx is 0:
results = session_xform
else:
results = aggregator(results, session_xform)
return results
def _kurt_vals(bo):
"""
Function that calculates maximum kurtosis values for each channel
Parameters
----------
bo : Brain object
Contains data
Returns
----------
results: 1D ndarray
Maximum kurtosis across sessions for each channel
"""
sessions = bo.sessions.unique()
results = list(map(lambda s: kurtosis(bo.data[(s==bo.sessions).values]), sessions))
return np.max(np.vstack(results), axis=0)
def _get_corrmat(bo):
    """
    Function that calculates the average subject level correlation matrix for brain object across session

    Parameters
    ----------
    bo : Brain object
        Contains data

    Returns
    ----------
    results : 2D np.ndarray
        The average correlation matrix across sessions
    """
    def sum_agg(accumulated, nxt):
        return accumulated + nxt

    def weighted_zcorr(b):
        # session correlation matrix, Fisher z-transformed and weighted by duration
        corrs = 1 - squareform(pdist(b.get_data().T, 'correlation'))
        return np.multiply(b.dur, _r2z(corrs))

    summed = _apply_by_file_index(bo, weighted_zcorr, sum_agg)
    # dividing the duration-weighted sum by total recording time yields the
    # time-weighted mean; convert back to correlation space
    return _z2r(summed / np.sum(bo.dur))
def _z_score(bo):
    """
    Z-score the data within each session and stack the results

    Parameters
    ----------
    bo : Brain object
        Contains data

    Returns
    ----------
    results : 2D np.ndarray
        Data z-scored within each session, vertically stacked across sessions
    """
    return _apply_by_file_index(bo,
                                lambda b: zscore(b.get_data()),
                                lambda top, bottom: np.vstack((top, bottom)))
def _z2r(z):
"""
Function that calculates the inverse Fisher z-transformation
Parameters
----------
z : int or ndarray
Fishers z transformed correlation value
Returns
----------
result : int or ndarray
Correlation value
"""
warnings.simplefilter('ignore')
if isinstance(z, list):
z = np.array(z)
r = (np.exp(2 * z) - 1) / (np.exp(2 * z) + 1)
if isinstance(r, np.ndarray):
r[np.isinf(z) & (z > 0)] = 1
r[np.isinf(z) & (z < 0)] = -1
else:
if np.isinf(z) & (z > 0):
return 1
elif np.isinf(z) & (z < 0):
return -1
return r
def _r2z(r):
"""
Function that calculates the Fisher z-transformation
Parameters
----------
r : int or ndarray
Correlation value
Returns
----------
result : int or ndarray
Fishers z transformed correlation value
"""
warnings.simplefilter('ignore')
return 0.5 * (np.log(1 + r) - np.log(1 - r))
def _log_rbf(to_coords, from_coords, width=20):
"""
Radial basis function
Parameters
----------
to_coords : ndarray
Series of all coordinates (one per row) - R_full
c : ndarray
Series of subject's coordinates (one per row) - R_subj
width : positive scalar
Radius
Returns
----------
results : ndarray
Matrix of log rbf weights for each subject coordinate for all coordinates
"""
assert np.isscalar(width), 'RBF width must be a scalar'
assert width > 0, 'RBF width must be positive'
weights = -cdist(to_coords, from_coords, metric='euclidean') ** 2 / float(width)
return weights
def tal2mni(r):
    """
    Convert coordinates (electrode locations) from Talairach to MNI space

    Parameters
    ----------
    r : ndarray
        Coordinate locations (Talairach space)

    Returns
    ----------
    results : ndarray
        Coordinate locations (MNI space), rounded to 2 decimal places
    """
    # (the stray "[docs]" prefix on this line was a Sphinx-HTML scraping
    # artifact and made the file unparseable)
    rotmat = np.array([[1, 0, 0, 0], [0, 0.9988, 0.0500, 0], [0, -0.0500, 0.9988, 0], [0, 0, 0, 1.0000]])
    up = np.array([[0.9900, 0, 0, 0], [0, 0.9700, 0, 0], [0, 0, 0.9200, 0], [0, 0, 0, 1.0000]])
    down = np.array([[0.9900, 0, 0, 0], [0, 0.9700, 0, 0], [0, 0, 0.8400, 0], [0, 0, 0, 1.0000]])
    # homogeneous coordinates, one column per point; np.float was removed from
    # modern numpy, the builtin float is equivalent
    inpoints = np.c_[r, np.ones(r.shape[0], dtype=float)].T
    # points below the AC plane (z < 0) use the 'down' scaling matrix
    below = inpoints[2, :] < 0
    inpoints[:, below] = linalg.solve(np.dot(rotmat, down), inpoints[:, below])
    inpoints[:, ~below] = linalg.solve(np.dot(rotmat, up), inpoints[:, ~below])
    return np.round(inpoints[0:3, :].T, decimals=2)
def _blur_corrmat(Z, weights):
    """
    Gets full correlation matrix

    Parameters
    ----------
    Z : Numpy array
        Subject's Fisher z-transformed correlation matrix
    weights : Numpy array
        Log RBF weights matrix calculated using the _log_rbf function
        (rows: all locations; columns: subject locations).
        NOTE(review): the original docstring also described a `mode`
        parameter, but no such parameter exists in the signature.

    Returns
    ----------
    numerator : Numpy array
        Numerator for the expanded correlation matrix, in log space; stored as
        complex numbers where the real part tracks positive correlations and
        the imaginary part tracks the magnitudes of negative ones
    denominator : Numpy array
        Denominator for the expanded correlation matrix (log of summed weights)
    """
    #import seaborn as sns
    #import matplotlib.pyplot as plt
    triu_inds = np.triu_indices(Z.shape[0], k=1)
    #need to do computations seperately for positive and negative values
    sign_Z_full = np.sign(Z)
    #logZ_pos_full = np.log(np.multiply(sign_Z_full > 0, Z))
    #logZ_neg_full = np.log(np.multiply(sign_Z_full < 0, np.abs(Z)))
    sign_Z = sign_Z_full[triu_inds]
    # log of the positive entries (non-positive entries become log(0) = -inf)
    # and log of the magnitudes of the negative entries
    logZ_pos = np.log(np.multiply(sign_Z > 0, Z[triu_inds]))
    logZ_neg = np.log(np.multiply(sign_Z < 0, np.abs(Z[triu_inds])))
    n = weights.shape[0]
    K_pos = np.zeros([n, n])
    K_neg = np.zeros([n, n])
    W = np.zeros([n, n])
    for x in range(n-1):
        xweights = weights[x, :]
        # a log-weight of ~0 means RBF weight ~1, i.e. location x coincides
        # with one of the subject's observed locations
        x_match = np.isclose(xweights, 0)
        for y in range(x+1, n): #fill in upper triangle only
            yweights = weights[y, :]
            y_match = np.isclose(yweights, 0)
            if np.any(x_match) and np.any(y_match): #the pair of locations we're filling in already exists in the given data
                x_ind = np.where(x_match)[0]
                y_ind = np.where(y_match)[0]
                # copy the observed correlation directly; the weight term is
                # log(1) = 0
                Z_match_val = np.mean(Z[x_ind, y_ind])
                W[x, y] = 0.
                if Z_match_val > 0:
                    K_pos[x, y] = np.log(Z_match_val)
                    K_neg[x, y] = -np.inf
                else:
                    K_pos[x, y] = -np.inf
                    K_neg[x, y] = np.log(np.abs(Z_match_val))
                continue
            # otherwise, weight every observed correlation by the product of
            # its RBF weights to locations x and y (a sum in log space), and
            # accumulate with logsumexp for numerical stability
            next_weights = np.add.outer(xweights, yweights)
            next_weights = next_weights[triu_inds]
            W[x, y] = logsumexp(next_weights)
            K_pos[x, y] = logsumexp(logZ_pos + next_weights)
            K_neg[x, y] = logsumexp(logZ_neg + next_weights)
    #turn K_neg into complex numbers. Where K_neg is infinite, this results in nans for the real number parts, so we'll
    #set any nans in K_neg.real to 0
    #TODO: the next lines are redundant with code in _to_log_complex; consolidate
    K_neg = np.multiply(0+1j, K_neg)
    K_neg.real[np.isnan(K_neg)] = 0
    K = K_pos + K_neg
    # only the upper triangles were filled in; symmetrize before returning
    return K + K.T, W + W.T
def _to_log_complex(X):
"""
Compute the log of the given numpy array. Store all positive members of the original array in the real component of
the result and all negative members of the original array in the complex component of the result.
Parameters
----------
X : numpy array to take the log of
Returns
----------
log_X_complex : The log of X, stored as complex numbers to keep track of the positive and negative parts
"""
warnings.simplefilter('ignore')
signX = np.sign(X)
posX = np.log(np.multiply(signX > 0, X))
posX[np.isnan(posX)] = 0
negX = np.log(np.abs(np.multiply(signX < 0, X)))
negX = np.multiply(0 + 1j, negX)
negX.real[np.isnan(negX.real)] = 0
return posX + negX
def _to_exp_real(C):
"""
Inverse of _to_log_complex
"""
posX = np.exp(C.real)
if np.any(np.iscomplex(C)):
negX = np.exp(C.imag)
return posX - negX
else:
return posX
def _logsubexp(x,y):
    """
    Subtracts logged arrays

    Parameters
    ----------
    x : Numpy array
        Log complex array
    y : Numpy array
        Log complex array

    Returns
    ----------
    z : Numpy array
        Returns log complex array of x-y
    """
    # NOTE(review): y is converted back to linear space, negated, and re-logged,
    # while x is used in log space directly. The _to_log_complex(x) result is
    # immediately overwritten below, so that call appears to serve only to
    # allocate a complex array of the right shape -- confirm intent.
    if np.any(np.iscomplex(y)):
        y = _to_exp_real(y)
    else:
        y = np.exp(y)
    sub_log = _to_log_complex(x)
    neg_y_log = _to_log_complex(-y)
    # adding log(-y) implements subtraction: the positive parts of -y land in
    # .real and the negative parts in .imag
    sub_log.real = np.logaddexp(x.real, neg_y_log.real)
    sub_log.imag = np.logaddexp(x.imag, neg_y_log.imag)
    return sub_log
def _fill_upper_triangle(M, value):
upper_tri = np.copy(M)
upper_tri[np.triu_indices(upper_tri.shape[0], 1)] = value
np.fill_diagonal(upper_tri, value)
return upper_tri
def _timeseries_recon(bo, mo, chunk_size=1000, preprocess='zscore', recon_loc_inds=None):
    """
    Reconstruction done by chunking by session

    Parameters
    ----------
    bo : Brain object
        Data to be reconstructed
    mo : Model object
        Model to base the reconstructions on
    chunk_size : int
        Size to break data into
    preprocess : None or 'zscore'
        Whether to z-score the data before reconstructing
    recon_loc_inds : list or None
        Indexes for estimated location in average matrix (location in unknown_inds)

    Returns
    ----------
    results : ndarray
        Compiled reconstructed timeseries
    """
    if preprocess is None:
        data = bo.get_data().as_matrix()
    elif preprocess == 'zscore':
        if bo.data.shape[0] < 3:
            warnings.warn('Not enough samples to zscore so it will be skipped.'
                          ' Note that this will cause problems if your data are not already '
                          'zscored.')
            data = bo.get_data().as_matrix()
        else:
            data = bo.get_zscore_data()
    else:
        # previously `raise('...')`, which raises TypeError because strings
        # are not exceptions; raise a proper ValueError instead
        raise ValueError('Unsupported preprocessing option: ' + preprocess)
    brain_locs_in_model = _count_overlapping(mo.get_locs(), bo.get_locs())
    model_locs_in_brain = _count_overlapping(bo.get_locs(), mo.get_locs())
    if np.all(model_locs_in_brain):
        #if the model contains all of the locations (or fewer) than what are in the brain object, no reconstructions
        #are needed
        return data[:, brain_locs_in_model]
    #otherwise, we'll need to do some work
    Z = mo.get_model(z_transform=True)
    if ~np.any(brain_locs_in_model):
        #if none of the brain locations are in the model, we need to blur out the model to match up with the
        # locations in the brain object
        ### isnt this bypassed in the set_locs??
        combined_locs = np.vstack((bo.get_locs(), mo.get_locs()))
        model_locs_in_brain = [False]*bo.get_locs().shape[0]
        model_locs_in_brain.extend([True]*mo.get_locs().shape[0])
        rbf_weights = _log_rbf(combined_locs, mo.get_locs())
        #TODO(review): _blur_corrmat returns a (numerator, denominator) tuple;
        #passing that tuple to _z2r below looks wrong -- confirm this code path
        Z = _blur_corrmat(Z, rbf_weights)
    K = _z2r(Z)
    known_inds, unknown_inds = known_unknown(mo.get_locs().as_matrix(), bo.get_locs().as_matrix(),
                                             bo.get_locs().as_matrix())
    Kaa = K[known_inds, :][:, known_inds]
    Kaa_inv = np.linalg.pinv(Kaa)
    Kba = K[unknown_inds, :][:, known_inds]
    sessions = bo.sessions.unique()
    # break each session's sample indices into chunks, dropping the None
    # padding _chunker adds to the final chunk
    filter_chunks = []
    chunks = [np.array(i) for session in sessions for i in _chunker(bo.sessions[bo.sessions == session].index.tolist(), chunk_size)]
    for i in chunks:
        filter_chunks.append([x for x in i if x is not None])
    chunks = None
    if recon_loc_inds:
        combined_data = np.zeros((data.shape[0], len(recon_loc_inds)), dtype=data.dtype)
        for x in filter_chunks:
            combined_data[x, range(len(recon_loc_inds))] = _reconstruct_activity(data[x, :], Kba, Kaa_inv, recon_loc_inds=recon_loc_inds)
    else:
        combined_data = np.zeros((data.shape[0], K.shape[0]), dtype=data.dtype)
        combined_data[:, unknown_inds] = np.vstack(list(map(lambda x: _reconstruct_activity(data[x, :], Kba, Kaa_inv),
                                                            filter_chunks)))
        combined_data[:, known_inds] = data
    # z-score the compiled reconstruction within each session
    for s in sessions:
        combined_data[bo.sessions == s, :] = zscore(combined_data[bo.sessions == s, :])
    return combined_data
def _chunker(iterable, chunksize, fillvalue=None):
"""
Chunks longer sequence by regular interval
Parameters
----------
iterable : list or ndarray
Use would be a long timeseries that needs to be broken down
chunksize : int
Size to break down
Returns
----------
results : ndarray
Chunked timeseries
"""
try:
from itertools import zip_longest as zip_longest
except:
from itertools import izip_longest as zip_longest
args = [iter(iterable)] * chunksize
return list(zip_longest(*args, fillvalue=fillvalue))
def _reconstruct_activity(Y, Kba, Kaa_inv, recon_loc_inds=None):
"""
Reconstruct activity
Parameters
----------
Y : numpy array
brain object with zscored data
Kba : correlation matrix (unknown to known)
Kaa_inv : inverse correlation matrix (known to known)
zscore = False
Returns
----------
results : ndarray
Reconstructed timeseries
"""
if recon_loc_inds:
return np.squeeze(np.dot(np.dot(Kba, Kaa_inv), Y.T).T)[:, recon_loc_inds[0]]
else:
return np.dot(np.dot(Kba, Kaa_inv), Y.T).T
def filter_elecs(bo, measure='kurtosis', threshold=10):
    """
    Filter electrodes based on kurtosis value

    Parameters
    ----------
    bo : brain object
        Brain object
    measure : 'kurtosis'
        Method to filter electrodes. Only kurtosis supported currently.
    threshold : int
        Threshold for filtering

    Returns
    ----------
    result : brain object
        Brain object with electrodes and corresponding data that passes kurtosis thresholding
    """
    thresh_bool = bo.kurtosis > threshold
    nbo = copy.deepcopy(bo)  # TODO: modify bo.get_locs rather than copying brain object again here
    nbo.data = bo.data.loc[:, ~thresh_bool]
    nbo.locs = bo.locs.loc[~thresh_bool]
    # count the electrodes REMAINING after filtering; the original read
    # bo.data.shape[1] and reported the unfiltered count
    nbo.n_elecs = nbo.data.shape[1]
    return nbo
def filter_subj(bo, measure='kurtosis', return_locs=False, threshold=10):
    """
    Filter subjects if less than two electrodes pass kurtosis value

    Parameters
    ----------
    bo : str
        Path to brain object
    measure : 'kurtosis'
        Method to filter electrodes. Only kurtosis supported currently.
    return_locs : bool
        Default False, returns meta data. If True, returns electrode locations that pass kurtosis threshold
    threshold : int
        Threshold for filtering.

    Returns
    ----------
    result : meta brain object or None
        Meta field from brain object if two or more electrodes pass kurtosis thresholding.
    """
    from .load import load
    locs = load(bo, field='locs')
    kurt_vals = load(bo, field='kurtosis')
    meta = load(bo, field='meta')
    if meta is None:
        print('no meta data for brain object')
        return
    passing = ~(kurt_vals > threshold)
    if np.sum(passing) < 2:
        print(meta + ': not enough electrodes pass threshold')
        return
    if return_locs:
        locs = pd.DataFrame(locs, columns=['x', 'y', 'z'])
        return meta, locs[passing]
    return meta
def _corr_column(X, Y):
return np.array([pearsonr(x, y)[0] for x, y in zip(X.T, Y.T)])
def _normalize_Y(Y_matrix): #TODO: should be part of bo.get_data and/or Brain.__init__
"""
Normalizes timeseries
Parameters
----------
Y_matrix : ndarray
Raw activity from each electrode channel
Returns
----------
results : ndarray
Normalized activity from each electrode channel
"""
Y = Y_matrix
m = mat.repmat(np.min(Y, axis=0), Y.shape[0], 1)
Y = Y - m
m = mat.repmat(np.max(Y, axis=0), Y.shape[0], 1)
Y = np.divide(Y, m)
added = mat.repmat(0.5 + np.arange(Y.shape[1]), Y.shape[0], 1)
Y = Y + added
return pd.DataFrame(Y)
def _fullfact(dims):
'''
Replicates MATLAB's _fullfact function (behaves the same way)
'''
vals = np.asmatrix(list(range(1, dims[0] + 1))).T
if len(dims) == 1:
return vals
else:
aftervals = np.asmatrix(_fullfact(dims[1:]))
inds = np.asmatrix(np.zeros((np.prod(dims), len(dims))))
row = 0
for i in range(aftervals.shape[0]):
inds[row:(row + len(vals)), 0] = vals
inds[row:(row + len(vals)), 1:] = np.tile(aftervals[i, :], (len(vals), 1))
row += len(vals)
return inds
def model_compile(data):
    """
    Compile existing expanded correlation matrices.

    Parameters
    ----------
    data : list of model object file directories
        The first entry is loaded and the remaining entries are merged into it

    Returns
    ----------
    model : Model object
        A new updated model object
    """
    from .load import load
    compiled = load(data[0])
    compiled.update(data[1:])
    return compiled
def _near_neighbor(bo, mo, match_threshold='auto'): #TODO: should this be part of bo.get_locs() or Brain.__init__, or possibly model.__init__?
"""
Finds the nearest voxel for each subject's electrode location and uses
that as revised electrodes location matrix in the prediction.
Parameters
----------
bo : Brain object
Brain object to update
mo : Model object
Model object for the nearests locations used to predict
match_threshold : 'auto', int, or None
Threshold used to find nearest neighbor
options:
match_threshold = 'auto' : include only nearest neighbor if falls within one voxel distance
match_threshold = 0 :set nearest_neighbor = False and proceed (only exact matches will be used)
match_threshol = None use best match and don't check (even if far away)
match_threshold > 0 : include only nearest neighbor that are within given distance
Returns
----------
bo : Brain object
A new updated brain object
"""
nbo = copy.deepcopy(bo) #FIXME: copying is expensive...
nbo.orig_locs = nbo.locs
d = cdist(nbo.locs, mo.locs, metric='Euclidean')
for i in range(len(nbo.locs)):
min_ind = list(zip(*np.where(d == d.min())))[0]
nbo.locs.iloc[min_ind[0], :] = mo.locs.iloc[min_ind[1], :]
d[min_ind[0]] = np.inf
d[:, min_ind[1]] = np.inf
if not match_threshold is 0 or None:
if match_threshold is 'auto':
v_size = _vox_size(mo.locs)
thresh_bool = abs(nbo.locs - bo.locs) > v_size
thresh_bool = thresh_bool.any(1).ravel()
else:
thresh_bool = abs(nbo.locs - bo.locs) > match_threshold
thresh_bool = thresh_bool.any(1).ravel()
assert match_threshold > 0, 'Negative Euclidean distances are not allowed'
nbo.data = nbo.data.loc[:, ~thresh_bool]
nbo.locs = nbo.locs.loc[~thresh_bool, :]
nbo.n_elecs = nbo.data.shape[1]
nbo.kurtosis = nbo.kurtosis[~thresh_bool]
return nbo
else:
return nbo
def _vox_size(locs):
"""
Finds voxel size
Parameters
----------
locs : pandas DataFrame
Locations in brain extracted from nifti
Returns
----------
results : ndarray
1 x n_dims of voxel size
"""
from .brain import Brain
bo_n = Brain(data=np.array([0]))
n_dims = locs.shape[1]
v_size = np.zeros([1, n_dims])
# make voxel function
for i in np.arange(n_dims):
a = np.unique(locs.iloc[:, i])
dists = pdist(np.atleast_2d(a).T, 'euclidean')
#v_size[0][i] = np.min(dists[dists > 0])
if np.sum(dists > 0) > 0:
v_size[0][i] = np.min(dists[dists > 0])
else:
v_size[0][i] = bo_n.minimum_voxel_size
return v_size
def _unique(X):
"""
Wrapper for np.unique and pd.unique that also returns matching indices
Parameters
----------
X : numpy array or pandas dataframe with electrode locations
Returns
----------
unique_X : the sorted unique rows of X
unique_inds : the indices of X such that X[unique_inds, :] == X (or X.iloc[unique_inds] == X)
"""
if X is None:
return None, []
dataframe = type(X) is pd.DataFrame
if dataframe:
columns = X.columns
X = X.as_matrix()
assert type(X) is np.ndarray, 'must pass in a numpy ndarray or dataframe'
uX, inds = np.unique(X, axis=0, return_index=True)
if dataframe:
uX = pd.DataFrame(data=uX, columns=columns, index=np.arange(len(inds)))
return uX, inds
def _union(X, Y):  # TODO: add test for _union
    """
    Wrapper for np.vstack and pd.vstack that returns unique locations

    Parameters
    ----------
    X : numpy array or pandas dataframe with electrode locations
    Y : numpy array or pandas dataframe with electrode locations

    Returns
    ----------
    XY : the unique values of X and Y stacked together in the same format. If either X or Y is a dataframe, then XY
        is a dataframe with the same columns. Otherwise XY is a numpy array.
    """
    if X is None:
        return Y
    elif Y is None:
        return X
    dataframeX = isinstance(X, pd.DataFrame)
    if dataframeX:
        columnsX = X.columns
        # .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0
        X = X.values
    dataframeY = isinstance(Y, pd.DataFrame)
    if dataframeY:
        columnsY = Y.columns
        Y = Y.values
    if dataframeX and dataframeY:
        assert np.all(columnsX == columnsY), 'Input dataframes have mismatched columns'
    assert X.shape[1] == Y.shape[1], 'Input data must have the same number of columns'
    XY_unique, tmp = _unique(np.vstack((X, Y)))
    if dataframeX or dataframeY:
        columns = columnsX if dataframeX else columnsY
        return pd.DataFrame(data=XY_unique, columns=columns, index=np.arange(XY_unique.shape[0]))
    # the original returned the NON-unique stack here, contradicting the
    # docstring and the dataframe branch; return the unique rows instead
    return XY_unique
def _empty(X): #TODO: ad test for _empty
"""
Return true if X is None or if any element of X.shape is 0
"""
if X is None:
return True
else:
return np.any(np.isclose(X.shape, 0))
def get_rows(all_locations, subj_locations):
    """
    This function indexes a subject's electrode locations in the full array of electrode locations

    Parameters
    ----------
    all_locations : ndarray
        Full array of electrode locations
    subj_locations : ndarray
        Array of subject's electrode locations

    Returns
    ----------
    results : list
        Indices for subject electrodes in the full array of electrodes
    """
    if subj_locations.ndim == 1:
        subj_locations = subj_locations.reshape(1, 3)
    inds = np.full([1, subj_locations.shape[0]], np.nan)
    for i in range(subj_locations.shape[0]):
        possible_locations = np.ones([all_locations.shape[0], 1])
        try:
            # zero out every row of all_locations that mismatches in any column
            for c in range(all_locations.shape[1]):
                possible_locations[all_locations[:, c] != subj_locations[i, c], :] = 0
            # first remaining row is the match
            inds[0, i] = np.where(possible_locations == 1)[0][0]
        except IndexError:
            # no matching row: leave this entry as nan; it is dropped below.
            # (the original bare `except` also swallowed unrelated errors)
            pass
    inds = inds[~np.isnan(inds)]
    return [int(x) for x in inds]
def known_unknown(fullarray, knownarray, subarray=None, electrode=None):
    """
    This finds the indices for known and unknown electrodes in the full array of electrode locations

    Parameters
    ----------
    fullarray : ndarray
        Full array of electrode locations - All electrodes that pass the kurtosis test
    knownarray : ndarray
        Subset of known electrode locations - Subject's electrode locations that pass the kurtosis test (in the leave one out case, this is also has the specified location missing)
    subarray : ndarray
        Subject's electrode locations (all)
    electrode : str
        Index of electrode in subarray to remove (in the leave one out case)

    Returns
    ----------
    known_inds : list
        List of known indices
    unknown_inds : list
        List of unknown indices
    """
    # indices of the known electrodes within the full location matrix
    known_inds = get_rows(np.round(fullarray, 3), np.round(knownarray, 3))
    # everything else is unknown
    unknown_inds = list(set(range(np.shape(fullarray)[0])) - set(known_inds))
    if electrode is None:
        return known_inds, unknown_inds
    # leave-one-out case: locate the removed electrode in the full matrix...
    rm_full_ind = get_rows(np.round(fullarray, 3), np.round(subarray[int(electrode)], 3))
    # ...and within the unknown index subset
    rm_unknown_ind = np.where(np.array(unknown_inds) == np.array(rm_full_ind))[0].tolist()
    return known_inds, unknown_inds, rm_unknown_ind
def remove_electrode(subkarray, subarray, electrode):
    """
    Removes electrode from larger array

    Parameters
    ----------
    subkarray : ndarray
        Subject's electrode locations that pass the kurtosis test
    subarray : ndarray
        Subject's electrode locations (all)
    electrode : int
        Index of electrode in subarray to remove

    Returns
    ----------
    results : tuple of (ndarray, list)
        Subject's passing electrode locations with the electrode removed, and
        the list of remaining indices
    """
    rm_ind = get_rows(subkarray, subarray[electrode])
    keep_inds = [i for i in range(np.shape(subkarray)[0]) if i != electrode]
    return np.delete(subkarray, rm_ind, 0), keep_inds
def _count_overlapping(X, Y):
"""
Finds overlapping rows in two matrices
Parameters
----------
X : Numpy array of reference data
Y : Numpy array of to-be-tested data
Returns
----------
results : ndarray
Array of length Y.shape[0] with 0s and 1s, where 1s denote rows in Y that are also in X
"""
return np.sum([(Y == x).all(1) for idx, x in X.iterrows()], 0).astype(bool)
def make_gif_pngs(nifti, gif_path, index=range(100, 200), name=None, **kwargs):
    """
    Plots series of nifti timepoints as nilearn plot_glass_brain in .png format
    and compiles them into an animated gif

    Parameters
    ----------
    nifti : nib.nifti1.Nifti1Image
        Nifti of reconstruction
    gif_path : directory
        Directory to save .png files
    index : range or list of int
        Timepoints to plot (default: range(100, 200))
    name : str
        Name for gif, default is None and will name the file based on the time windows
    kwargs : dict
        Additional keyword arguments forwarded to nilearn plot_glass_brain

    Returns
    ----------
    results : png
        Series of pngs and the assembled gif
    """
    for i in index:
        nii_i = image.index_img(nifti, i)
        outfile = os.path.join(gif_path, str(i).zfill(4) + '.png')
        ni_plt.plot_glass_brain(nii_i, output_file=outfile, **kwargs)
    images = []
    # os.listdir returns entries in arbitrary order; sort so the gif frames
    # are assembled in timepoint order (filenames are zero-padded above)
    for file in sorted(os.listdir(gif_path)):
        if file.endswith(".png"):
            images.append(imageio.imread(os.path.join(gif_path, file)))
    if name is None:
        gif_outfile = os.path.join(gif_path, 'gif_' + str(min(index)) + '_' + str(max(index)) + '.gif')
    else:
        gif_outfile = os.path.join(gif_path, str(name) + '.gif')
    imageio.mimsave(gif_outfile, images)
def _data_and_samplerate_by_file_index(bo, xform, **kwargs):
"""
Session dependent function application and aggregation
Parameters
----------
bo : Brain object
Contains data
xform : function
The function to apply to the data matrix from each filename
aggregator: function
Function for aggregating results across multiple iterations
Returns
----------
results : numpy ndarray
Array of aggregated results
"""
sample_rate = []
for idx, session in enumerate(bo.sessions.unique()):
if idx is 0:
data_results, session_results, sr_results = xform(bo.data.loc[bo.sessions == session],
bo.sessions.loc[bo.sessions == session],
bo.sample_rate[idx], **kwargs)
sample_rate.append(sr_results)
else:
data_next, session_next, sr_next = xform(bo.data.loc[bo.sessions == session, :],
bo.sessions.loc[bo.sessions == session],
bo.sample_rate[idx], **kwargs)
data_results = data_results.append(data_next, ignore_index=True)
session_results = session_results.append(session_next, ignore_index=True)
sample_rate.append(sr_next)
return data_results, session_results, sample_rate
def _resample(bo, resample_rate=64):
    """
    Function that resamples data to specified sample rate

    Parameters
    ----------
    bo : Brain object
        Contains data
    resample_rate : int or float
        Target sample rate in Hz (default: 64)

    Returns
    ----------
    results: 2D np.ndarray
        Resampled data - pd.DataFrame
        Resampled sessions - pd.DataFrame
        Resample rate - List
    """
    def _resamp(data, session, sample_rate, resample_rate):
        # number of samples after resampling; np.round returns a float and
        # np.linspace requires an integer count on modern numpy
        n_samples = int(np.round(np.shape(data)[0] * resample_rate / sample_rate))
        # index for those samples
        resample_index = np.round(np.linspace(data.index.min(), data.index.max(), n_samples))
        # resampled sessions
        re_session = session[resample_index]
        re_session.interpolate(method='pchip', inplace=True, limit_direction='both')
        # resampled data
        re_data = data.loc[resample_index]
        re_data.interpolate(method='pchip', inplace=True, limit_direction='both')
        return re_data, re_session, resample_rate
    return _data_and_samplerate_by_file_index(bo, _resamp, resample_rate=resample_rate)
def _plot_locs_connectome(locs, label=None, pdfpath=None):
    """
    Plots locations in nilearn plot connectome

    Parameters
    ----------
    locs : pd.DataFrame
        Electrode locations
    label : iterable or None
        Optional per-electrode labels; 'observed' plots blue, 'removed' plots
        teal, anything else plots magenta
    pdfpath : str or None
        If given, the plot is written to this path instead of shown

    Returns
    ----------
    results: nilearn connectome plot
        plot of electrodes
    """
    # NOTE(review): this branch runs when locs IS empty and the labeled plot
    # runs otherwise -- the condition looks inverted; confirm against callers
    if locs.empty:
        ni_plt.plot_connectome(np.eye(locs.shape[0]), locs)
    else:
        if label is not None:
            label = list(label)
            # map each label string to an RGB color
            for i, v in enumerate(label):
                if v == 'observed':
                    label[i] = [0, 0, 1]
                elif v == 'removed':
                    label[i] = [0.0, 0.75, 0.75]
                else:
                    label[i] = [1, 0, 1]
            colors = np.asarray(label)
            # split into a list of one color row per node
            colors = list(map(lambda x: x[0], np.array_split(colors, colors.shape[0], axis=0)))
        else:
            colors = 'k'
        ni_plt.plot_connectome(np.eye(locs.shape[0]), locs, output_file=pdfpath,
                               node_kwargs={'alpha': 0.5, 'edgecolors': None},
                               node_size=10, node_color=colors)
    if not pdfpath:
        ni_plt.show()
def _plot_locs_hyp(locs, pdfpath): #TODO: do we need a separate function for this? doesn't look more convenient than calling hyp.plot directly...
    """
    Plots locations in hypertools

    Parameters
    ----------
    locs : pd.DataFrame
        Electrode locations
    pdfpath : str or None
        Path the plot is saved to (forwarded to hypertools as save_path)

    Returns
    ----------
    results: hypertools plot
        plot of electrodes as black dots
    """
    hyp.plot(locs, 'k.', save_path=pdfpath)
def _plot_glass_brain(nifti, pdfpath, index=1):  # TODO: do we need a separate function for this? doesn't look more convenient than calling plot_glas_brain directly...
    """
    Plots nifti data

    Parameters
    ----------
    nifti : nifti image
        Nifti image to plot
    pdfpath : str or None
        If falsy, the plot window is shown interactively
    index : int
        Timepoint to extract and plot (default: 1)

    Returns
    ----------
    results: nilearn plot_glass_brain
        plot data
    """
    ni_plt.plot_glass_brain(image.index_img(nifti, index))
    if not pdfpath:
        ni_plt.show()
def _nifti_to_brain(nifti, mask_file=None):
    """
    Takes or loads nifti file and converts to brain object data

    Parameters
    ----------
    nifti : str or nifti image
        If nifti is a nifti filepath, loads nifti and returns brain object data
        If nifti is a nifti image, it returns brain object data
    mask_file : str or None
        Optional mask fitted by NiftiMasker; if None, the mask is fit to the
        nifti itself using the 'background' strategy

    Returns
    ----------
    results : tuple
        Y (timepoints x voxels data), R (voxel coordinates), and a meta dict
    """
    from .nifti import Nifti
    if type(nifti) is Nifti:
        img = nifti
    elif type(nifti) is nib.nifti1.Nifti1Image:
        img = nifti
    elif type(nifti) is str:
        if os.path.exists(nifti):
            img = nib.load(nifti)
        else:
            # the original only warned here and then crashed below with a
            # NameError because img was never bound; fail loudly instead
            raise IOError('File does not exist: ' + nifti)
    else:
        raise TypeError('Nifti format not supported')
    mask = NiftiMasker(mask_strategy='background')
    if mask_file is None:
        mask.fit(nifti)
    else:
        mask.fit(mask_file)
    hdr = img.header
    S = img.get_sform()
    Y = np.float64(mask.transform(nifti)).copy()
    # positions of in-mask voxels within the flattened volume
    vmask = np.nonzero(np.array(np.reshape(mask.mask_img_.dataobj, (1, np.prod(mask.mask_img_.shape)), order='C')))[1]
    # 0-based voxel coordinates (x, y, z) for each in-mask voxel
    vox_coords = _fullfact(img.shape[0:3])[vmask, ::-1] - 1
    # map voxel coordinates to world coordinates via the sform affine
    R = np.array(np.dot(vox_coords, S[0:3, 0:3])) + S[:3, 3]
    return Y, R, {'header': hdr, 'unscaled_timing': True}
def _brain_to_nifti(bo, nii_template):  # FIXME: this is incredibly inefficient; could be done much faster using reshape and/or nilearn masking
    """
    Converts a brain object into a Nifti image sampled on a template grid

    Parameters
    ----------
    bo : brain object
        Brain object whose data and electrode locations are written into
        the output image

    nii_template : str, Nifti1Image, or None
        Template is a nifti file with the desired resolution to save the
        brain object activity; its affine defines the output voxel grid

    Returns
    ----------
    results : nibabel.Nifti1Image
        A nifti image with one volume per timepoint; voxels shared by
        multiple electrodes hold the mean of their activity
    """
    from .nifti import Nifti

    R = bo.get_locs()
    # .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0
    Y = np.array(bo.data.values, ndmin=2)
    S = nii_template.affine

    # map scanner-space locations back to integer voxel indices under the template affine
    locs = np.array(np.dot(R - S[:3, 3], np.linalg.inv(S[0:3, 0:3])), dtype='int')
    # grow the grid if any electrode falls outside the template's bounds
    shape = np.max(np.vstack([np.max(locs, axis=0) + 1, nii_template.shape[0:3]]), axis=0)

    data = np.zeros(tuple(list(shape) + [Y.shape[0]]))
    counts = np.zeros(data.shape)

    # accumulate activity and the number of electrodes per voxel
    for i in range(R.shape[0]):
        data[locs[i, 0], locs[i, 1], locs[i, 2], :] += Y[:, i]
        counts[locs[i, 0], locs[i, 1], locs[i, 2], :] += 1

    # average voxels shared by multiple electrodes (errstate guard kept as a
    # precaution against invalid-division warnings)
    with np.errstate(invalid='ignore'):
        for i in range(R.shape[0]):
            data[locs[i, 0], locs[i, 1], locs[i, 2], :] = np.divide(
                data[locs[i, 0], locs[i, 1], locs[i, 2], :],
                counts[locs[i, 0], locs[i, 1], locs[i, 2], :])

    return Nifti(data, affine=nii_template.affine)
def _plot_borderless(x, savefile=None, vmin=-1, vmax=1, width=1000, dpi=100, cmap='Spectral'):
    """
    Render a matrix (or RGB image) with no axes, ticks, or frame.

    Parameters
    ----------
    x : ndarray
        2D matrix (drawn with pcolormesh) or 3D image array (drawn with imshow)

    savefile : str or None
        If given, the figure is also saved to this path

    vmin, vmax : float
        Color limits for the 2D case

    width : int
        Target figure width in pixels (before border correction)

    dpi : int
        Figure resolution

    cmap : str
        Colormap name for the 2D case

    Returns
    ----------
    fig : matplotlib.figure.Figure
        The rendered figure
    """
    _close_all()
    width *= (1000.0 / 775.0)  # account for border
    # correct height/width distortion
    height = (775.0 / 755.0) * float(width) * float(x.shape[0]) / float(x.shape[1])
    fig = plt.figure(figsize=(width / float(dpi), height / float(dpi)), dpi=dpi)
    if x.ndim == 2:
        plt.pcolormesh(x, vmin=float(vmin), vmax=float(vmax), cmap=cmap)
    else:
        plt.imshow(x)
    ax = plt.gca()
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_frame_on(False)
    fig.set_frameon(False)
    if savefile is not None:  # idiomatic None check (was `not savefile == None`)
        # figsize is fixed at figure creation; savefig does not take it, so
        # the invalid kwarg was dropped
        fig.savefig(savefile, bbox_inches='tight', pad_inches=0, dpi=dpi)
    return fig
def _plot_big_matrix(X, outfile, max_blocksize=1000, width=1000, vmin=-1, vmax=1):
    """
    Plot a (possibly very large) matrix by tiling it into blocks, rendering
    each block to a temporary image, and stitching the tiles together.

    Parameters
    ----------
    X : ndarray
        Matrix to plot

    outfile : str
        Path for the assembled image; if the file already exists it is
        simply loaded and re-displayed

    max_blocksize : int
        Maximum number of rows/columns rendered per tile

    width : int
        Target width (in pixels) of the full image

    vmin, vmax : float
        Color limits passed through to _plot_borderless

    Returns
    ----------
    full_img : ndarray
        The assembled image array
    """
    # if the image was already rendered, just show it
    if os.path.isfile(outfile):
        img = plt.imread(outfile)
        _plot_borderless(img)
        return img

    # scratch directory (named after outfile) for the per-tile temp image
    tmpdir1, fname = os.path.split(outfile)
    tmpdir2, tmp = os.path.splitext(fname)
    tmpdir = os.path.join(tmpdir1, tmpdir2)
    tmp_fname = os.path.join(tmpdir, 'tmp.png')
    if not os.path.isdir(tmpdir):
        delete_tmpdir = True
        os.makedirs(tmpdir)
    else:
        delete_tmpdir = False

    def carve_bign(n):
        # block boundaries: starts are 0-based, ends are exclusive.
        # list() is required: under Python 3, slicing a range yields another
        # range, which has no .append (this crashed on py3 before).
        starts = list(range(1, n, max_blocksize))
        ends = starts[1:]
        ends.append(n)
        ends = np.unique(ends)
        starts = np.array(starts) - 1
        return starts, ends

    row_starts, row_ends = carve_bign(X.shape[0])
    col_starts, col_ends = carve_bign(X.shape[1])

    n = np.max(X.shape)
    for row in np.arange(len(row_starts)):
        for col in np.arange(len(col_starts)):
            next_block = X[row_starts[row]:row_ends[row], col_starts[col]:col_ends[col]]
            # keep each tile's width proportional to its share of the columns
            next_width = float(width) * float(col_ends[col] - col_starts[col]) / float(X.shape[1])
            _plot_borderless(next_block, tmp_fname, vmin=vmin, vmax=vmax, width=next_width, dpi=10)
            next_img = plt.imread(tmp_fname)
            next_img = np.flipud(next_img)

            if col == 0:
                row_img = next_img
            else:
                row_img = _safe_cat(row_img, next_img, 1)

            if n > 1e4:  # progress dots for very large matrices
                print('.', end='')

        if row == 0:
            full_img = row_img
        else:
            full_img = _safe_cat(full_img, row_img, 0)

    if n > 1e4:
        print('', end='\n')

    # clean up: remove the scratch dir if we created it, else just the temp file
    if delete_tmpdir:
        shutil.rmtree(tmpdir)
    else:
        os.remove(tmp_fname)

    _plot_borderless(full_img, outfile)
    return full_img
def _safe_cat(a, b, axis):
    """
    Concatenate two arrays along `axis`, mean-padding the smaller array in
    every other dimension so the shapes are compatible.
    """
    other_dims = [d for d in range(a.ndim) if d != axis]
    for d in other_dims:
        if a.shape[d] > b.shape[d]:
            b = _padder(b, a, d)
        elif b.shape[d] > a.shape[d]:
            a = _padder(a, b, d)
    return np.concatenate((a, b), axis=axis)
def _padder(a, b, dims):
if not np.iterable(dims):
dims = [dims]
dims = np.array(dims)
dims[dims < 0] = 0
dims = dims.tolist()
padding = np.array(map(lambda x: int(x in dims), np.arange(a.ndim))) * (np.array(b.shape) - np.array(a.shape))
padding = padding * (padding > 0)
return np.pad(a, zip(np.zeros([1, a.ndim], dtype=int).tolist()[0], padding.tolist()), 'mean')
def _close_all():
    """Close every currently open matplotlib figure."""
    for fignum in plt.get_fignums():
        plt.close(fignum)