From f6e72cb7059add39c8a61d7dcc9e34505ea52510 Mon Sep 17 00:00:00 2001
From: wronk <wronk.mark@gmail.com>
Date: Wed, 31 Aug 2016 15:17:37 -0700
Subject: [PATCH 01/15] Enh: Add batch processing, cleanup

---
 scratch/shallow.py | 85 ++++++++++++++++++++++++++++------------------
 1 file changed, 52 insertions(+), 33 deletions(-)

diff --git a/scratch/shallow.py b/scratch/shallow.py
index efa6456..85da1d9 100644
--- a/scratch/shallow.py
+++ b/scratch/shallow.py
@@ -83,27 +83,40 @@ def gen_evoked_subject(signal, fwd, cov, evoked_info, dt, noise_snr,
     return evoked, stc
 
 
+def get_data_batch(x_data, y_label, batch_num, batch_size):
+    """Function to get a random sampling of an evoked and stc pair"""
+
+    # Get random sampling of data, seed by batch num
+    np.random.seed(batch_num)
+    rand_inds = np.random.randint(x_data.shape[0], size=batch_size)
+
+    return x_data[rand_inds, :], y_label[rand_inds, :]
+
+
 def weight_variable(shape):
     init = tf.truncated_normal(shape, stddev=0.11)
     return tf.Variable(init)
 
+
 def bias_variable(shape):
     init = tf.constant(0.1, shape=shape)
     return tf.Variable(init)
 
+
 def make_tanh_network(sensor_dim, source_dim):
+    """Function to create neural network"""
 
     x_sensor = tf.placeholder(tf.float32, shape=[None, sensor_dim], name="x_sensor")
 
-    W1 = weight_variable([sensor_dim, source_dim//2])
-    b1 = bias_variable([source_dim//2])
+    W1 = weight_variable([sensor_dim, source_dim // 2])
+    b1 = bias_variable([source_dim // 2])
     h1 = tf.nn.tanh(tf.matmul(x_sensor, W1) + b1)
 
-    W2 = weight_variable([source_dim//2, source_dim])
-    b2 = bias_variable([source_dim])
+    W2 = weight_variable([source_dim // 2, source_dim // 2])
+    b2 = bias_variable([source_dim // 2])
     h2 = tf.nn.tanh(tf.matmul(h1, W2) + b2)
 
-    W3 = weight_variable([source_dim, source_dim])
+    W3 = weight_variable([source_dim // 2, source_dim])
     b3 = bias_variable([source_dim])
     yhat = tf.nn.tanh(tf.matmul(h2, W3) + b3)
 
@@ -120,8 +133,8 @@ def sparse_objective(sensor_dim, source_dim, yhat, h1, h2, sess):
     error = tf.reduce_sum(tf.squared_difference(y_source, yhat))
 
     # Remap activations to [0,1]
-    a1 = 0.5*h1 + 0.5
-    a2 = 0.5*h2 + 0.5
+    a1 = 0.5 * h1 + 0.5
+    a2 = 0.5 * h2 + 0.5
 
     kl_bernoulli_h1 = (rho*(tf.log(rho) - tf.log(a1+1e-6)
                       + (1-rho)*(tf.log(1-rho) - tf.log(1-a1+1e-6))))
@@ -149,12 +162,13 @@ def sparse_objective(sensor_dim, source_dim, yhat, h1, h2, sess):
         struct = argv['--subj']
         subj = subjects[structurals.index(struct)]
 
-    # Params
-    n_times = 250
+    # Set params
+    n_data_times = 2000
     dt = 0.001
     noise_snr = np.inf
+    batch_size = 1000
 
-    niter = 100
+    n_iter = int(1e4)
     rho = 0.05
     lam = 1.
 
@@ -162,40 +176,45 @@ def sparse_objective(sensor_dim, source_dim, yhat, h1, h2, sess):
     subj, fwd, inv, cov, evoked_info = load_subject_objects(megdir, subj,
                                                             struct)
     n_verts = fwd['src'][0]['nuse'] + fwd['src'][1]['nuse']
-    sim_vert_data = np.random.randn(n_verts, n_times)
+    sim_vert_data = np.random.randn(n_verts, n_data_times)
 
-    print("applying forward operator")
+    print("Simulating and normalizing data")
     evoked, stc = gen_evoked_subject(sim_vert_data, fwd, cov, evoked_info, dt,
                                      noise_snr)
+    # Normalize data to lie between -1 and 1
+    x_sens_all = np.ascontiguousarray(evoked.data.T)
+    x_sens_all /= np.max(np.abs(x_sens_all))
+    y_src_all = np.ascontiguousarray(sim_vert_data.T)
+    y_src_all /= np.max(np.abs(y_src_all))
 
-    print("building neural net")
+    print("Building neural net")
     sess = tf.Session()
 
-    # Create neural network
     sensor_dim = evoked.data.shape[0]
     source_dim = n_verts
-    
+
     yhat, h1, h2, x_sensor = make_tanh_network(sensor_dim, source_dim)
-    sparse_cost, y_source, tf_rho, tf_lam = sparse_objective(sensor_dim, source_dim,
-                                                       yhat, h1, h2, sess)
+    sparse_cost, y_source, tf_rho, tf_lam = sparse_objective(sensor_dim,
+                                                             source_dim, yhat,
+                                                             h1, h2, sess)
 
-    train_step = tf.train.AdamOptimizer(1e-4).minimize(sparse_cost) 
+    train_step = tf.train.AdamOptimizer(1e-4).minimize(sparse_cost)
 
     sess.run(tf.initialize_all_variables())
-
-    x_sens = np.ascontiguousarray(evoked.data.T)
-    x_sens /= np.max(np.abs(x_sens))
-    y_src = np.ascontiguousarray(sim_vert_data.T)
-    y_src /= np.max(np.abs(y_src))
-
-    print("optimizing...")
-    niter = 100
-    for i in xrange(niter):
-        _, obj = sess.run([train_step, sparse_cost],
-                          feed_dict={x_sensor: x_sens, y_source: y_src,
-                                     tf_rho: rho, tf_lam: lam}
-                         )
-
-        print("  it: %d i, cost: %.2f" % (i+1, obj))
+    print("\nSim params\nn_iter: {}\nn_data_points: {}\nSNR: \
+          {}\nbatch_size: {}\n".format(n_iter, n_data_times, str(noise_snr),
+                                       batch_size))
+    print("Optimizing...")
+    for ii in xrange(n_iter):
+        # Get random batch of data
+        x_sens_batch, y_src_batch = get_data_batch(x_sens_all, y_src_all, ii,
+                                                   batch_size)
+
+        feed_dict = {x_sensor: x_sens_batch, y_source: y_src_batch,
+                     tf_rho: rho, tf_lam: lam}
+        _, obj = sess.run([train_step, sparse_cost], feed_dict)
+
+        if ii % 10 == 0:
+            print("\titer: %d, cost: %.2f" % (ii, obj))
 
     # Evaluate net

From 3028dd06f7a32338601c1bb56b33b53e66185e3c Mon Sep 17 00:00:00 2001
From: Mark Wronkiewicz <wronk.mark@gmail.com>
Date: Thu, 1 Sep 2016 17:44:34 -0700
Subject: [PATCH 02/15] Add some basic evaluation code (#2)

---
 scratch/shallow.py | 149 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 144 insertions(+), 5 deletions(-)

diff --git a/scratch/shallow.py b/scratch/shallow.py
index 85da1d9..a624c6f 100644
--- a/scratch/shallow.py
+++ b/scratch/shallow.py
@@ -74,7 +74,7 @@ def gen_evoked_subject(signal, fwd, cov, evoked_info, dt, noise_snr,
     """Function to generate evoked and stc from signal array"""
 
     vertices = [fwd['src'][0]['vertno'], fwd['src'][1]['vertno']]
-    stc = SourceEstimate(sim_vert_data, vertices, tmin=0, tstep=dt)
+    stc = SourceEstimate(signal, vertices, tmin=0, tstep=dt)
 
     evoked = simulate_evoked(fwd, stc, evoked_info, cov, noise_snr,
                              random_state=seed)
@@ -148,6 +148,113 @@ def sparse_objective(sensor_dim, source_dim, yhat, h1, h2, sess):
     return cost, y_source, rho, lam
 
 
+def eval_error_norm(src_data_orig, src_data_est):
+    """Function to compute norm of the error vector at each dipoe
+
+    Parameters
+    ----------
+
+    src_data_orig: numpy matrix size (n_samples x n_src)
+        Ground truth source estimate used to generate sensor data
+
+    src_data_est: numpy matrix (n_samples x n_src)
+        Source estimate of sensor data created using src_data_orig
+
+    Returns
+    -------
+    error_norm: np.array size(n_samples)
+        Norm of vector between true activation and estimated activation
+
+    """
+
+    #TODO: might want to normalize by number of vertices since subject source
+    #      spaces can have different number of dipoles
+
+    error_norm = np.zeros((src_data_orig.shape[0]))
+
+    for ri, (row_orig, row_est) in enumerate(zip(src_data_orig, src_data_est)):
+        error_norm[ri] = np.linalg.norm(row_orig - row_est)
+
+    return error_norm
+
+
+def get_all_vert_positions(src):
+    """Function to get 3-space position of used dipoles
+
+    Parameters
+    ----------
+    src: SourceSpaces
+        Source space object for subject. Needed to get dipole positions
+
+    Returns
+    -------
+    dip_pos: np.array shape(n_src x 3)
+        3-space positions of used dipoles
+    """
+    # Get vertex numbers and positions that are in use
+    # (usually ~4k - 5k out of ~150k)
+    left_vertno = src[0]['vertno']
+    right_vertno = src[1]['vertno']
+
+    vertnos = np.concatenate((left_vertno, right_vertno))
+    dip_pos = np.concatenate((src[0]['rr'][left_vertno, :],
+                              src[1]['rr'][right_vertno, :]))
+
+    return dip_pos
+
+
+def get_largest_dip_positions(data, n_verts, dip_pos):
+    """Function to get spatial centroid of highest activated dipoles
+
+    Parameters
+    ----------
+    data: np.array shape(n_times x n_src)
+        Source estimate data
+    n_verts: int
+        Number of vertices to use when computing maximum activation centroid
+    dip_pos: np.array shape(n_src x 3)
+        3-space positions of all dipoles in source space .
+
+    Returns
+    -------
+    avg_pos: np.array shape(n_times x 3)
+        Euclidean centroid of activation for largest `n_verts` activations
+    """
+
+    #TODO: How to handle negative current vals? Use abs?
+
+    # Initialize
+    largest_dip_pos = np.zeros((data.shape[0], n_verts, 3))
+
+    # Find largest `n_verts` dipoles at each time point and get position
+    for ti in range(data.shape[0]):
+        largest_dip_inds = data[ti, :].argsort()[-n_verts:]
+        largest_dip_pos[ti, :, :] = dip_pos[largest_dip_inds, :]
+
+    return largest_dip_pos
+
+
+def get_localization_metrics(true_pos, largest_dip_pos):
+    """Helper to get accuracy and point spread
+
+    Parameters
+    ----------
+    true_pos: np.array shape(n_times, 3)
+        3D position of dipole that was simulated active
+    largest_dip_pos: np.array shape(n_times, n_dipoles, 3)
+        3D positions of top `n_dipoles` dipoles with highest activation"""
+
+    centroids = np.mean(largest_dip_pos, axis=1)
+    accuracy = np.linalg.norm(true_pos - centroids)
+
+    # Calculate difference in x/y/z positions from true activation to each src
+    point_distance = np.subtract(largest_dip_pos, true_pos[:, np.newaxis, :])
+
+    # Calculate Euclidean distance (w/ norm) and take mean over all dipoles
+    point_spread = np.mean(np.linalg.norm(point_distance, axis=-1), axis=-1)
+
+    return accuracy, point_spread
+
 if __name__ == "__main__":
 
     from docopt import docopt
@@ -163,12 +270,12 @@ def sparse_objective(sensor_dim, source_dim, yhat, h1, h2, sess):
         subj = subjects[structurals.index(struct)]
 
     # Set params
-    n_data_times = 2000
+    n_data_times = 50000
     dt = 0.001
     noise_snr = np.inf
     batch_size = 1000
 
-    n_iter = int(1e4)
+    n_iter = int(100000)
     rho = 0.05
     lam = 1.
 
@@ -201,7 +308,7 @@ def sparse_objective(sensor_dim, source_dim, yhat, h1, h2, sess):
     train_step = tf.train.AdamOptimizer(1e-4).minimize(sparse_cost)
 
     sess.run(tf.initialize_all_variables())
-    print("\nSim params\nn_iter: {}\nn_data_points: {}\nSNR: \
+    print("\nSim params\n----------\nn_iter: {}\nn_data_points: {}\nSNR: \
           {}\nbatch_size: {}\n".format(n_iter, n_data_times, str(noise_snr),
                                        batch_size))
     print("Optimizing...")
@@ -217,4 +324,36 @@ def sparse_objective(sensor_dim, source_dim, yhat, h1, h2, sess):
         if ii % 10 == 0:
             print("\titer: %d, cost: %.2f" % (ii, obj))
 
-    # Evaluate net
+    #
+    # Evaluate network
+    #
+    n_eval_data_times = 1000  # Probably should be <= 1000 to avoid mem problems
+
+    print("\nEvaluating...\n")
+
+    # Simulate identity activations
+    rand_verts = np.sort(np.random.randint(0, n_verts, n_eval_data_times))
+    sim_vert_eval_data = np.eye(n_verts)[:, rand_verts] * 0.1
+    evoked_eval, stc_eval = gen_evoked_subject(sim_vert_eval_data, fwd, cov,
+                                               evoked_info, dt, noise_snr)
+
+    x_sens_eval = np.ascontiguousarray(evoked_eval.data.T)
+    y_src_eval = np.ascontiguousarray(stc_eval.data.T)
+
+    feed_dict = {x_sensor: x_sens_eval, y_source: y_src_eval,
+                 tf_rho: rho, tf_lam: lam}
+    src_est = sess.run(yhat, feed_dict)
+
+    # Calculate vector norm error
+    error_norm = eval_error_norm(y_src_eval, src_est)
+
+    dip_positions = get_all_vert_positions(inv['src'])
+
+    # Ground truth pos
+    activation_positions = dip_positions[rand_verts, :]
+    # Get position of most active dipoles
+    largest_dip_positions = get_largest_dip_positions(src_est, 25,
+                                                      dip_positions)
+    # Calculate accuracy metrics (in meters)
+    accuracy, point_spread = get_localization_metrics(activation_positions,
+                                                      largest_dip_positions)

From c5177d452cf3c1082743046085537ef4c94855c3 Mon Sep 17 00:00:00 2001
From: wronk <wronk.mark@gmail.com>
Date: Fri, 2 Sep 2016 15:05:22 -0700
Subject: [PATCH 03/15] Add saving, offload functions to separate file

---
 scratch/shallow.py | 244 +++++++--------------------------------------
 1 file changed, 36 insertions(+), 208 deletions(-)

diff --git a/scratch/shallow.py b/scratch/shallow.py
index a624c6f..fc79c47 100644
--- a/scratch/shallow.py
+++ b/scratch/shallow.py
@@ -18,15 +18,19 @@
 import os
 import os.path as op
 
+import tensorflow as tf
+import numpy as np
+
 import mne
 from mne import SourceEstimate
 from mne.minimum_norm import read_inverse_operator
 from mne.simulation import simulate_sparse_stc, simulate_stc, simulate_evoked
 from mne.externals.h5io import read_hdf5, write_hdf5
 
-import tensorflow as tf
-import numpy as np
-
+from shallow_fun import (load_subject_objects, gen_evoked_subject,
+                         get_data_batch, get_all_vert_positions,
+                         get_largest_dip_positions, get_localization_metrics,
+                         eval_error_norm)
 
 # Entries in structurals and subjects must correspoond,
 # i.e. structurals[i] === subjects[i].
@@ -44,80 +48,39 @@
 subjects = subjects[:-1]
 
 
-def load_subject_objects(megdatadir, subj, struct):
-
-    print("  %s: -- loading meg objects" % subj)
-
-    fname_fwd = op.join(megdatadir, subj, 'forward',
-                        '%s-sss-fwd.fif' % subj)
-    fwd = mne.read_forward_solution(fname_fwd, force_fixed=True, surf_ori=True)
-
-    fname_inv = op.join(megdatadir, subj, 'inverse',
-                        '%s-55-sss-meg-eeg-fixed-inv.fif' % subj)
-    inv = read_inverse_operator(fname_inv)
-
-    fname_epochs = op.join(megdatadir, subj, 'epochs',
-                           'All_55-sss_%s-epo.fif' % subj)
-    #epochs = mne.read_epochs(fname_epochs)
-    #evoked = epochs.average()
-    #evoked_info = evoked.info
-    evoked_info = mne.io.read_info(fname_epochs)
-    cov = inv['noise_cov']
-
-    print("  %s: -- finished loading meg objects" % subj)
-
-    return subj, fwd, inv, cov, evoked_info
-
-
-def gen_evoked_subject(signal, fwd, cov, evoked_info, dt, noise_snr,
-                       seed=None):
-    """Function to generate evoked and stc from signal array"""
-
-    vertices = [fwd['src'][0]['vertno'], fwd['src'][1]['vertno']]
-    stc = SourceEstimate(signal, vertices, tmin=0, tstep=dt)
-
-    evoked = simulate_evoked(fwd, stc, evoked_info, cov, noise_snr,
-                             random_state=seed)
-    evoked.add_eeg_average_proj()
-
-    return evoked, stc
-
-
-def get_data_batch(x_data, y_label, batch_num, batch_size):
-    """Function to get a random sampling of an evoked and stc pair"""
-
-    # Get random sampling of data, seed by batch num
-    np.random.seed(batch_num)
-    rand_inds = np.random.randint(x_data.shape[0], size=batch_size)
-
-    return x_data[rand_inds, :], y_label[rand_inds, :]
-
-
-def weight_variable(shape):
+def weight_variable(shape, name=None):
     init = tf.truncated_normal(shape, stddev=0.11)
+    if name is not None:
+        return tf.Variable(init, name=name)
+
     return tf.Variable(init)
 
 
-def bias_variable(shape):
+def bias_variable(shape, name=None):
     init = tf.constant(0.1, shape=shape)
+
+    if name is not None:
+        return tf.Variable(init, name=name)
+
     return tf.Variable(init)
 
 
 def make_tanh_network(sensor_dim, source_dim):
     """Function to create neural network"""
 
-    x_sensor = tf.placeholder(tf.float32, shape=[None, sensor_dim], name="x_sensor")
+    x_sensor = tf.placeholder(tf.float32, shape=[None, sensor_dim],
+                              name="x_sensor")
 
-    W1 = weight_variable([sensor_dim, source_dim // 2])
-    b1 = bias_variable([source_dim // 2])
+    W1 = weight_variable([sensor_dim, source_dim // 2], name='W1')
+    b1 = bias_variable([source_dim // 2], name='b1')
     h1 = tf.nn.tanh(tf.matmul(x_sensor, W1) + b1)
 
-    W2 = weight_variable([source_dim // 2, source_dim // 2])
-    b2 = bias_variable([source_dim // 2])
+    W2 = weight_variable([source_dim // 2, source_dim // 2], name='W2')
+    b2 = bias_variable([source_dim // 2], name='b2')
     h2 = tf.nn.tanh(tf.matmul(h1, W2) + b2)
 
-    W3 = weight_variable([source_dim // 2, source_dim])
-    b3 = bias_variable([source_dim])
+    W3 = weight_variable([source_dim // 2, source_dim], name='W3')
+    b3 = bias_variable([source_dim], name='b3')
     yhat = tf.nn.tanh(tf.matmul(h2, W3) + b3)
 
     return yhat, h1, h2, x_sensor
@@ -136,125 +99,17 @@ def sparse_objective(sensor_dim, source_dim, yhat, h1, h2, sess):
     a1 = 0.5 * h1 + 0.5
     a2 = 0.5 * h2 + 0.5
 
-    kl_bernoulli_h1 = (rho*(tf.log(rho) - tf.log(a1+1e-6)
-                      + (1-rho)*(tf.log(1-rho) - tf.log(1-a1+1e-6))))
-    kl_bernoulli_h2 = (rho*(tf.log(rho) - tf.log(a2+1e-6)
-                      + (1-rho)*(tf.log(1-rho) - tf.log(1-a2+1e-6))))
+    kl_bernoulli_h1 = (rho * (tf.log(rho) - tf.log(a1 + 1e-6) + (1 - rho) *
+                              (tf.log(1 - rho) - tf.log(1 - a1 + 1e-6))))
+    kl_bernoulli_h2 = (rho * (tf.log(rho) - tf.log(a2 + 1e-6) + (1 - rho) *
+                              (tf.log(1 - rho) - tf.log(1 - a2 + 1e-6))))
     regularizer = (tf.reduce_sum(kl_bernoulli_h1)
                    + tf.reduce_sum(kl_bernoulli_h2))
 
-    cost = error + lam*regularizer
+    cost = error + lam * regularizer
 
     return cost, y_source, rho, lam
 
-
-def eval_error_norm(src_data_orig, src_data_est):
-    """Function to compute norm of the error vector at each dipoe
-
-    Parameters
-    ----------
-
-    src_data_orig: numpy matrix size (n_samples x n_src)
-        Ground truth source estimate used to generate sensor data
-
-    src_data_est: numpy matrix (n_samples x n_src)
-        Source estimate of sensor data created using src_data_orig
-
-    Returns
-    -------
-    error_norm: np.array size(n_samples)
-        Norm of vector between true activation and estimated activation
-
-    """
-
-    #TODO: might want to normalize by number of vertices since subject source
-    #      spaces can have different number of dipoles
-
-    error_norm = np.zeros((src_data_orig.shape[0]))
-
-    for ri, (row_orig, row_est) in enumerate(zip(src_data_orig, src_data_est)):
-        error_norm[ri] = np.linalg.norm(row_orig - row_est)
-
-    return error_norm
-
-
-def get_all_vert_positions(src):
-    """Function to get 3-space position of used dipoles
-
-    Parameters
-    ----------
-    src: SourceSpaces
-        Source space object for subject. Needed to get dipole positions
-
-    Returns
-    -------
-    dip_pos: np.array shape(n_src x 3)
-        3-space positions of used dipoles
-    """
-    # Get vertex numbers and positions that are in use
-    # (usually ~4k - 5k out of ~150k)
-    left_vertno = src[0]['vertno']
-    right_vertno = src[1]['vertno']
-
-    vertnos = np.concatenate((left_vertno, right_vertno))
-    dip_pos = np.concatenate((src[0]['rr'][left_vertno, :],
-                              src[1]['rr'][right_vertno, :]))
-
-    return dip_pos
-
-
-def get_largest_dip_positions(data, n_verts, dip_pos):
-    """Function to get spatial centroid of highest activated dipoles
-
-    Parameters
-    ----------
-    data: np.array shape(n_times x n_src)
-        Source estimate data
-    n_verts: int
-        Number of vertices to use when computing maximum activation centroid
-    dip_pos: np.array shape(n_src x 3)
-        3-space positions of all dipoles in source space .
-
-    Returns
-    -------
-    avg_pos: np.array shape(n_times x 3)
-        Euclidean centroid of activation for largest `n_verts` activations
-    """
-
-    #TODO: How to handle negative current vals? Use abs?
-
-    # Initialize
-    largest_dip_pos = np.zeros((data.shape[0], n_verts, 3))
-
-    # Find largest `n_verts` dipoles at each time point and get position
-    for ti in range(data.shape[0]):
-        largest_dip_inds = data[ti, :].argsort()[-n_verts:]
-        largest_dip_pos[ti, :, :] = dip_pos[largest_dip_inds, :]
-
-    return largest_dip_pos
-
-
-def get_localization_metrics(true_pos, largest_dip_pos):
-    """Helper to get accuracy and point spread
-
-    Parameters
-    ----------
-    true_pos: np.array shape(n_times, 3)
-        3D position of dipole that was simulated active
-    largest_dip_pos: np.array shape(n_times, n_dipoles, 3)
-        3D positions of top `n_dipoles` dipoles with highest activation"""
-
-    centroids = np.mean(largest_dip_pos, axis=1)
-    accuracy = np.linalg.norm(true_pos - centroids)
-
-    # Calculate difference in x/y/z positions from true activation to each src
-    point_distance = np.subtract(largest_dip_pos, true_pos[:, np.newaxis, :])
-
-    # Calculate Euclidean distance (w/ norm) and take mean over all dipoles
-    point_spread = np.mean(np.linalg.norm(point_distance, axis=-1), axis=-1)
-
-    return accuracy, point_spread
-
 if __name__ == "__main__":
 
     from docopt import docopt
@@ -279,6 +134,9 @@ def get_localization_metrics(true_pos, largest_dip_pos):
     rho = 0.05
     lam = 1.
 
+    save_network = True
+    fpath_save = op.join('model_subj_{}_iters.meta'.format(n_iter))
+
     # Get subject info and create data
     subj, fwd, inv, cov, evoked_info = load_subject_objects(megdir, subj,
                                                             struct)
@@ -299,7 +157,6 @@ def get_localization_metrics(true_pos, largest_dip_pos):
 
     sensor_dim = evoked.data.shape[0]
     source_dim = n_verts
-
     yhat, h1, h2, x_sensor = make_tanh_network(sensor_dim, source_dim)
     sparse_cost, y_source, tf_rho, tf_lam = sparse_objective(sensor_dim,
                                                              source_dim, yhat,
@@ -307,6 +164,7 @@ def get_localization_metrics(true_pos, largest_dip_pos):
 
     train_step = tf.train.AdamOptimizer(1e-4).minimize(sparse_cost)
 
+    saver = tf.train.Saver()
     sess.run(tf.initialize_all_variables())
     print("\nSim params\n----------\nn_iter: {}\nn_data_points: {}\nSNR: \
           {}\nbatch_size: {}\n".format(n_iter, n_data_times, str(noise_snr),
@@ -317,6 +175,7 @@ def get_localization_metrics(true_pos, largest_dip_pos):
         x_sens_batch, y_src_batch = get_data_batch(x_sens_all, y_src_all, ii,
                                                    batch_size)
 
+        # Take training step
         feed_dict = {x_sensor: x_sens_batch, y_source: y_src_batch,
                      tf_rho: rho, tf_lam: lam}
         _, obj = sess.run([train_step, sparse_cost], feed_dict)
@@ -324,36 +183,5 @@ def get_localization_metrics(true_pos, largest_dip_pos):
         if ii % 10 == 0:
             print("\titer: %d, cost: %.2f" % (ii, obj))
 
-    #
-    # Evaluate network
-    #
-    n_eval_data_times = 1000  # Probably should be <= 1000 to avoid mem problems
-
-    print("\nEvaluating...\n")
-
-    # Simulate identity activations
-    rand_verts = np.sort(np.random.randint(0, n_verts, n_eval_data_times))
-    sim_vert_eval_data = np.eye(n_verts)[:, rand_verts] * 0.1
-    evoked_eval, stc_eval = gen_evoked_subject(sim_vert_eval_data, fwd, cov,
-                                               evoked_info, dt, noise_snr)
-
-    x_sens_eval = np.ascontiguousarray(evoked_eval.data.T)
-    y_src_eval = np.ascontiguousarray(stc_eval.data.T)
-
-    feed_dict = {x_sensor: x_sens_eval, y_source: y_src_eval,
-                 tf_rho: rho, tf_lam: lam}
-    src_est = sess.run(yhat, feed_dict)
-
-    # Calculate vector norm error
-    error_norm = eval_error_norm(y_src_eval, src_est)
-
-    dip_positions = get_all_vert_positions(inv['src'])
-
-    # Ground truth pos
-    activation_positions = dip_positions[rand_verts, :]
-    # Get position of most active dipoles
-    largest_dip_positions = get_largest_dip_positions(src_est, 25,
-                                                      dip_positions)
-    # Calculate accuracy metrics (in meters)
-    accuracy, point_spread = get_localization_metrics(activation_positions,
-                                                      largest_dip_positions)
+    if save_network:
+        saver.save(sess, 'model_{}'.format(struct))

From f1545ae0360d168b2b697fb3b569e0dd66421994 Mon Sep 17 00:00:00 2001
From: wronk <wronk.mark@gmail.com>
Date: Fri, 2 Sep 2016 15:05:32 -0700
Subject: [PATCH 04/15] Move functions to separate file

---
 scratch/__init__.py    |   0
 scratch/eval_model.py  | 160 +++++++++++++++++++++++++++++++++++++
 scratch/shallow_fun.py | 175 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 335 insertions(+)
 create mode 100644 scratch/__init__.py
 create mode 100644 scratch/eval_model.py
 create mode 100644 scratch/shallow_fun.py

diff --git a/scratch/__init__.py b/scratch/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scratch/eval_model.py b/scratch/eval_model.py
new file mode 100644
index 0000000..5a47aa3
--- /dev/null
+++ b/scratch/eval_model.py
@@ -0,0 +1,160 @@
+"""eval_model.py
+
+    Evaluate deep neural net on sensor space signals from known source space
+    signals. Tests localization error and point spread
+
+    Usage:
+      shallow.py <megdir> <structdir> <nn_fname> [--subj=<subj>]
+      shallow.py (-h | --help)
+
+    Options:
+      --subj=<subj>     Specify subject to process with structural name
+      -h, --help        Show this screen
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import os.path as op
+
+import mne
+from mne.simulation import simulate_sparse_stc, simulate_stc, simulate_evoked
+
+import tensorflow as tf
+import numpy as np
+
+from shallow_fun import (load_subject_objects, gen_evoked_subject,
+                         get_all_vert_positions, get_largest_dip_positions,
+                         get_localization_metrics, eval_error_norm)
+from shallow import make_tanh_network, sparse_objective
+
+
+# Entries in structurals and subjects must correspoond,
+# i.e. structurals[i] === subjects[i].
+structurals = ['AKCLEE_103', 'AKCLEE_104', 'AKCLEE_105', 'AKCLEE_106',
+               'AKCLEE_107', 'AKCLEE_109', 'AKCLEE_110', 'AKCLEE_115',
+               'AKCLEE_117', 'AKCLEE_118', 'AKCLEE_119', 'AKCLEE_121',
+               'AKCLEE_125', 'AKCLEE_126', 'AKCLEE_131', 'AKCLEE_132']
+subjects = ['eric_sps_03', 'eric_sps_04', 'eric_sps_05', 'eric_sps_06',
+            'eric_sps_07', 'eric_sps_09', 'eric_sps_10', 'eric_sps_15',
+            'eric_sps_17', 'eric_sps_18', 'eric_sps_19', 'eric_sps_21',
+            'eric_sps_25', 'eric_sps_26', 'eric_sps_31', 'eric_sps_32']
+
+# Removing eric_sps_32/AKCL_132 b/c of vertex issue
+structurals = structurals[:-1]
+subjects = subjects[:-1]
+
+
+if __name__ == "__main__":
+
+    from docopt import docopt
+    argv = docopt(__doc__)
+
+    megdir = argv['<megdir>']
+    structdir = argv['<structdir>']
+    model_fname = argv['<nn_fname>']
+
+    struct = None
+    subj = None
+    if argv['--subj']:
+        struct = argv['--subj']
+        subj = subjects[structurals.index(struct)]
+
+    # Set params
+    n_data_times = 1000
+    dt = 0.001
+    noise_snr = np.inf
+    rho = 0.05
+    lam = 1.
+
+    sess = tf.Session()
+
+    #saver = tf.train.import_meta_graph('model_AKCLEE_107.meta')
+    #saver.restore(sess, model_fname.split('.')[:-1])
+    #yhat = tf.get_collection(['yhat'][0])
+
+    print('\nLoaded saved model: %s\n' % model_fname)
+    print("\nEvaluating...\n")
+
+    # Get subject info and create data
+    subj, fwd, inv, cov, evoked_info = load_subject_objects(megdir, subj,
+                                                            struct)
+    n_verts = fwd['src'][0]['nuse'] + fwd['src'][1]['nuse']
+
+    # Simulate identity activations
+    sim_vert_data = np.eye(n_verts)[:, :500] * 0.1
+    evoked, stc = gen_evoked_subject(sim_vert_data, fwd, cov, evoked_info, dt,
+                                     noise_snr)
+
+    print("Simulating and normalizing data")
+    sens_dim = evoked.data.shape[0]
+    src_dim = n_verts
+
+    # Reconstruct network
+    yhat, h1, h2, x_sensor = make_tanh_network(sens_dim, src_dim)
+    sparse_cost, y_source, tf_rho, tf_lam = sparse_objective(sens_dim, src_dim,
+                                                             yhat, h1, h2,
+                                                             sess)
+    saver = tf.train.Saver()
+    saver.restore(sess, 'model_AKCLEE_107')
+
+    #
+    # Evaluate network
+    #
+    x_sens = np.ascontiguousarray(evoked.data.T)
+    y_src = np.ascontiguousarray(stc.data.T)
+
+    feed_dict = {x_sensor: x_sens, y_source: y_src, tf_rho: rho, tf_lam: lam}
+    src_est = sess.run(yhat, feed_dict)
+
+    # Calculate vector norm error
+    error_norm = eval_error_norm(y_src, src_est)
+    dip_positions = get_all_vert_positions(inv['src'])
+
+    # Ground truth pos
+    #activation_positions = dip_positions[rand_verts, :]
+    activation_positions = dip_positions[:500, :]
+    # Get position of most active dipoles
+    largest_dip_positions = get_largest_dip_positions(src_est, 25,
+                                                      dip_positions)
+    # Calculate accuracy metrics (in meters)
+    accuracy, point_spread = get_localization_metrics(activation_positions,
+                                                      largest_dip_positions)
+    print('Accuracy: {}, Avg. Point spread: {}'.format(accuracy,
+                                                       np.mean(point_spread)))
+
+    n_eval_data_times = 1000  # Probably should be <= 1000 to avoid mem problems
+
+    print("\nEvaluating...\n")
+
+    # Simulate identity activations
+    #rand_verts = np.sort(np.random.randint(0, n_verts, n_eval_data_times))
+    #sim_vert_eval_data = np.eye(n_verts)[:, rand_verts] * 0.1
+    sim_vert_eval_data = np.eye(n_verts)[:, :500] * 0.1
+    evoked_eval, stc_eval = gen_evoked_subject(sim_vert_eval_data, fwd, cov,
+                                               evoked_info, dt, noise_snr)
+
+    x_sens_eval = np.ascontiguousarray(evoked_eval.data.T)
+    y_src_eval = np.ascontiguousarray(stc_eval.data.T)
+
+    feed_dict = {x_sensor: x_sens_eval, y_source: y_src_eval,
+                 tf_rho: rho, tf_lam: lam}
+    src_est = sess.run(yhat, feed_dict)
+
+    # Calculate vector norm error
+    error_norm = eval_error_norm(y_src_eval, src_est)
+
+    dip_positions = get_all_vert_positions(inv['src'])
+
+    # Ground truth pos
+    activation_positions = dip_positions[:500, :]
+    # Get position of most active dipoles
+    largest_dip_positions = get_largest_dip_positions(src_est, 25,
+                                                      dip_positions)
+    # Calculate accuracy metrics (in meters)
+    accuracy, point_spread = get_localization_metrics(activation_positions,
+                                                      largest_dip_positions)
+    print('Accuracy: {}, Avg. Point spread: {}'.format(accuracy,
+                                                       np.mean(point_spread)))
+
diff --git a/scratch/shallow_fun.py b/scratch/shallow_fun.py
new file mode 100644
index 0000000..4b58e55
--- /dev/null
+++ b/scratch/shallow_fun.py
@@ -0,0 +1,175 @@
+"""
+shallow_fun.py
+
+Collection of helper functions
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import os.path as op
+
+import numpy as np
+
+import mne
+from mne import SourceEstimate
+from mne.minimum_norm import read_inverse_operator
+from mne.simulation import simulate_sparse_stc, simulate_stc, simulate_evoked
+
+
+def load_subject_objects(megdatadir, subj, struct):
+
+    print("  %s: -- loading meg objects" % subj)
+
+    fname_fwd = op.join(megdatadir, subj, 'forward',
+                        '%s-sss-fwd.fif' % subj)
+    fwd = mne.read_forward_solution(fname_fwd, force_fixed=True, surf_ori=True)
+
+    fname_inv = op.join(megdatadir, subj, 'inverse',
+                        '%s-55-sss-meg-eeg-fixed-inv.fif' % subj)
+    inv = read_inverse_operator(fname_inv)
+
+    fname_epochs = op.join(megdatadir, subj, 'epochs',
+                           'All_55-sss_%s-epo.fif' % subj)
+    #epochs = mne.read_epochs(fname_epochs)
+    #evoked = epochs.average()
+    #evoked_info = evoked.info
+    evoked_info = mne.io.read_info(fname_epochs)
+    cov = inv['noise_cov']
+
+    print("  %s: -- finished loading meg objects" % subj)
+
+    return subj, fwd, inv, cov, evoked_info
+
+
+def gen_evoked_subject(signal, fwd, cov, evoked_info, dt, noise_snr,
+                       seed=None):
+    """Function to generate evoked and stc from signal array"""
+
+    vertices = [fwd['src'][0]['vertno'], fwd['src'][1]['vertno']]
+    stc = SourceEstimate(signal, vertices, tmin=0, tstep=dt)
+
+    evoked = simulate_evoked(fwd, stc, evoked_info, cov, noise_snr,
+                             random_state=seed)
+    evoked.add_eeg_average_proj()
+
+    return evoked, stc
+
+
+def get_data_batch(x_data, y_label, batch_num, batch_size):
+    """Function to get a random sampling of an evoked and stc pair"""
+
+    # Get random sampling of data, seed by batch num
+    np.random.seed(batch_num)
+    rand_inds = np.random.randint(x_data.shape[0], size=batch_size)
+
+    return x_data[rand_inds, :], y_label[rand_inds, :]
+
+
+def get_all_vert_positions(src):
+    """Function to get 3-space position of used dipoles
+
+    Parameters
+    ----------
+    src: SourceSpaces
+        Source space object for subject. Needed to get dipole positions
+
+    Returns
+    -------
+    dip_pos: np.array shape(n_src x 3)
+        3-space positions of used dipoles
+    """
+    # Get vertex numbers and positions that are in use
+    # (usually ~4k - 5k out of ~150k)
+    left_vertno = src[0]['vertno']
+    right_vertno = src[1]['vertno']
+
+    vertnos = np.concatenate((left_vertno, right_vertno))
+    dip_pos = np.concatenate((src[0]['rr'][left_vertno, :],
+                              src[1]['rr'][right_vertno, :]))
+
+    return dip_pos
+
+
+def eval_error_norm(src_data_orig, src_data_est):
+    """Function to compute norm of the error vector at each dipoe
+
+    Parameters
+    ----------
+
+    src_data_orig: numpy matrix size (n_samples x n_src)
+        Ground truth source estimate used to generate sensor data
+
+    src_data_est: numpy matrix (n_samples x n_src)
+        Source estimate of sensor data created using src_data_orig
+
+    Returns
+    -------
+    error_norm: np.array size(n_samples)
+        Norm of vector between true activation and estimated activation
+
+    """
+
+    #TODO: might want to normalize by number of vertices since subject source
+    #      spaces can have different number of dipoles
+
+    error_norm = np.zeros((src_data_orig.shape[0]))
+
+    for ri, (row_orig, row_est) in enumerate(zip(src_data_orig, src_data_est)):
+        error_norm[ri] = np.linalg.norm(row_orig - row_est)
+
+    return error_norm
+
+
+def get_largest_dip_positions(data, n_verts, dip_pos):
+    """Function to get spatial centroid of highest activated dipoles
+
+    Parameters
+    ----------
+    data: np.array shape(n_times x n_src)
+        Source estimate data
+    n_verts: int
+        Number of vertices to use when computing maximum activation centroid
+    dip_pos: np.array shape(n_src x 3)
+        3-space positions of all dipoles in source space .
+
+    Returns
+    -------
+    avg_pos: np.array shape(n_times x 3)
+        Euclidean centroid of activation for largest `n_verts` activations
+    """
+
+    #TODO: How to handle negative current vals? Use abs?
+
+    # Initialize
+    largest_dip_pos = np.zeros((data.shape[0], n_verts, 3))
+
+    # Find largest `n_verts` dipoles at each time point and get position
+    for ti in range(data.shape[0]):
+        largest_dip_inds = data[ti, :].argsort()[-n_verts:]
+        largest_dip_pos[ti, :, :] = dip_pos[largest_dip_inds, :]
+
+    return largest_dip_pos
+
+
+def get_localization_metrics(true_pos, largest_dip_pos):
+    """Helper to get accuracy and point spread
+
+    Parameters
+    ----------
+    true_pos: np.array shape(n_times, 3)
+        3D position of dipole that was simulated active
+    largest_dip_pos: np.array shape(n_times, n_dipoles, 3)
+        3D positions of top `n_dipoles` dipoles with highest activation"""
+
+    centroids = np.mean(largest_dip_pos, axis=1)
+    accuracy = np.linalg.norm(true_pos - centroids)
+
+    # Calculate difference in x/y/z positions from true activation to each src
+    point_distance = np.subtract(largest_dip_pos, true_pos[:, np.newaxis, :])
+
+    # Calculate Euclidean distance (w/ norm) and take mean over all dipoles
+    point_spread = np.mean(np.linalg.norm(point_distance, axis=-1), axis=-1)
+
+    return accuracy, point_spread

From ff674fabaf699357eb1a7bdda11397c25a9c179b Mon Sep 17 00:00:00 2001
From: wronk <wronk.mark@gmail.com>
Date: Mon, 12 Sep 2016 10:29:06 -0700
Subject: [PATCH 05/15] Add summaries for tensorboard

---
 scratch/shallow.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/scratch/shallow.py b/scratch/shallow.py
index fc79c47..8d089b1 100644
--- a/scratch/shallow.py
+++ b/scratch/shallow.py
@@ -83,6 +83,11 @@ def make_tanh_network(sensor_dim, source_dim):
     b3 = bias_variable([source_dim], name='b3')
     yhat = tf.nn.tanh(tf.matmul(h2, W3) + b3)
 
+    # Attach histogram summaries to weight functions
+    tf.histogram_summary('W1 Hist', W1)
+    tf.histogram_summary('W2 Hist', W2)
+    tf.histogram_summary('W3 Hist', W3)
+
     return yhat, h1, h2, x_sensor
 
 
@@ -108,6 +113,10 @@ def sparse_objective(sensor_dim, source_dim, yhat, h1, h2, sess):
 
     cost = error + lam * regularizer
 
+    # Attach summaries
+    tf.scalar_summary('error', error)
+    tf.scalar_summary('cost function', cost)
+
     return cost, y_source, rho, lam
 
 if __name__ == "__main__":
@@ -130,7 +139,7 @@ def sparse_objective(sensor_dim, source_dim, yhat, h1, h2, sess):
     noise_snr = np.inf
     batch_size = 1000
 
-    n_iter = int(100000)
+    n_iter = int(500000)
     rho = 0.05
     lam = 1.
 
@@ -161,6 +170,8 @@ def sparse_objective(sensor_dim, source_dim, yhat, h1, h2, sess):
     sparse_cost, y_source, tf_rho, tf_lam = sparse_objective(sensor_dim,
                                                              source_dim, yhat,
                                                              h1, h2, sess)
+    merged_summaries = tf.merge_all_summaries()
+    train_writer = tf.train.SummaryWriter('./train_summaries', sess.graph)
 
     train_step = tf.train.AdamOptimizer(1e-4).minimize(sparse_cost)
 
@@ -178,10 +189,14 @@ def sparse_objective(sensor_dim, source_dim, yhat, h1, h2, sess):
         # Take training step
         feed_dict = {x_sensor: x_sens_batch, y_source: y_src_batch,
                      tf_rho: rho, tf_lam: lam}
-        _, obj = sess.run([train_step, sparse_cost], feed_dict)
 
         if ii % 10 == 0:
+            _, obj, summary = sess.run([train_step, sparse_cost,
+                                        merged_summaries], feed_dict)
+            train_writer.add_summary(summary, ii)
             print("\titer: %d, cost: %.2f" % (ii, obj))
+        else:
+            _, obj = sess.run([train_step, sparse_cost], feed_dict)
 
     if save_network:
         saver.save(sess, 'model_{}'.format(struct))

From e207dee62c907a28b12b037c5ce6ec9ced518120 Mon Sep 17 00:00:00 2001
From: wronk <wronk.mark@gmail.com>
Date: Mon, 12 Sep 2016 10:30:07 -0700
Subject: [PATCH 06/15] Reformulate and cleanup evaluation code

---
 scratch/eval_model.py | 96 ++++++++++++++++++++++---------------------
 1 file changed, 49 insertions(+), 47 deletions(-)

diff --git a/scratch/eval_model.py b/scratch/eval_model.py
index 5a47aa3..b4c1df2 100644
--- a/scratch/eval_model.py
+++ b/scratch/eval_model.py
@@ -19,7 +19,9 @@
 import os.path as op
 
 import mne
+from mne import SourceEstimate
 from mne.simulation import simulate_sparse_stc, simulate_stc, simulate_evoked
+from mne.minimum_norm import apply_inverse
 
 import tensorflow as tf
 import numpy as np
@@ -62,11 +64,12 @@
         subj = subjects[structurals.index(struct)]
 
     # Set params
-    n_data_times = 1000
     dt = 0.001
     noise_snr = np.inf
     rho = 0.05
     lam = 1.
+    n_avg_verts = 25  # Number of verts to avg when determining est position
+    n_eval_data_times = 1000  # Probably should be <= 1000 to avoid mem problems
 
     sess = tf.Session()
 
@@ -80,6 +83,7 @@
     # Get subject info and create data
     subj, fwd, inv, cov, evoked_info = load_subject_objects(megdir, subj,
                                                             struct)
+    vert_list = [fwd['src'][0]['vertno'], fwd['src'][1]['vertno']]
     n_verts = fwd['src'][0]['nuse'] + fwd['src'][1]['nuse']
 
     # Simulate identity activations
@@ -100,61 +104,59 @@
     saver.restore(sess, 'model_AKCLEE_107')
 
     #
-    # Evaluate network
+    # Evaluate deep learning network
     #
-    x_sens = np.ascontiguousarray(evoked.data.T)
-    y_src = np.ascontiguousarray(stc.data.T)
-
-    feed_dict = {x_sensor: x_sens, y_source: y_src, tf_rho: rho, tf_lam: lam}
-    src_est = sess.run(yhat, feed_dict)
 
-    # Calculate vector norm error
-    error_norm = eval_error_norm(y_src, src_est)
-    dip_positions = get_all_vert_positions(inv['src'])
-
-    # Ground truth pos
-    #activation_positions = dip_positions[rand_verts, :]
-    activation_positions = dip_positions[:500, :]
-    # Get position of most active dipoles
-    largest_dip_positions = get_largest_dip_positions(src_est, 25,
-                                                      dip_positions)
-    # Calculate accuracy metrics (in meters)
-    accuracy, point_spread = get_localization_metrics(activation_positions,
-                                                      largest_dip_positions)
-    print('Accuracy: {}, Avg. Point spread: {}'.format(accuracy,
-                                                       np.mean(point_spread)))
-
-    n_eval_data_times = 1000  # Probably should be <= 1000 to avoid mem problems
-
-    print("\nEvaluating...\n")
+    print("\nEvaluating deep learning approach...\n")
 
-    # Simulate identity activations
+    # Simulate unit dipole activations
     #rand_verts = np.sort(np.random.randint(0, n_verts, n_eval_data_times))
     #sim_vert_eval_data = np.eye(n_verts)[:, rand_verts] * 0.1
-    sim_vert_eval_data = np.eye(n_verts)[:, :500] * 0.1
-    evoked_eval, stc_eval = gen_evoked_subject(sim_vert_eval_data, fwd, cov,
-                                               evoked_info, dt, noise_snr)
+    sim_vert_data = np.eye(n_verts)[:, :n_eval_data_times]
+    evoked, stc = gen_evoked_subject(sim_vert_data, fwd, cov, evoked_info, dt,
+                                     noise_snr)
+
+    x_sens = np.ascontiguousarray(evoked.data.T)
+    y_src = np.ascontiguousarray(stc.data.T)
+    #TODO: normalize data? Transfer normalization multipliers? (maybe with sklearn)
 
-    x_sens_eval = np.ascontiguousarray(evoked_eval.data.T)
-    y_src_eval = np.ascontiguousarray(stc_eval.data.T)
+    # Ground truth dipole positions
+    vert_positions = get_all_vert_positions(inv['src'])
+    true_act_positions = vert_positions[:n_eval_data_times, :]
 
-    feed_dict = {x_sensor: x_sens_eval, y_source: y_src_eval,
-                 tf_rho: rho, tf_lam: lam}
-    src_est = sess.run(yhat, feed_dict)
+    feed_dict = {x_sensor: x_sens, y_source: y_src, tf_rho: rho, tf_lam: lam}
+    src_est_dl = sess.run(yhat, feed_dict)
+    stc_dl = SourceEstimate(src_est_dl.T, vertices=vert_list, subject=struct,
+                            tmin=0, tstep=0.001)
 
     # Calculate vector norm error
-    error_norm = eval_error_norm(y_src_eval, src_est)
+    error_norm_dl = eval_error_norm(y_src, src_est_dl)
 
-    dip_positions = get_all_vert_positions(inv['src'])
+    # Get position of most active dipoles and calc accuracy metrics (in meters)
+    est_act_positions = get_largest_dip_positions(src_est_dl, n_avg_verts,
+                                                  vert_positions)
+    accuracy_dl, point_spread_dl = get_localization_metrics(true_act_positions,
+                                                            est_act_positions)
 
-    # Ground truth pos
-    activation_positions = dip_positions[:500, :]
-    # Get position of most active dipoles
-    largest_dip_positions = get_largest_dip_positions(src_est, 25,
-                                                      dip_positions)
-    # Calculate accuracy metrics (in meters)
-    accuracy, point_spread = get_localization_metrics(activation_positions,
-                                                      largest_dip_positions)
-    print('Accuracy: {}, Avg. Point spread: {}'.format(accuracy,
-                                                       np.mean(point_spread)))
+    print("\nEvaluating deep learning approach...\n")
+    #
+    # Evaluate standard MNE methods
+    #
+    stc_mne = apply_inverse(evoked, inv, method='sLORETA')
+    src_est_mne = stc_mne.data.T
 
+    # Calculate vector norm error
+    error_norm_mne = eval_error_norm(y_src, src_est_mne)
+    est_act_positions = get_largest_dip_positions(src_est_mne, n_avg_verts,
+                                                  vert_positions)
+    accuracy_mne, point_spread_mne = get_localization_metrics(true_act_positions,
+                                                              est_act_positions)
+
+    print('\bDeep learning; error norm average for {} verts: {:0.4f}'.format(
+        n_eval_data_times, np.mean(error_norm_dl)))
+    print('Linear method: error norm average for {} verts: {:0.4f}\n'.format(
+        n_eval_data_times, np.mean(error_norm_mne)))
+    print('Deep learning; Accuracy: {:0.5f}, Avg. Point spread: {:0.5f}'.format(
+        accuracy_dl, np.mean(point_spread_dl)))
+    print('Linear method; Accuracy: {:0.5f}, Avg. Point spread: {:0.5f}\n'.format(
+        accuracy_mne, np.mean(point_spread_mne)))

From 5700b0aef7d6c55d7c69eb080f765923d9b22916 Mon Sep 17 00:00:00 2001
From: wronk <wronk.mark@gmail.com>
Date: Wed, 14 Sep 2016 16:27:03 -0700
Subject: [PATCH 07/15] Functionalize several things, add sparse activation
 data

---
 scratch/shallow.py | 106 +++++++++++++++++++++++++++++----------------
 1 file changed, 69 insertions(+), 37 deletions(-)

diff --git a/scratch/shallow.py b/scratch/shallow.py
index 8d089b1..8b9ff4e 100644
--- a/scratch/shallow.py
+++ b/scratch/shallow.py
@@ -20,6 +20,7 @@
 
 import tensorflow as tf
 import numpy as np
+from scipy.sparse import random as random_sparse
 
 import mne
 from mne import SourceEstimate
@@ -30,7 +31,7 @@
 from shallow_fun import (load_subject_objects, gen_evoked_subject,
                          get_data_batch, get_all_vert_positions,
                          get_largest_dip_positions, get_localization_metrics,
-                         eval_error_norm)
+                         eval_error_norm, norm_transpose)
 
 # Entries in structurals and subjects must correspoond,
 # i.e. structurals[i] === subjects[i].
@@ -79,19 +80,31 @@ def make_tanh_network(sensor_dim, source_dim):
     b2 = bias_variable([source_dim // 2], name='b2')
     h2 = tf.nn.tanh(tf.matmul(h1, W2) + b2)
 
-    W3 = weight_variable([source_dim // 2, source_dim], name='W3')
-    b3 = bias_variable([source_dim], name='b3')
-    yhat = tf.nn.tanh(tf.matmul(h2, W3) + b3)
+    W3 = weight_variable([source_dim // 2, source_dim // 4], name='W3')
+    b3 = bias_variable([source_dim // 4], name='b3')
+    h3 = tf.nn.tanh(tf.matmul(h2, W3) + b3)
+
+    W4 = weight_variable([source_dim // 4, source_dim], name='W4')
+    b4 = bias_variable([source_dim], name='b4')
+    yhat = tf.nn.tanh(tf.matmul(h3, W4) + b4)
 
     # Attach histogram summaries to weight functions
     tf.histogram_summary('W1 Hist', W1)
     tf.histogram_summary('W2 Hist', W2)
     tf.histogram_summary('W3 Hist', W3)
+    tf.histogram_summary('W4 Hist', W4)
+
+    h_list = [h1, h2, h3]
+
+    return yhat, h_list, x_sensor
 
-    return yhat, h1, h2, x_sensor
 
+def bernoulli(a_obj, rho):
+    return (rho * (tf.log(rho) - tf.log(a_obj + 1e-6) + (1 - rho) *
+                   (tf.log(1 - rho) - tf.log(1 - a_obj + 1e-6))))
 
-def sparse_objective(sensor_dim, source_dim, yhat, h1, h2, sess):
+
+def sparse_objective(sensor_dim, source_dim, yhat, h_list, sess):
 
     y_source = tf.placeholder(tf.float32, shape=[None, source_dim], name="y_source")
     rho = tf.placeholder(tf.float32, shape=(), name="rho")
@@ -101,15 +114,12 @@ def sparse_objective(sensor_dim, source_dim, yhat, h1, h2, sess):
     error = tf.reduce_sum(tf.squared_difference(y_source, yhat))
 
     # Remap activations to [0,1]
-    a1 = 0.5 * h1 + 0.5
-    a2 = 0.5 * h2 + 0.5
+    a_list = [0.5 * h_obj + 0.5 for h_obj in h_list]
+
+    kl_bernoulli_list = [bernoulli(a_obj, rho) for a_obj in a_list]
 
-    kl_bernoulli_h1 = (rho * (tf.log(rho) - tf.log(a1 + 1e-6) + (1 - rho) *
-                              (tf.log(1 - rho) - tf.log(1 - a1 + 1e-6))))
-    kl_bernoulli_h2 = (rho * (tf.log(rho) - tf.log(a2 + 1e-6) + (1 - rho) *
-                              (tf.log(1 - rho) - tf.log(1 - a2 + 1e-6))))
-    regularizer = (tf.reduce_sum(kl_bernoulli_h1)
-                   + tf.reduce_sum(kl_bernoulli_h2))
+    regularizer = sum([tf.reduce_sum(kl_bernoulli_h)
+                       for kl_bernoulli_h in kl_bernoulli_list])
 
     cost = error + lam * regularizer
 
@@ -134,42 +144,63 @@ def sparse_objective(sensor_dim, source_dim, yhat, h1, h2, sess):
         subj = subjects[structurals.index(struct)]
 
     # Set params
-    n_data_times = 50000
+    n_training_times_noise = 10000
+    n_training_times_sparse = 10000
+    n_training_times = n_training_times_noise + n_training_times_sparse
+
     dt = 0.001
-    noise_snr = np.inf
-    batch_size = 1000
+    SNR = np.inf
+    batch_size = 500
 
-    n_iter = int(500000)
+    n_iter = int(5000)
     rho = 0.05
     lam = 1.
 
     save_network = True
     fpath_save = op.join('model_subj_{}_iters.meta'.format(n_iter))
+    n_training_times = n_training_times_noise + n_training_times_sparse
 
     # Get subject info and create data
     subj, fwd, inv, cov, evoked_info = load_subject_objects(megdir, subj,
                                                             struct)
-    n_verts = fwd['src'][0]['nuse'] + fwd['src'][1]['nuse']
-    sim_vert_data = np.random.randn(n_verts, n_data_times)
-
-    print("Simulating and normalizing data")
-    evoked, stc = gen_evoked_subject(sim_vert_data, fwd, cov, evoked_info, dt,
-                                     noise_snr)
-    # Normalize data to lie between -1 and 1
-    x_sens_all = np.ascontiguousarray(evoked.data.T)
-    x_sens_all /= np.max(np.abs(x_sens_all))
-    y_src_all = np.ascontiguousarray(sim_vert_data.T)
-    y_src_all /= np.max(np.abs(y_src_all))
+    sensor_dim = len(fwd['info']['ch_names'])
+    source_dim = fwd['src'][0]['nuse'] + fwd['src'][1]['nuse']
+
+    sparse_dens = 1. / source_dim  # Density of non-zero vals in sparse training data
+    sparse_dist = np.random.randn
+
+    noise_data = np.random.randn(source_dim, n_training_times_noise)
+    sparse_data = random_sparse(source_dim, n_training_times_sparse,
+                                sparse_dens, random_state=0,
+                                dtype=np.float32, data_rvs=sparse_dist).toarray()
+    sim_train_data = np.concatenate((noise_data, sparse_data), axis=1)
+
+    print("Simulating and normalizing training data")
+    evoked, stc = gen_evoked_subject(sim_train_data, fwd, cov, evoked_info, dt,
+                                     SNR)
+    # Normalize training data to lie between -1 and 1
+    x_train = norm_transpose(evoked.data)
+    y_train = norm_transpose(sim_train_data)
+
+    """
+    print("Simulating and normalizing testing data")
+    evoked, stc = gen_evoked_subject(sim_test_data, fwd, cov, evoked_info, dt,
+                                     SNR)
+    # Normalize testing data to lie between -1 and 1
+    x_test = norm_transpose(evoked.data)
+    y_test = norm_transpose(sim_vert_data)
+
+    val_monitor = tf.contrib.learn.monitors.ValidationMonitor(
+        x_test, y_test, every_n_steps=20)
+    """
 
     print("Building neural net")
     sess = tf.Session()
 
-    sensor_dim = evoked.data.shape[0]
-    source_dim = n_verts
-    yhat, h1, h2, x_sensor = make_tanh_network(sensor_dim, source_dim)
+    yhat, h_list, x_sensor = make_tanh_network(sensor_dim, source_dim)
     sparse_cost, y_source, tf_rho, tf_lam = sparse_objective(sensor_dim,
                                                              source_dim, yhat,
-                                                             h1, h2, sess)
+                                                             h_list, sess)
     merged_summaries = tf.merge_all_summaries()
     train_writer = tf.train.SummaryWriter('./train_summaries', sess.graph)
 
@@ -177,19 +208,20 @@ def sparse_objective(sensor_dim, source_dim, yhat, h1, h2, sess):
 
     saver = tf.train.Saver()
     sess.run(tf.initialize_all_variables())
-    print("\nSim params\n----------\nn_iter: {}\nn_data_points: {}\nSNR: \
-          {}\nbatch_size: {}\n".format(n_iter, n_data_times, str(noise_snr),
-                                       batch_size))
+    print("\nSim params\n----------\nn_iter: {}\nn_training_times: {}\nSNR: \
+          {}\nbatch_size: {}\n".format(n_iter, n_training_times,
+                                       str(SNR), batch_size))
     print("Optimizing...")
     for ii in xrange(n_iter):
         # Get random batch of data
-        x_sens_batch, y_src_batch = get_data_batch(x_sens_all, y_src_all, ii,
+        x_sens_batch, y_src_batch = get_data_batch(x_train, y_train, ii,
                                                    batch_size)
 
         # Take training step
         feed_dict = {x_sensor: x_sens_batch, y_source: y_src_batch,
                      tf_rho: rho, tf_lam: lam}
 
+        # Save summaries for tensorboard every 10 steps
         if ii % 10 == 0:
             _, obj, summary = sess.run([train_step, sparse_cost,
                                         merged_summaries], feed_dict)

From 1d3c948d59d52cf4f330b9a5548e5b787e769a48 Mon Sep 17 00:00:00 2001
From: wronk <wronk.mark@gmail.com>
Date: Wed, 14 Sep 2016 16:30:25 -0700
Subject: [PATCH 08/15] Add function to normalize data

---
 scratch/shallow_fun.py | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/scratch/shallow_fun.py b/scratch/shallow_fun.py
index 4b58e55..9d7416f 100644
--- a/scratch/shallow_fun.py
+++ b/scratch/shallow_fun.py
@@ -18,24 +18,25 @@
 from mne.simulation import simulate_sparse_stc, simulate_stc, simulate_evoked
 
 
-def load_subject_objects(megdatadir, subj, struct):
+def load_subject_objects(megdatadir, subj, struct, verbose=False):
 
     print("  %s: -- loading meg objects" % subj)
 
     fname_fwd = op.join(megdatadir, subj, 'forward',
                         '%s-sss-fwd.fif' % subj)
-    fwd = mne.read_forward_solution(fname_fwd, force_fixed=True, surf_ori=True)
+    fwd = mne.read_forward_solution(fname_fwd, force_fixed=True, surf_ori=True,
+                                    verbose=verbose)
 
     fname_inv = op.join(megdatadir, subj, 'inverse',
                         '%s-55-sss-meg-eeg-fixed-inv.fif' % subj)
-    inv = read_inverse_operator(fname_inv)
+    inv = read_inverse_operator(fname_inv, verbose=verbose)
 
     fname_epochs = op.join(megdatadir, subj, 'epochs',
                            'All_55-sss_%s-epo.fif' % subj)
     #epochs = mne.read_epochs(fname_epochs)
     #evoked = epochs.average()
     #evoked_info = evoked.info
-    evoked_info = mne.io.read_info(fname_epochs)
+    evoked_info = mne.io.read_info(fname_epochs, verbose=verbose)
     cov = inv['noise_cov']
 
     print("  %s: -- finished loading meg objects" % subj)
@@ -57,6 +58,16 @@ def gen_evoked_subject(signal, fwd, cov, evoked_info, dt, noise_snr,
     return evoked, stc
 
 
+def norm_transpose(data):
+    """Helper to transpose data and normalize by max val"""
+    #XXX Probably should switch to sklearn's standard scaler
+        # (0 mean, unit var, and saves the scaling if we need to apply it later)
+    data_fixed = np.ascontiguousarray(data.T)
+    data_fixed /= np.max(np.abs(data_fixed))
+
+    return data_fixed
+
+
 def get_data_batch(x_data, y_label, batch_num, batch_size):
     """Function to get a random sampling of an evoked and stc pair"""
 
@@ -161,7 +172,15 @@ def get_localization_metrics(true_pos, largest_dip_pos):
     true_pos: np.array shape(n_times, 3)
         3D position of dipole that was simulated active
     largest_dip_pos: np.array shape(n_times, n_dipoles, 3)
-        3D positions of top `n_dipoles` dipoles with highest activation"""
+        3D positions of top `n_dipoles` dipoles with highest activation
+
+    Returns
+    -------
+    accuracy: np.array
+
+    point_spread: float
+
+    """
 
     centroids = np.mean(largest_dip_pos, axis=1)
     accuracy = np.linalg.norm(true_pos - centroids)

From c971cc6ecb36c039005b1d653cab6557cf4f70e2 Mon Sep 17 00:00:00 2001
From: wronk <wronk.mark@gmail.com>
Date: Wed, 14 Sep 2016 17:00:30 -0700
Subject: [PATCH 09/15] Check random verts, standardize variables

---
 scratch/eval_model.py | 57 +++++++++++++++++--------------------------
 1 file changed, 22 insertions(+), 35 deletions(-)

diff --git a/scratch/eval_model.py b/scratch/eval_model.py
index b4c1df2..51bdc97 100644
--- a/scratch/eval_model.py
+++ b/scratch/eval_model.py
@@ -28,7 +28,8 @@
 
 from shallow_fun import (load_subject_objects, gen_evoked_subject,
                          get_all_vert_positions, get_largest_dip_positions,
-                         get_localization_metrics, eval_error_norm)
+                         get_localization_metrics, eval_error_norm,
+                         norm_transpose)
 from shallow import make_tanh_network, sparse_objective
 
 
@@ -69,68 +70,52 @@
     rho = 0.05
     lam = 1.
     n_avg_verts = 25  # Number of verts to avg when determining est position
-    n_eval_data_times = 1000  # Probably should be <= 1000 to avoid mem problems
+    n_test_verts = 1000  # Probably should be <= 1000 to avoid mem problems
 
     sess = tf.Session()
 
-    #saver = tf.train.import_meta_graph('model_AKCLEE_107.meta')
-    #saver.restore(sess, model_fname.split('.')[:-1])
-    #yhat = tf.get_collection(['yhat'][0])
-
-    print('\nLoaded saved model: %s\n' % model_fname)
-    print("\nEvaluating...\n")
-
     # Get subject info and create data
     subj, fwd, inv, cov, evoked_info = load_subject_objects(megdir, subj,
                                                             struct)
     vert_list = [fwd['src'][0]['vertno'], fwd['src'][1]['vertno']]
     n_verts = fwd['src'][0]['nuse'] + fwd['src'][1]['nuse']
 
-    # Simulate identity activations
-    sim_vert_data = np.eye(n_verts)[:, :500] * 0.1
-    evoked, stc = gen_evoked_subject(sim_vert_data, fwd, cov, evoked_info, dt,
-                                     noise_snr)
-
     print("Simulating and normalizing data")
-    sens_dim = evoked.data.shape[0]
-    src_dim = n_verts
+    sensor_dim = len(fwd['info']['ch_names'])
+    source_dim = fwd['src'][0]['nuse'] + fwd['src'][1]['nuse']
 
+    print("Reconstructing model and restoring saved weights")
     # Reconstruct network
-    yhat, h1, h2, x_sensor = make_tanh_network(sens_dim, src_dim)
-    sparse_cost, y_source, tf_rho, tf_lam = sparse_objective(sens_dim, src_dim,
-                                                             yhat, h1, h2,
+    yhat, h_list, x_sensor = make_tanh_network(sensor_dim, source_dim)
+    sparse_cost, y_source, tf_rho, tf_lam = sparse_objective(sensor_dim, source_dim,
+                                                             yhat, h_list,
                                                              sess)
     saver = tf.train.Saver()
     saver.restore(sess, 'model_AKCLEE_107')
 
-    #
-    # Evaluate deep learning network
-    #
-
     print("\nEvaluating deep learning approach...\n")
 
     # Simulate unit dipole activations
-    #rand_verts = np.sort(np.random.randint(0, n_verts, n_eval_data_times))
-    #sim_vert_eval_data = np.eye(n_verts)[:, rand_verts] * 0.1
-    sim_vert_data = np.eye(n_verts)[:, :n_eval_data_times]
+    rand_verts = np.sort(np.random.randint(0, n_verts, n_test_verts))
+    sim_vert_data = np.eye(n_verts)[:, rand_verts]
     evoked, stc = gen_evoked_subject(sim_vert_data, fwd, cov, evoked_info, dt,
                                      noise_snr)
 
-    x_sens = np.ascontiguousarray(evoked.data.T)
-    y_src = np.ascontiguousarray(stc.data.T)
     #TODO: normalize data? Transfer normalization multipliers? (maybe with sklearn)
+    x_test = norm_transpose(evoked.data)
+    y_test = norm_transpose(stc.data)
 
     # Ground truth dipole positions
     vert_positions = get_all_vert_positions(inv['src'])
-    true_act_positions = vert_positions[:n_eval_data_times, :]
+    true_act_positions = vert_positions[rand_verts, :]
 
-    feed_dict = {x_sensor: x_sens, y_source: y_src, tf_rho: rho, tf_lam: lam}
+    feed_dict = {x_sensor: x_test, y_source: y_test, tf_rho: rho, tf_lam: lam}
     src_est_dl = sess.run(yhat, feed_dict)
     stc_dl = SourceEstimate(src_est_dl.T, vertices=vert_list, subject=struct,
                             tmin=0, tstep=0.001)
 
     # Calculate vector norm error
-    error_norm_dl = eval_error_norm(y_src, src_est_dl)
+    error_norm_dl = eval_error_norm(y_test, src_est_dl)
 
     # Get position of most active dipoles and calc accuracy metrics (in meters)
     est_act_positions = get_largest_dip_positions(src_est_dl, n_avg_verts,
@@ -138,7 +123,7 @@
     accuracy_dl, point_spread_dl = get_localization_metrics(true_act_positions,
                                                             est_act_positions)
 
-    print("\nEvaluating deep learning approach...\n")
+    print("\nEvaluating standard linear approach...\n")
     #
     # Evaluate standard MNE methods
     #
@@ -146,16 +131,18 @@
     src_est_mne = stc_mne.data.T
 
     # Calculate vector norm error
-    error_norm_mne = eval_error_norm(y_src, src_est_mne)
+    error_norm_mne = eval_error_norm(y_test, src_est_mne)
     est_act_positions = get_largest_dip_positions(src_est_mne, n_avg_verts,
                                                   vert_positions)
     accuracy_mne, point_spread_mne = get_localization_metrics(true_act_positions,
                                                               est_act_positions)
 
+    '''
     print('\bDeep learning; error norm average for {} verts: {:0.4f}'.format(
-        n_eval_data_times, np.mean(error_norm_dl)))
+        n_test_verts, np.mean(error_norm_dl)))
     print('Linear method: error norm average for {} verts: {:0.4f}\n'.format(
-        n_eval_data_times, np.mean(error_norm_mne)))
+        n_test_verts, np.mean(error_norm_mne)))
+        '''
     print('Deep learning; Accuracy: {:0.5f}, Avg. Point spread: {:0.5f}'.format(
         accuracy_dl, np.mean(point_spread_dl)))
     print('Linear method; Accuracy: {:0.5f}, Avg. Point spread: {:0.5f}\n'.format(

From 73c368c1377377ed005e2481d2e4da971375ed72 Mon Sep 17 00:00:00 2001
From: wronk <wronk.mark@gmail.com>
Date: Fri, 16 Sep 2016 12:50:14 -0700
Subject: [PATCH 10/15] Fix rho function

---
 scratch/shallow.py | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/scratch/shallow.py b/scratch/shallow.py
index 8b9ff4e..609300c 100644
--- a/scratch/shallow.py
+++ b/scratch/shallow.py
@@ -72,19 +72,19 @@ def make_tanh_network(sensor_dim, source_dim):
     x_sensor = tf.placeholder(tf.float32, shape=[None, sensor_dim],
                               name="x_sensor")
 
-    W1 = weight_variable([sensor_dim, source_dim // 2], name='W1')
-    b1 = bias_variable([source_dim // 2], name='b1')
+    W1 = weight_variable([sensor_dim, source_dim // 4], name='W1')
+    b1 = bias_variable([source_dim // 4], name='b1')
     h1 = tf.nn.tanh(tf.matmul(x_sensor, W1) + b1)
 
-    W2 = weight_variable([source_dim // 2, source_dim // 2], name='W2')
+    W2 = weight_variable([source_dim // 4, source_dim // 2], name='W2')
     b2 = bias_variable([source_dim // 2], name='b2')
     h2 = tf.nn.tanh(tf.matmul(h1, W2) + b2)
 
-    W3 = weight_variable([source_dim // 2, source_dim // 4], name='W3')
-    b3 = bias_variable([source_dim // 4], name='b3')
+    W3 = weight_variable([source_dim // 2, source_dim // 2], name='W3')
+    b3 = bias_variable([source_dim // 2], name='b3')
     h3 = tf.nn.tanh(tf.matmul(h2, W3) + b3)
 
-    W4 = weight_variable([source_dim // 4, source_dim], name='W4')
+    W4 = weight_variable([source_dim // 2, source_dim], name='W4')
     b4 = bias_variable([source_dim], name='b4')
     yhat = tf.nn.tanh(tf.matmul(h3, W4) + b4)
 
@@ -99,9 +99,11 @@ def make_tanh_network(sensor_dim, source_dim):
     return yhat, h_list, x_sensor
 
 
-def bernoulli(a_obj, rho):
-    return (rho * (tf.log(rho) - tf.log(a_obj + 1e-6) + (1 - rho) *
-                   (tf.log(1 - rho) - tf.log(1 - a_obj + 1e-6))))
+def bernoulli(act, rho):
+    """Helper to calculate sparsity penalty based on KL divergence"""
+
+    return (rho * (tf.log(rho) - tf.log(act + 1e-6)) +
+            (1 - rho) * (tf.log(1 - rho) - tf.log(1 - act + 1e-6)))
 
 
 def sparse_objective(sensor_dim, source_dim, yhat, h_list, sess):
@@ -114,9 +116,9 @@ def sparse_objective(sensor_dim, source_dim, yhat, h_list, sess):
     error = tf.reduce_sum(tf.squared_difference(y_source, yhat))
 
     # Remap activations to [0,1]
-    a_list = [0.5 * h_obj + 0.5 for h_obj in h_list]
+    act_list = [0.5 * h_obj + 0.5 for h_obj in h_list]
 
-    kl_bernoulli_list = [bernoulli(a_obj, rho) for a_obj in a_list]
+    kl_bernoulli_list = [bernoulli(act, rho) for act in act_list]
 
     regularizer = sum([tf.reduce_sum(kl_bernoulli_h)
                        for kl_bernoulli_h in kl_bernoulli_list])
@@ -144,16 +146,16 @@ def sparse_objective(sensor_dim, source_dim, yhat, h_list, sess):
         subj = subjects[structurals.index(struct)]
 
     # Set params
-    n_training_times_noise = 10000
-    n_training_times_sparse = 10000
+    n_training_times_noise = 25000
+    n_training_times_sparse = 25000
     n_training_times = n_training_times_noise + n_training_times_sparse
 
     dt = 0.001
     SNR = np.inf
-    batch_size = 500
+    batch_size = 1000
 
-    n_iter = int(5000)
-    rho = 0.05
+    n_iter = int(100000)
+    rho = 0.1
     lam = 1.
 
     save_network = True

From becd44bc55b3e84396ff9e76bae6de403bc0d44b Mon Sep 17 00:00:00 2001
From: wronk <wronk.mark@gmail.com>
Date: Tue, 15 Nov 2016 16:32:56 -0800
Subject: [PATCH 11/15] Use config file, rename vars for clarity

---
 scratch/eval_model.py | 76 +++++++++++++++++++------------------------
 1 file changed, 33 insertions(+), 43 deletions(-)

diff --git a/scratch/eval_model.py b/scratch/eval_model.py
index 51bdc97..c2e550f 100644
--- a/scratch/eval_model.py
+++ b/scratch/eval_model.py
@@ -4,8 +4,8 @@
     signals. Tests localization error and point spread
 
     Usage:
-      shallow.py <megdir> <structdir> <nn_fname> [--subj=<subj>]
-      shallow.py (-h | --help)
+      eval_model.py <megdir> <structdir> <nn_fname> [--subj=<subj>]
+      eval_model.py (-h | --help)
 
     Options:
       --subj=<subj>     Specify subject to process with structural name
@@ -31,22 +31,11 @@
                          get_localization_metrics, eval_error_norm,
                          norm_transpose)
 from shallow import make_tanh_network, sparse_objective
-
-
-# Entries in structurals and subjects must correspoond,
-# i.e. structurals[i] === subjects[i].
-structurals = ['AKCLEE_103', 'AKCLEE_104', 'AKCLEE_105', 'AKCLEE_106',
-               'AKCLEE_107', 'AKCLEE_109', 'AKCLEE_110', 'AKCLEE_115',
-               'AKCLEE_117', 'AKCLEE_118', 'AKCLEE_119', 'AKCLEE_121',
-               'AKCLEE_125', 'AKCLEE_126', 'AKCLEE_131', 'AKCLEE_132']
-subjects = ['eric_sps_03', 'eric_sps_04', 'eric_sps_05', 'eric_sps_06',
-            'eric_sps_07', 'eric_sps_09', 'eric_sps_10', 'eric_sps_15',
-            'eric_sps_17', 'eric_sps_18', 'eric_sps_19', 'eric_sps_21',
-            'eric_sps_25', 'eric_sps_26', 'eric_sps_31', 'eric_sps_32']
+from config import common_params, eval_params, conf_structurals, conf_subjects
 
 # Removing eric_sps_32/AKCL_132 b/c of vertex issue
-structurals = structurals[:-1]
-subjects = subjects[:-1]
+structurals = conf_structurals[:-1]
+subjects = conf_subjects[:-1]
 
 
 if __name__ == "__main__":
@@ -64,13 +53,10 @@
         struct = argv['--subj']
         subj = subjects[structurals.index(struct)]
 
-    # Set params
-    dt = 0.001
-    noise_snr = np.inf
-    rho = 0.05
-    lam = 1.
-    n_avg_verts = 25  # Number of verts to avg when determining est position
-    n_test_verts = 1000  # Probably should be <= 1000 to avoid mem problems
+    # Number of verts to avg when determining est position
+    n_avg_verts = eval_params['n_avg_verts']
+    # Probably should be <= 1000 to avoid mem problems
+    n_test_verts = eval_params['n_test_verts']
 
     sess = tf.Session()
 
@@ -86,22 +72,26 @@
 
     print("Reconstructing model and restoring saved weights")
     # Reconstruct network
-    yhat, h_list, x_sensor = make_tanh_network(sensor_dim, source_dim)
+    network_dims = [source_dim // 2, source_dim // 2, source_dim]
+    yhat, h_list, x_sensor = make_tanh_network(sensor_dim, source_dim, network_dims)
     sparse_cost, y_source, tf_rho, tf_lam = sparse_objective(sensor_dim, source_dim,
                                                              yhat, h_list,
                                                              sess)
     saver = tf.train.Saver()
-    saver.restore(sess, 'model_AKCLEE_107')
+    saver.restore(sess, model_fname)
 
     print("\nEvaluating deep learning approach...\n")
 
     # Simulate unit dipole activations
-    rand_verts = np.sort(np.random.randint(0, n_verts, n_test_verts))
+
+    rand_verts = np.sort(np.random.choice(range(n_verts), n_test_verts,
+                                          replace=False))
     sim_vert_data = np.eye(n_verts)[:, rand_verts]
-    evoked, stc = gen_evoked_subject(sim_vert_data, fwd, cov, evoked_info, dt,
-                                     noise_snr)
+    evoked, stc = gen_evoked_subject(sim_vert_data, fwd, cov, evoked_info,
+                                     common_params['dt'],
+                                     common_params['SNR'])
 
-    #TODO: normalize data? Transfer normalization multipliers? (maybe with sklearn)
+    # Normalize data and transpose so it's (n_observations x n_chan)
     x_test = norm_transpose(evoked.data)
     y_test = norm_transpose(stc.data)
 
@@ -109,10 +99,11 @@
     vert_positions = get_all_vert_positions(inv['src'])
     true_act_positions = vert_positions[rand_verts, :]
 
-    feed_dict = {x_sensor: x_test, y_source: y_test, tf_rho: rho, tf_lam: lam}
+    feed_dict = {x_sensor: x_test, y_source: y_test,
+                 tf_rho: common_params['rho'], tf_lam: common_params['lam']}
     src_est_dl = sess.run(yhat, feed_dict)
     stc_dl = SourceEstimate(src_est_dl.T, vertices=vert_list, subject=struct,
-                            tmin=0, tstep=0.001)
+                            tmin=0, tstep=common_params['dt'])
 
     # Calculate vector norm error
     error_norm_dl = eval_error_norm(y_test, src_est_dl)
@@ -127,23 +118,22 @@
     #
     # Evaluate standard MNE methods
     #
-    stc_mne = apply_inverse(evoked, inv, method='sLORETA')
-    src_est_mne = stc_mne.data.T
+    stc_std = apply_inverse(evoked, inv, method=eval_params['linear_inv'])
+    src_est_std = stc_std.data.T
 
     # Calculate vector norm error
-    error_norm_mne = eval_error_norm(y_test, src_est_mne)
-    est_act_positions = get_largest_dip_positions(src_est_mne, n_avg_verts,
+    error_norm_std = eval_error_norm(y_test, src_est_std)
+    est_act_positions = get_largest_dip_positions(src_est_std, n_avg_verts,
                                                   vert_positions)
-    accuracy_mne, point_spread_mne = get_localization_metrics(true_act_positions,
+    accuracy_std, point_spread_std = get_localization_metrics(true_act_positions,
                                                               est_act_positions)
 
-    '''
-    print('\bDeep learning; error norm average for {} verts: {:0.4f}'.format(
+    sess.close()
+    print('\bShallow; error norm average for {} verts: {:0.4f}'.format(
         n_test_verts, np.mean(error_norm_dl)))
     print('Linear method: error norm average for {} verts: {:0.4f}\n'.format(
-        n_test_verts, np.mean(error_norm_mne)))
-        '''
-    print('Deep learning; Accuracy: {:0.5f}, Avg. Point spread: {:0.5f}'.format(
+        n_test_verts, np.mean(error_norm_std)))
+    print('Shallow; Loc. accuracy: {:0.5f}, Avg. Point spread: {:0.5f}'.format(
         accuracy_dl, np.mean(point_spread_dl)))
-    print('Linear method; Accuracy: {:0.5f}, Avg. Point spread: {:0.5f}\n'.format(
-        accuracy_mne, np.mean(point_spread_mne)))
+    print('Linear method; Loc. accuracy: {:0.5f}, Avg. Point spread: {:0.5f}\n'.format(
+        accuracy_std, np.mean(point_spread_std)))

From 70c07ea5f6dc52fc1399c562bf68e55c05f99826 Mon Sep 17 00:00:00 2001
From: wronk <wronk.mark@gmail.com>
Date: Tue, 15 Nov 2016 16:33:44 -0800
Subject: [PATCH 12/15] Use config file, streamline how network is instantiated

---
 scratch/shallow.py | 125 ++++++++++++++++++---------------------------
 1 file changed, 50 insertions(+), 75 deletions(-)

diff --git a/scratch/shallow.py b/scratch/shallow.py
index 609300c..b423202 100644
--- a/scratch/shallow.py
+++ b/scratch/shallow.py
@@ -32,25 +32,17 @@
                          get_data_batch, get_all_vert_positions,
                          get_largest_dip_positions, get_localization_metrics,
                          eval_error_norm, norm_transpose)
-
-# Entries in structurals and subjects must correspoond,
-# i.e. structurals[i] === subjects[i].
-structurals = ['AKCLEE_103', 'AKCLEE_104', 'AKCLEE_105', 'AKCLEE_106',
-               'AKCLEE_107', 'AKCLEE_109', 'AKCLEE_110', 'AKCLEE_115',
-               'AKCLEE_117', 'AKCLEE_118', 'AKCLEE_119', 'AKCLEE_121',
-               'AKCLEE_125', 'AKCLEE_126', 'AKCLEE_131', 'AKCLEE_132']
-subjects = ['eric_sps_03', 'eric_sps_04', 'eric_sps_05', 'eric_sps_06',
-            'eric_sps_07', 'eric_sps_09', 'eric_sps_10', 'eric_sps_15',
-            'eric_sps_17', 'eric_sps_18', 'eric_sps_19', 'eric_sps_21',
-            'eric_sps_25', 'eric_sps_26', 'eric_sps_31', 'eric_sps_32']
+from config import (common_params, training_params, conf_structurals,
+                    conf_subjects)
 
 # Removing eric_sps_32/AKCL_132 b/c of vertex issue
-structurals = structurals[:-1]
-subjects = subjects[:-1]
+structurals = conf_structurals[:-1]
+subjects = conf_subjects[:-1]
+seed = 0
 
 
 def weight_variable(shape, name=None):
-    init = tf.truncated_normal(shape, stddev=0.11)
+    init = tf.truncated_normal(shape, stddev=0.1)
     if name is not None:
         return tf.Variable(init, name=name)
 
@@ -66,37 +58,33 @@ def bias_variable(shape, name=None):
     return tf.Variable(init)
 
 
-def make_tanh_network(sensor_dim, source_dim):
+def make_tanh_network(sensor_dim, source_dim, dims):
     """Function to create neural network"""
 
     x_sensor = tf.placeholder(tf.float32, shape=[None, sensor_dim],
                               name="x_sensor")
 
-    W1 = weight_variable([sensor_dim, source_dim // 4], name='W1')
-    b1 = bias_variable([source_dim // 4], name='b1')
-    h1 = tf.nn.tanh(tf.matmul(x_sensor, W1) + b1)
-
-    W2 = weight_variable([source_dim // 4, source_dim // 2], name='W2')
-    b2 = bias_variable([source_dim // 2], name='b2')
-    h2 = tf.nn.tanh(tf.matmul(h1, W2) + b2)
-
-    W3 = weight_variable([source_dim // 2, source_dim // 2], name='W3')
-    b3 = bias_variable([source_dim // 2], name='b3')
-    h3 = tf.nn.tanh(tf.matmul(h2, W3) + b3)
+    W_list, b_list, h_list = [], [], []
+    dims.insert(0, sensor_dim)  # Augment with input layer dim
 
-    W4 = weight_variable([source_dim // 2, source_dim], name='W4')
-    b4 = bias_variable([source_dim], name='b4')
-    yhat = tf.nn.tanh(tf.matmul(h3, W4) + b4)
+    # Loop through and create network layer at each step
+    for di, (dim1, dim2) in enumerate(zip(dims[:-1], dims[1:])):
+        W_list.append(weight_variable([dim1, dim2], name='W%i' % di))
+        b_list.append(bias_variable([dim2], name='b%i' % di))
 
-    # Attach histogram summaries to weight functions
-    tf.histogram_summary('W1 Hist', W1)
-    tf.histogram_summary('W2 Hist', W2)
-    tf.histogram_summary('W3 Hist', W3)
-    tf.histogram_summary('W4 Hist', W4)
+        # Handle input layer separately
+        if di == 0:
+            h_list.append(tf.nn.tanh(tf.matmul(x_sensor, W_list[-1]) +
+                                     b_list[-1]))
+        else:
+            h_list.append(tf.nn.tanh(tf.matmul(h_list[-1], W_list[-1]) +
+                                     b_list[-1]))
 
-    h_list = [h1, h2, h3]
+        # Attach histogram summaries to weight functions
+        tf.histogram_summary('W%i Hist' % di, W_list[-1])
 
-    return yhat, h_list, x_sensor
+    # Return y_hat (final h_list layer), rest of h_list, and data placeholder
+    return h_list[-1], h_list[:-1], x_sensor
 
 
 def bernoulli(act, rho):
@@ -145,83 +133,68 @@ def sparse_objective(sensor_dim, source_dim, yhat, h_list, sess):
         struct = argv['--subj']
         subj = subjects[structurals.index(struct)]
 
-    # Set params
-    n_training_times_noise = 25000
-    n_training_times_sparse = 25000
-    n_training_times = n_training_times_noise + n_training_times_sparse
-
-    dt = 0.001
-    SNR = np.inf
-    batch_size = 1000
+    n_training_times = training_params['n_training_times_noise'] + \
+        training_params['n_training_times_sparse']
 
-    n_iter = int(100000)
-    rho = 0.1
-    lam = 1.
+    n_training_iters = training_params['n_training_iters']
 
     save_network = True
-    fpath_save = op.join('model_subj_{}_iters.meta'.format(n_iter))
-    n_training_times = n_training_times_noise + n_training_times_sparse
+    fpath_save = op.join('model_subj_{}_iters.meta'.format(n_training_iters))
+    n_training_times = training_params['n_training_times_noise'] + \
+        training_params['n_training_times_sparse']
 
     # Get subject info and create data
     subj, fwd, inv, cov, evoked_info = load_subject_objects(megdir, subj,
                                                             struct)
     sensor_dim = len(fwd['info']['ch_names'])
     source_dim = fwd['src'][0]['nuse'] + fwd['src'][1]['nuse']
+    network_dims = [source_dim // 2, source_dim // 2, source_dim]
 
     sparse_dens = 1. / source_dim  # Density of non-zero vals in sparse training data
     sparse_dist = np.random.randn
 
-    noise_data = np.random.randn(source_dim, n_training_times_noise)
-    sparse_data = random_sparse(source_dim, n_training_times_sparse,
-                                sparse_dens, random_state=0,
+    noise_data = np.random.randn(source_dim, training_params['n_training_times_noise'])
+    sparse_data = random_sparse(source_dim, training_params['n_training_times_sparse'],
+                                sparse_dens, random_state=seed,
                                 dtype=np.float32, data_rvs=sparse_dist).toarray()
     sim_train_data = np.concatenate((noise_data, sparse_data), axis=1)
 
     print("Simulating and normalizing training data")
-    evoked, stc = gen_evoked_subject(sim_train_data, fwd, cov, evoked_info, dt,
-                                     SNR)
+    evoked, stc = gen_evoked_subject(sim_train_data, fwd, cov, evoked_info, common_params['dt'],
+                                     common_params['SNR'])
     # Normalize training data to lie between -1 and 1
+    # XXX: Appropriate to do this? Maybe need to normalize src space only
+    # before generating sens data
     x_train = norm_transpose(evoked.data)
     y_train = norm_transpose(sim_train_data)
 
-    """
-    print("Simulating and normalizing testing data")
-    evoked, stc = gen_evoked_subject(sim_test_data, fwd, cov, evoked_info, dt,
-                                     SNR)
-    # Normalize testing data to lie between -1 and 1
-    x_test = norm_transpose(evoked.data)
-    y_test = norm_transpose(sim_vert_data)
-
-    val_monitor = tf.contrib.learn.monitors.ValidationMonitor(
-        x_test, y_test, every_n_steps=20)
-    """
-
     print("Building neural net")
     sess = tf.Session()
-
-    yhat, h_list, x_sensor = make_tanh_network(sensor_dim, source_dim)
+    yhat, h_list, x_sensor = make_tanh_network(sensor_dim, source_dim, network_dims)
     sparse_cost, y_source, tf_rho, tf_lam = sparse_objective(sensor_dim,
                                                              source_dim, yhat,
                                                              h_list, sess)
     merged_summaries = tf.merge_all_summaries()
     train_writer = tf.train.SummaryWriter('./train_summaries', sess.graph)
 
-    train_step = tf.train.AdamOptimizer(1e-4).minimize(sparse_cost)
+    train_step = tf.train.AdamOptimizer(
+        training_params['opt_lr']).minimize(sparse_cost)
 
     saver = tf.train.Saver()
     sess.run(tf.initialize_all_variables())
     print("\nSim params\n----------\nn_iter: {}\nn_training_times: {}\nSNR: \
-          {}\nbatch_size: {}\n".format(n_iter, n_training_times,
-                                       str(SNR), batch_size))
+          {}\nbatch_size: {}\n".format(n_training_iters, n_training_times,
+                                       str(common_params['SNR']), training_params['batch_size']))
     print("Optimizing...")
-    for ii in xrange(n_iter):
+    for ii in xrange(n_training_iters):
         # Get random batch of data
-        x_sens_batch, y_src_batch = get_data_batch(x_train, y_train, ii,
-                                                   batch_size)
+        x_sens_batch, y_src_batch = get_data_batch(x_train, y_train,
+                                                   training_params['batch_size'], seed=ii)
 
         # Take training step
         feed_dict = {x_sensor: x_sens_batch, y_source: y_src_batch,
-                     tf_rho: rho, tf_lam: lam}
+                     tf_rho: common_params['rho'],
+                     tf_lam: common_params['lam']}
 
         # Save summaries for tensorboard every 10 steps
         if ii % 10 == 0:
@@ -232,5 +205,7 @@ def sparse_objective(sensor_dim, source_dim, yhat, h_list, sess):
         else:
             _, obj = sess.run([train_step, sparse_cost], feed_dict)
 
+    sess.close()
+
     if save_network:
         saver.save(sess, 'model_{}'.format(struct))

From 9ec99dcd3e3ec10560593c1c5ef67dd47a51cd35 Mon Sep 17 00:00:00 2001
From: wronk <wronk.mark@gmail.com>
Date: Tue, 15 Nov 2016 16:35:03 -0800
Subject: [PATCH 13/15] Minor fixes

---
 scratch/shallow_fun.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scratch/shallow_fun.py b/scratch/shallow_fun.py
index 9d7416f..3d5bcb9 100644
--- a/scratch/shallow_fun.py
+++ b/scratch/shallow_fun.py
@@ -68,11 +68,11 @@ def norm_transpose(data):
     return data_fixed
 
 
-def get_data_batch(x_data, y_label, batch_num, batch_size):
+def get_data_batch(x_data, y_label, batch_size, seed=None):
     """Function to get a random sampling of an evoked and stc pair"""
 
     # Get random sampling of data, seed by batch num
-    np.random.seed(batch_num)
+    np.random.seed(seed)
     rand_inds = np.random.randint(x_data.shape[0], size=batch_size)
 
     return x_data[rand_inds, :], y_label[rand_inds, :]
@@ -104,7 +104,7 @@ def get_all_vert_positions(src):
 
 
 def eval_error_norm(src_data_orig, src_data_est):
-    """Function to compute norm of the error vector at each dipoe
+    """Function to compute norm of the error vector at each dipole
 
     Parameters
     ----------

From 84783e82347809bbccf4094a4aabdf08d35d9c0b Mon Sep 17 00:00:00 2001
From: wronk <wronk.mark@gmail.com>
Date: Tue, 15 Nov 2016 16:35:19 -0800
Subject: [PATCH 14/15] Add config file to separate param specification

---
 scratch/config.py | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 scratch/config.py

diff --git a/scratch/config.py b/scratch/config.py
new file mode 100644
index 0000000..4c1fd7d
--- /dev/null
+++ b/scratch/config.py
@@ -0,0 +1,42 @@
+"""
+config.py
+
+@author: wronk
+
+Configuration file for training/testing shallow inverse model
+"""
+
+import numpy as np
+
+# Entries in structurals and subjects must correspoond,
+# i.e. structurals[i] === subjects[i].
+
+# Structural MRI subject names
+conf_structurals = ['AKCLEE_103', 'AKCLEE_104', 'AKCLEE_105', 'AKCLEE_106',
+                    'AKCLEE_107', 'AKCLEE_109', 'AKCLEE_110', 'AKCLEE_115',
+                    'AKCLEE_117', 'AKCLEE_118', 'AKCLEE_119', 'AKCLEE_121',
+                    'AKCLEE_125', 'AKCLEE_126', 'AKCLEE_131', 'AKCLEE_132']
+
+# Experimental subject names
+conf_subjects = ['eric_sps_03', 'eric_sps_04', 'eric_sps_05', 'eric_sps_06',
+                 'eric_sps_07', 'eric_sps_09', 'eric_sps_10', 'eric_sps_15',
+                 'eric_sps_17', 'eric_sps_18', 'eric_sps_19', 'eric_sps_21',
+                 'eric_sps_25', 'eric_sps_26', 'eric_sps_31', 'eric_sps_32']
+
+# Model params for training/testing
+common_params = dict(dt=0.001,
+                     SNR=np.inf,
+                     rho=0.1,
+                     lam=1.)  # Weighting of regularizer cost
+
+# Model training params
+training_params = dict(n_training_times_noise=1000,  # Number of noise data samples
+                       n_training_times_sparse=0,  # Number of sparse data samples
+                       batch_size=100,
+                       n_training_iters=int(1e4),  # Number of training iterations
+                       opt_lr=1e-4)  # Learning rate for optimizer
+
+# Model evaluation params
+eval_params = dict(n_avg_verts=25,  # Number of verts to avg when determining est position
+                   n_test_verts=1000,  # Probably should be <= 1000 to avoid mem problems
+                   linear_inv='MNE')  #sLORETA or MNE

From cfe2f8fae27e66be2fc4b4ef2757ff90e2703c7c Mon Sep 17 00:00:00 2001
From: wronk <wronk.mark@gmail.com>
Date: Thu, 17 Nov 2016 14:53:38 -0800
Subject: [PATCH 15/15] Update saving, other minor things

---
 scratch/config.py      |  4 ++--
 scratch/shallow.py     | 12 ++++++++----
 scratch/shallow_fun.py |  2 +-
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/scratch/config.py b/scratch/config.py
index 4c1fd7d..74ac073 100644
--- a/scratch/config.py
+++ b/scratch/config.py
@@ -33,10 +33,10 @@
 training_params = dict(n_training_times_noise=1000,  # Number of noise data samples
                        n_training_times_sparse=0,  # Number of sparse data samples
                        batch_size=100,
-                       n_training_iters=int(1e4),  # Number of training iterations
+                       n_training_iters=int(1e3),  # Number of training iterations
                        opt_lr=1e-4)  # Learning rate for optimizer
 
 # Model evaluation params
 eval_params = dict(n_avg_verts=25,  # Number of verts to avg when determining est position
                    n_test_verts=1000,  # Probably should be <= 1000 to avoid mem problems
-                   linear_inv='MNE')  #sLORETA or MNE
+                   linear_inv='MNE')  # sLORETA or MNE
diff --git a/scratch/shallow.py b/scratch/shallow.py
index b423202..e076b0e 100644
--- a/scratch/shallow.py
+++ b/scratch/shallow.py
@@ -201,11 +201,15 @@ def sparse_objective(sensor_dim, source_dim, yhat, h_list, sess):
             _, obj, summary = sess.run([train_step, sparse_cost,
                                         merged_summaries], feed_dict)
             train_writer.add_summary(summary, ii)
-            print("\titer: %d, cost: %.2f" % (ii, obj))
+            print("\titer: %04i, cost: %.2f" % (ii, obj))
         else:
             _, obj = sess.run([train_step, sparse_cost], feed_dict)
 
-    sess.close()
-
     if save_network:
-        saver.save(sess, 'model_{}'.format(struct))
+        save_fold = 'saved_models'
+        if not os.path.isdir(save_fold):
+            os.mkdir(save_fold)
+
+        saver.save(sess, save_fold + '/model_{}'.format(struct))
+
+    sess.close()
diff --git a/scratch/shallow_fun.py b/scratch/shallow_fun.py
index 3d5bcb9..9ee2031 100644
--- a/scratch/shallow_fun.py
+++ b/scratch/shallow_fun.py
@@ -53,7 +53,7 @@ def gen_evoked_subject(signal, fwd, cov, evoked_info, dt, noise_snr,
 
     evoked = simulate_evoked(fwd, stc, evoked_info, cov, noise_snr,
                              random_state=seed)
-    evoked.add_eeg_average_proj()
+    evoked.set_eeg_reference()
 
     return evoked, stc