diff --git a/tutorial.py b/tutorial.py
index 54aa840..e10db3f 100644
--- a/tutorial.py
+++ b/tutorial.py
@@ -2,6 +2,7 @@
 import matplotlib.pyplot as plt
 import matplotlib
 import numpy as np
+import sys # created by us
 
 import gridworld
 
@@ -10,10 +11,21 @@
 # This is a very simple script to play with RL params in a gridworld
 #
 # ----------------------------------------------------------------------- #
 #
-# The first thing you need to do is to understand the code below and
-# fill in the update_Q() function fill in the code so it can do either
-# SARSA or Q-Learning (and later, you can explore Monte Carlo/etc).
-# There's a section with task parameters and a section with
+# The first thing you need to do is to understand the code
+# below. Then, fill in the update_Q_SARSA() and update_Q_Learning()
+# functions. You can use execute_configuration() to ensure your
+# algorithm runs and produces plots before exploring with the help of
+# run_parameter_sweep().
+#
+# Later, you can explore Monte Carlo/etc. Note that adding Monte
+# Carlo, n-step TD, or Q(sigma) support requires additional
+# changes to the per-episode processing, including storing
+# history. For simplicity, we currently use a simple if-else to toggle
+# between single-step methods.
+#
+# The Config section allows you to tweak default parameters as well as
+# run parameter sweeps to explore the effects on value functions and
+# policies.
 #
 # Next, there are several different domains below. Here are some
 # things to explore to get you started, and then you can create your
@@ -26,7 +38,9 @@
 # Exploration #2: Try the test_maze. What happens as you adjust the
 # discount factor? What about the learning rate? Does it help to
 # adjust the learning rate with time? The epsilon with time? Explore
-# both using Q-learning and SARSA.
+# both using Q-learning and SARSA. Changing the learning rate or
+# epsilon with time will require code changes in
+# execute_configuration().
 #
 # Exploration #3: Try the simple_grid with different pit_rewards and
 # action_error_prob: how does the policy change?
@@ -36,24 +50,25 @@
 # decreasing). How does the policy change? Does it matter whether
 # you use Q-learning or SARSA?
 #
-# Exploration #5: Adjust the code in gridworld.py to stop a fixed
-# number of iterations rather than when the goal is reached. Set the
-# number of iterations to be large. Does this change affect how
-# quickly the policy converges? How quickly the value function
-# converges? Explain what you see.
+# Exploration #5: Adjust the code in gridworld.py to stop after a
+# fixed number of iterations rather than when the goal is reached.
+# Set the number of iterations to be large. Does this change affect
+# how quickly the policy converges? How quickly the value function
+# converges? Explain what you see.
 
 # -------------------- #
 #   Different Tasks    #
 # -------------------- #
-# You can also create your own!
-short_hallway = [
+# You can also create your own!
+TASK_MAP = {
+'short_hallway' : [
     '###', # '#' = wall
     '#o#', # 'o' = origin grid cell
     '#.#', # '.' = empty grid cell
     '#*#', # '*' = goal
-    '###']
+    '###'],
 
-long_hallway = [
+'long_hallway' : [
     '###', # '#' = wall
     '#o#', # 'o' = origin grid cell
     '#.#', # '.' = empty grid cell
@@ -64,9 +79,9 @@
     '#.#', # '.' = empty grid cell
     '#.#', # '.' = empty grid cell
     '#*#', # '*' = goal
-    '###']
+    '###'],
 
-test_maze = [
+'test_maze' : [
     '#########',
     '#..#....#',
     '#..#..#.#',
@@ -75,22 +90,23 @@
     '#....*#.#',
     '#######.#',
     '#o......#',
-    '#########']
+    '#########'],
 
-simple_grid = [
+'simple_grid' : [
     '#######',
     '#o....#',
     '#..X..#',
     '#....*#',
-    '#######']
+    '#######'],
 
-cliff_grid = [
+'cliff_grid' : [
     '#######',
     '#.....#',
     '#.##..#',
     '#o...*#',
     '#XXXXX#',
-    '#######']
+    '#######'],
+}
 
 # ----------------- #
 #   Key Functions   #
 # -------------------- #
@@ -103,72 +119,114 @@ def policy( state , Q_table , action_count , epsilon ):
         action = np.argmax( Q_table[ state , : ] )
     return action
 
-# Update the Q table
-def update_Q( Q_table , alpha , gamma , state , action , reward , new_state , new_action ):
-    # Fill in this function
+# Update the Q table.
+def update_Q_SARSA( Q_table , alpha , gamma , state , action , reward , new_state , new_action ):
+    sys.exit("TODO: Implement update_Q_SARSA")
     return Q_table
-
+
+def update_Q_Learning( Q_table , alpha , gamma , state , action , reward , new_state ):
+    sys.exit("TODO: Implement update_Q_Learning!")
+    return Q_table
+
 # -------------------- #
-#   Create the Task    #
+#        Config        #
 # -------------------- #
-# Task Parameters
-task_name = short_hallway
-action_error_prob = .1
-pit_reward = -50
-task = gridworld.GridWorld( task_name ,
-                            action_error_prob=action_error_prob,
-                            rewards={'*': 50, 'moved': -1, 'hit-wall': -1,'X':pit_reward} )
-task.get_max_reward()
-
-# ---------------- #
-#   Run the Task   #
-# ---------------- #
-# Algorithm Parameters
-alpha = .5
-epsilon = .1
-gamma = .99
-state_count = task.num_states
-action_count = task.num_actions
-episode_count = 250
-rep_count = 10
-
-# Loop over some number of episodes
-episode_reward_set = np.zeros( ( rep_count , episode_count ) )
-for rep_iter in range( rep_count ):
-
-    # Initialize the Q table
-    Q_table = np.zeros( ( state_count , action_count ) )
-
-    # Loop until the episode is done
-    for episode_iter in range( episode_count ):
+#
+# Change any values that you want applied to all configurations in the
+# DEFAULT_CONFIG.
+#
+# Use sweep_params_row and sweep_params_column to run a 1- or 2-dimensional
+# parameter sweep on any of the fields in the DEFAULT_CONFIG to
+# observe effects.
+DEFAULT_CONFIG = {
+    # Task Parameters
+    'task_name' : 'short_hallway', # See TASK_MAP keys above.
+    'action_error_prob' : 0.1, # [0,1]
+    'pit_reward' : -50, # Go nuts!
+    # Algorithm Parameters
+    'method' : 'sarsa', # Current Values: 'sarsa', 'qlearning'
+    'alpha' : .5, # [0,1]
+    'epsilon' : .1, # [0,1]
+    'gamma' : .99, # [0,1]
+    'episode_count' : 250, # Go nuts!
+    'rep_count' : 10, # Go nuts!
+    'episode_max_length' : 300, # Save yourself from infinite loops.
+}
+
+sweep_params_row = {
+    'key' : 'gamma',
+    'values' : [0, .1, .5, .9, .99]
+}
+sweep_params_column = {
+    'key' : 'method',
+    'values' : ['sarsa', 'qlearning']
+}
+
+def run_parameter_sweep(sweep_params_row = None, sweep_params_column = None):
+    if not sweep_params_row:
+        execute_configuration()
+    else:
+        param_base = DEFAULT_CONFIG.copy() # copy so the sweep does not mutate DEFAULT_CONFIG
+        for row, param_row_value in enumerate(sweep_params_row['values']):
+            param_base[sweep_params_row['key']] = param_row_value
+            if not sweep_params_column:
+                execute_configuration(param_base, row_index=row, column_index=0, width=1, height=len(sweep_params_row['values']))
+            else:
+                for column, param_column_value in enumerate(sweep_params_column['values']):
+                    param_base[sweep_params_column['key']] = param_column_value
+                    execute_configuration(param_base, row_index=row, column_index=column, width=len(sweep_params_column['values']), height=len(sweep_params_row['values']))
+
+def execute_configuration(config=DEFAULT_CONFIG, row_index=0, column_index=0, height=1, width=1):
+    task = gridworld.GridWorld(TASK_MAP[config['task_name']] ,
+                               action_error_prob=config['action_error_prob'],
+                               rewards={'*': 50, 'moved': -1, 'hit-wall': -1,'X':config['pit_reward']} )
+    task.get_max_reward()
+
+    # Loop over some number of episodes
+    episode_reward_set = np.zeros( ( config['rep_count'] , config['episode_count'] ) )
+    for rep_iter in range( config['rep_count'] ):
+
+        # Initialize the Q table
+        Q_table = np.zeros( ( task.num_states , task.num_actions ) )
+
+        # Loop until the episode is done
+        for episode_iter in range( config['episode_count'] ):
 
-        # Start the task
-        task.reset()
-        state = task.observe()
-        action = policy( state , Q_table , action_count , epsilon )
-        episode_reward_list = []
-        task_iter = 0
-
-        # Loop until done -- check when do we get the final state reward?
-        while True:
-            task_iter = task_iter + 1
-            new_state, reward = task.perform_action( action )
-            new_action = policy( new_state , Q_table , action_count , epsilon )
-
-            # update the Q_table
-            Q_table = update_Q( Q_table , alpha , gamma ,
-                                state , action , reward , new_state , new_action )
+            # Start the task
+            task.reset()
+            state = task.observe()
+            action = policy( state , Q_table , task.num_actions , config['epsilon'] )
+            episode_reward_list = []
+            task_iter = 0
 
-            # store the data
-            episode_reward_list.append( reward )
+            # Loop until done -- check when do we get the final state reward?
+            while True:
+                task_iter = task_iter + 1
+                new_state, reward = task.perform_action( action )
+                new_action = policy( new_state , Q_table , task.num_actions , config['epsilon'] )
 
-            # stop if at goal/else update for the next iteration
-            if task.is_terminal( state ):
-                episode_reward_set[ rep_iter , episode_iter ] = np.sum( episode_reward_list )
-                break
-            else:
-                state = new_state
-                action = new_action
+                # Update the Q_table.
+                if config['method'] == 'sarsa':
+                    Q_table = update_Q_SARSA( Q_table , config['alpha'] , config['gamma'] ,
+                                              state , action , reward , new_state , new_action )
+                elif config['method'] == 'qlearning':
+                    Q_table = update_Q_Learning( Q_table , config['alpha'] , config['gamma'] ,
+                                                 state , action , reward , new_state )
+                else:
+                    sys.exit("Unrecognized algorithm %s. Consider adding support?"
+                             % config['method'])
+
+                # store the data
+                episode_reward_list.append( reward )
+
+                # stop if at goal/else update for the next iteration
+                if task.is_terminal( state ) or task_iter > config['episode_max_length']:
+                    episode_reward_set[ rep_iter , episode_iter ] = np.sum( episode_reward_list )
+                    break
+                else:
+                    state = new_state
+                    action = new_action
+
+    add_plot(config, Q_table, episode_reward_set, row_index, column_index, width, height)
 
 # -------------- #
 #   Make Plots   #
 # -------------- #
@@ -180,52 +238,64 @@ def plot_arrow( location , direction , plot ):
     arrow = plt.arrow( location[0] , location[1] , dx , dy , fc="k", ec="k", head_width=0.05, head_length=0.1 )
     plot.add_patch(arrow)
 
-# Useful stats for the plot
-row_count = len( task_name )
-col_count = len( task_name[0] )
-value_function = np.reshape( np.max( Q_table , 1 ) , ( row_count , col_count ) )
-policy_function = np.reshape( np.argmax( Q_table , 1 ) , ( row_count , col_count ) )
-wall_info = .5 + np.zeros( ( row_count , col_count ) )
-wall_mask = np.zeros( ( row_count , col_count ) )
-for row in range( row_count ):
-    for col in range( col_count ):
-        if task_name[row][col] == '#':
-            wall_mask[row,col] = 1
-wall_info = np.ma.masked_where( wall_mask==0 , wall_info )
-
-# Plot the rewards
-plt.subplot( 1 , 2 , 1 )
-plt.plot( episode_reward_set.T )
-plt.title( 'Rewards per Episode (each line is a rep)' )
-plt.xlabel( 'Episode Number' )
-plt.ylabel( 'Sum of Rewards in Episode' )
-
-# value function plot
-plt.subplot( 1 , 2 , 2 )
-plt.imshow( value_function , interpolation='none' , cmap=matplotlib.cm.jet )
-plt.colorbar()
-plt.imshow( wall_info , interpolation='none' , cmap=matplotlib.cm.gray )
-plt.title( 'Value Function' )
-
-# policy plot
-# plt.imshow( 1 - wall_mask , interpolation='none' , cmap=matplotlib.cm.gray )
-for row in range( row_count ):
-    for col in range( col_count ):
-        if wall_mask[row][col] == 1:
-            continue
-        if policy_function[row,col] == 0:
-            dx = 0; dy = -.5
-        if policy_function[row,col] == 1:
-            dx = 0; dy = .5
-        if policy_function[row,col] == 2:
-            dx = .5; dy = 0
-        if policy_function[row,col] == 3:
-            dx = -.5; dy = 0
-        plt.arrow( col , row , dx , dy , shape='full', fc='w' , ec='w' , lw=3, length_includes_head=True, head_width=.2 )
-plt.title( 'Policy' )
+def add_plot(config, Q_table, episode_reward_set, row_index, column_index, width, height):
+    # Useful stats for the plot
+    task_map = TASK_MAP[config['task_name']]
+    row_count = len( task_map )
+    col_count = len( task_map[0] )
+    value_function = np.reshape( np.max( Q_table , 1 ) , ( row_count , col_count ) )
+    policy_function = np.reshape( np.argmax( Q_table , 1 ) , ( row_count , col_count ) )
+    wall_info = .5 + np.zeros( ( row_count , col_count ) )
+    wall_mask = np.zeros( ( row_count , col_count ) )
+    for row in range( row_count ):
+        for col in range( col_count ):
+            if task_map[row][col] == '#':
+                wall_mask[row,col] = 1
+    wall_info = np.ma.masked_where( wall_mask==0 , wall_info )
+
+    # Plot the rewards
+    plt.subplot( height , width*2 , (row_index * (width*2) + column_index*2 + 1) )
+    plt.plot( episode_reward_set.T )
+    plt.title( 'Rewards per Episode (each line is a rep)' )
+    plt.xlabel( 'Episode Number' )
+    plt.ylabel( 'Sum of Rewards in Episode' )
+
+    # value function plot
+    plt.subplot( height , width*2 , (row_index * (width*2) + column_index*2 + 2) )
+    plt.imshow( value_function , interpolation='none' , cmap=matplotlib.cm.jet )
+    plt.colorbar()
+    plt.imshow( wall_info , interpolation='none' , cmap=matplotlib.cm.gray )
+
+    # policy plot
+    for row in range( row_count ):
+        for col in range( col_count ):
+            if wall_mask[row][col] == 1:
+                continue
+            if policy_function[row,col] == 0:
+                dx = 0; dy = -.5
+            if policy_function[row,col] == 1:
+                dx = 0; dy = .5
+            if policy_function[row,col] == 2:
+                dx = .5; dy = 0
+            if policy_function[row,col] == 3:
+                dx = -.5; dy = 0
+            plt.arrow( col , row , dx , dy , shape='full', fc='w' , ec='w' , lw=3, length_includes_head=True, head_width=.2 )
+    plt.title( 'Value Function\n(Policy As Arrows)\nFor %s\nOn %s' % (config['method'], config['task_name']))
+
+
+# -------------------- #
+# Kick off the actual execution here.
+# -------------------- #
+plt.figure(figsize=(10*len(sweep_params_column['values']), 4*len(sweep_params_row['values'])))
+
+run_parameter_sweep(sweep_params_row, sweep_params_column)
+
 plt.show( block=False )
 
-# If you want to interact with it further...
+# If you want to interact with it further...
+# Note: If you ran more than one config, you will be interacting with
+# the environment of the last one.
+# Note: Type 'q' (or 'quit') to leave the debugger when you're done.
 import pdb
 pdb.set_trace()
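
Implementation notes (these are reference sketches, not part of the patch):

1) The two TODO stubs above correspond to the usual one-step temporal-difference updates. A minimal sketch of what they could look like, assuming the standard SARSA and Q-learning targets and the same signatures used in the diff (the stubs themselves are deliberately left for the reader to fill in):

import numpy as np

# One possible SARSA update: Q(s,a) <- Q(s,a) + alpha * ( r + gamma * Q(s',a') - Q(s,a) )
def update_Q_SARSA( Q_table , alpha , gamma , state , action , reward , new_state , new_action ):
    td_target = reward + gamma * Q_table[ new_state , new_action ]
    Q_table[ state , action ] += alpha * ( td_target - Q_table[ state , action ] )
    return Q_table

# One possible Q-learning update: Q(s,a) <- Q(s,a) + alpha * ( r + gamma * max_a' Q(s',a') - Q(s,a) )
def update_Q_Learning( Q_table , alpha , gamma , state , action , reward , new_state ):
    td_target = reward + gamma * np.max( Q_table[ new_state , : ] )
    Q_table[ state , action ] += alpha * ( td_target - Q_table[ state , action ] )
    return Q_table

Both versions update the table in place and return it, which matches how execute_configuration() reassigns Q_table after each call.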
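
2) Exploration #2 notes that changing the learning rate or epsilon over time requires code changes in execute_configuration(). One possible schedule, sketched as a small helper; the helper name and the 1/t form are illustrative choices, not something defined in the diff:

def decayed( value , decay , episode_iter ):
    # 1/t-style decay: with decay == 0 this reduces to the constant value.
    return value / ( 1.0 + decay * episode_iter )

# For example, inside the episode loop you could compute
#   epsilon_t = decayed( config['epsilon'] , 0.01 , episode_iter )
#   alpha_t   = decayed( config['alpha'] , 0.01 , episode_iter )
# and pass epsilon_t to policy() and alpha_t to the update_Q_* call;
# the 0.01 decay rate here is only an illustration.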
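
3) The sweep dictionaries are generic: 'key' can be any DEFAULT_CONFIG field and 'values' any list of settings for it. For instance, to sweep the learning rate against the two methods instead of the discount factor, the two module-level dicts could be replaced with something like the following (the particular alpha values are arbitrary examples):

sweep_params_row = {
    'key' : 'alpha',
    'values' : [.1, .3, .5, .7, .9]
}
sweep_params_column = {
    'key' : 'method',
    'values' : ['sarsa', 'qlearning']
}

run_parameter_sweep(sweep_params_row, sweep_params_column) then produces one row of plots per alpha value and one pair of columns per method.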