diff --git a/pynumdiff/basis_fit/_basis_fit.py b/pynumdiff/basis_fit/_basis_fit.py index 81e1abe..5a22c48 100644 --- a/pynumdiff/basis_fit/_basis_fit.py +++ b/pynumdiff/basis_fit/_basis_fit.py @@ -19,9 +19,8 @@ def spectraldiff(x, dt, params=None, options=None, high_freq_cutoff=None, even_e :param bool pad_to_zero_dxdt: if True, extend the data with extra regions that smoothly force the derivative to zero before taking FFT. - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. Use `high_freq_cutoff`, " + @@ -52,27 +51,25 @@ def spectraldiff(x, dt, params=None, options=None, high_freq_cutoff=None, even_e if even_extension is True: x = np.hstack((x, x[::-1])) - # If odd, make N even, and pad x + # Form wavenumbers N = len(x) - - # Define the frequency range. k = np.concatenate((np.arange(N//2 + 1), np.arange(-N//2 + 1, 0))) if N % 2 == 0: k[N//2] = 0 # odd derivatives get the Nyquist element zeroed out - omega = k*2*np.pi/(dt*N) # turn wavenumbers into frequencies in radians/s - # Frequency based smoothing: remove signals with a frequency higher than high_freq_cutoff + # Filter to zero out higher wavenumbers discrete_cutoff = int(high_freq_cutoff*N/2) # Nyquist is at N/2 location, and we're cutting off as a fraction of that - omega[discrete_cutoff:N-discrete_cutoff] = 0 + filt = np.ones(k.shape); filt[discrete_cutoff:N-discrete_cutoff] = 0 + + # Smoothed signal + X = np.fft.fft(x) + x_hat = np.real(np.fft.ifft(filt * X)) + x_hat = x_hat[padding:L+padding] # Derivative = 90 deg phase shift - dxdt_hat = np.real(np.fft.ifft(1.0j * omega * np.fft.fft(x))) + omega = 2*np.pi/(dt*N) # factor of 2pi/T turns wavenumbers into frequencies in radians/s + dxdt_hat = np.real(np.fft.ifft(1j * k * omega * filt * X)) dxdt_hat = dxdt_hat[padding:L+padding] - # Integrate to get x_hat - x_hat = utility.integrate_dxdt_hat(dxdt_hat, dt) - x0 = utility.estimate_integration_constant(x[padding:L+padding], x_hat) - x_hat = x_hat + x0 - return x_hat, dxdt_hat @@ -88,9 +85,8 @@ def rbfdiff(x, _t, sigma=1, lmbd=0.01): :param float sigma: controls width of radial basis functions :param float lmbd: controls smoothness - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if np.isscalar(_t): t = np.arange(len(x))*_t diff --git a/pynumdiff/finite_difference/_finite_difference.py b/pynumdiff/finite_difference/_finite_difference.py index 59e8c2b..4fd926b 100644 --- a/pynumdiff/finite_difference/_finite_difference.py +++ b/pynumdiff/finite_difference/_finite_difference.py @@ -13,6 +13,9 @@ def finitediff(x, dt, num_iterations=1, order=2): :param int num_iterations: number of iterations. 
If >1, the derivative is integrated with trapezoidal rule, that result is finite-differenced again, and the cycle is repeated num_iterations-1 times :param int order: 1, 2, or 4, controls which finite differencing scheme to employ + + :return: - **x_hat** (np.array) -- original x if :code:`num_iterations=1`, else smoothed x that yielded dxdt_hat + - **dxdt_hat** (np.array) -- estimated derivative of x """ if num_iterations < 1: raise ValueError("num_iterations must be >0") if order not in [1, 2, 4]: raise ValueError("order must be 1, 2, or 4") @@ -59,7 +62,7 @@ def finitediff(x, dt, num_iterations=1, order=2): dxdt_hat /= dt # don't forget to scale by dt, can't skip it this time if num_iterations > 1: # We've lost a constant of integration in the above - x_hat += utility.estimate_integration_constant(x, x_hat) # uses least squares + x_hat += utility.estimate_integration_constant(x, x_hat) return x_hat, dxdt_hat @@ -75,9 +78,8 @@ def first_order(x, dt, params=None, options={}, num_iterations=1): :param int num_iterations: number of iterations. If >1, the derivative is integrated with trapezoidal rule, that result is finite-differenced again, and the cycle is repeated num_iterations-1 times - :return: tuple[np.array, np.array] of\n - - **x_hat** -- original x if :code:`num_iterations=1`, else smoothed x that yielded dxdt_hat - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- original x if :code:`num_iterations=1`, else smoothed x that yielded dxdt_hat + - **dxdt_hat** (np.array) -- estimated derivative of x """ warn("`first_order` in past releases was actually calculating a second-order FD. Use `second_order` to achieve " + "approximately the same behavior. Note that odd-order methods have asymmetrical stencils, which causes " + @@ -99,9 +101,8 @@ def second_order(x, dt, num_iterations=1): :param int num_iterations: number of iterations. If >1, the derivative is integrated with trapezoidal rule, that result is finite-differenced again, and the cycle is repeated num_iterations-1 times - :return: tuple[np.array, np.array] of\n - - **x_hat** -- original x if :code:`num_iterations=1`, else smoothed x that yielded dxdt_hat - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- original x if :code:`num_iterations=1`, else smoothed x that yielded dxdt_hat + - **dxdt_hat** (np.array) -- estimated derivative of x """ warn("`second_order` is deprecated. Call `finitediff` with order 2 instead.", DeprecationWarning) return finitediff(x, dt, num_iterations, 2) @@ -116,9 +117,8 @@ def fourth_order(x, dt, num_iterations=1): :param int num_iterations: number of iterations. If >1, the derivative is integrated with trapezoidal rule, that result is finite-differenced again, and the cycle is repeated num_iterations-1 times - :return: tuple[np.array, np.array] of\n - - **x_hat** -- original x if :code:`num_iterations=1`, else smoothed x that yielded dxdt_hat - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- original x if :code:`num_iterations=1`, else smoothed x that yielded dxdt_hat + - **dxdt_hat** (np.array) -- estimated derivative of x """ warn("`fourth_order` is deprecated. 
Call `finitediff` with order 4 instead.", DeprecationWarning) return finitediff(x, dt, num_iterations, 4) diff --git a/pynumdiff/kalman_smooth/_kalman_smooth.py b/pynumdiff/kalman_smooth/_kalman_smooth.py index 93266b6..31a9393 100644 --- a/pynumdiff/kalman_smooth/_kalman_smooth.py +++ b/pynumdiff/kalman_smooth/_kalman_smooth.py @@ -25,12 +25,11 @@ def kalman_filter(y, _t, xhat0, P0, A, Q, C, R, B=None, u=None, save_P=True): :param bool save_P: whether to save history of error covariance and a priori state estimates, used with rts smoothing but nonstandard to compute for ordinary filtering - :return: tuple[np.array, np.array, np.array, np.array] of\n - - **xhat_pre** -- a priori estimates of xhat, with axis=0 the batch dimension, so xhat[n] gets the nth step - - **xhat_post** -- a posteriori estimates of xhat - - **P_pre** -- a priori estimates of P - - **P_post** -- a posteriori estimates of P - if :code:`save_P` is :code:`True` else only **xhat_post** to save memory + :return: - **xhat_pre** (np.array) -- a priori estimates of xhat, with axis=0 the batch dimension, so xhat[n] gets the nth step + - **xhat_post** (np.array) -- a posteriori estimates of xhat + - **P_pre** (np.array) -- a priori estimates of P + - **P_post** (np.array) -- a posteriori estimates of P + if :code:`save_P` is :code:`True` else only **xhat_post** to save memory """ N = y.shape[0] m = xhat0.shape[0] # dimension of the state @@ -84,16 +83,15 @@ def rts_smooth(_t, A, xhat_pre, xhat_post, P_pre, P_post, compute_P_smooth=True) :param float or array[float] _t: This function supports variable step size. This parameter is either the constant step size if given as a single float, or sample locations if given as an array of same length as the state histories. :param np.array A: state transition matrix, in discrete time if constant dt, in continuous time if variable dt - :param list[np.array] xhat_pre: a priori estimates of xhat from a kalman_filter forward pass - :param list[np.array] xhat_post: a posteriori estimates of xhat from a kalman_filter forward pass - :param list[np.array] P_pre: a priori estimates of P from a kalman_filter forward pass - :param list[np.array] P_post: a posteriori estimates of P from a kalman_filter forward pass + :param np.array xhat_pre: a priori estimates of xhat from a kalman_filter forward pass + :param np.array xhat_post: a posteriori estimates of xhat from a kalman_filter forward pass + :param np.array P_pre: a priori estimates of P from a kalman_filter forward pass + :param np.array P_post: a posteriori estimates of P from a kalman_filter forward pass :param bool compute_P_smooth: it's extra work if you don't need it - :return: tuple[np.array, np.array] of\n - - **xhat_smooth** -- RTS smoothed xhat - - **P_smooth** -- RTS smoothed P - if :code:`compute_P_smooth` is :code:`True` else only **xhat_smooth** + :return: - **xhat_smooth** (np.array) -- RTS smoothed xhat + - **P_smooth** (np.array) -- RTS smoothed P estimates + if :code:`compute_P_smooth` is :code:`True` else only **xhat_smooth** """ xhat_smooth = np.empty(xhat_post.shape); xhat_smooth[-1] = xhat_post[-1] # preallocate arrays if compute_P_smooth: P_smooth = np.empty(P_post.shape); P_smooth[-1] = P_post[-1] @@ -123,9 +121,8 @@ def rtsdiff(x, _t, order, log_qr_ratio, forwardbackward): :param bool forwardbackward: indicates whether to run smoother forwards and backwards (usually achieves better estimate at end points) - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated 
derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if not np.isscalar(_t) and len(x) != len(_t): raise ValueError("If `_t` is given as array-like, must have same length as `x`.") @@ -185,9 +182,8 @@ def constant_velocity(x, dt, params=None, options=None, r=None, q=None, forwardb :param bool forwardbackward: indicates whether to run smoother forwards and backwards (usually achieves better estimate at end points) - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # boilerplate backwards compatibility code warn("`params` and `options` parameters will be removed in a future version. Use `r`, " + @@ -216,9 +212,8 @@ def constant_acceleration(x, dt, params=None, options=None, r=None, q=None, forw :param bool forwardbackward: indicates whether to run smoother forwards and backwards (usually achieves better estimate at end points) - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # boilerplate backwards compatibility code warn("`params` and `options` parameters will be removed in a future version. Use `r`, " + @@ -247,9 +242,8 @@ def constant_jerk(x, dt, params=None, options=None, r=None, q=None, forwardbackw :param bool forwardbackward: indicates whether to run smoother forwards and backwards (usually achieves better estimate at end points) - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # boilerplate backwards compatibility code warn("`params` and `options` parameters will be removed in a future version. Use `r`, " + @@ -290,13 +284,9 @@ def robustdiff(x, dt, order, log_q, log_r, proc_huberM=6, meas_huberM=0): :param float proc_huberM: quadratic-to-linear transition point for process loss :param float meas_huberM: quadratic-to-linear transition point for measurement loss - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ - #q = 1e4 # I found q too small worsened condition number of the Q matrix, so fixing it at a biggish value - #r = q/qr_ratio - A_c = np.diag(np.ones(order), 1) # continuous-time A just has 1s on the first diagonal (where 0th is main diagonal) Q_c = np.zeros(A_c.shape); Q_c[-1,-1] = 10**log_q # continuous-time uncertainty around the last derivative C = np.zeros((1, order+1)); C[0,0] = 1 # we measure only y = noisy x @@ -324,13 +314,13 @@ def convex_smooth(y, A, Q, C, R, proc_huberM, meas_huberM): :param float proc_huberM: Huber loss parameter. :math:`M=0` reduces to :math:`\\sqrt{2}\\ell_1`. :param float meas_huberM: Huber loss parameter. :math:`M=\\infty` reduces to :math:`\\frac{1}{2}\\ell_2^2`. 
- :return: np.array -- state estimates (state_dim x N) + :return: (np.array) -- state estimates (state_dim x N) """ N = len(y) x_states = cvxpy.Variable((A.shape[0], N)) # each column is [position, velocity, acceleration, ...] at step n def huber_const(M): # from https://jmlr.org/papers/volume14/aravkin13a/aravkin13a.pdf, with correction for missing sqrt - a = 2*np.exp(-M**2 / 2)/M + a = 2*np.exp(-M**2 / 2)/M # huber_const smoothly transitions Huber between 1-norm and 2-norm squared cases b = np.sqrt(2*np.pi)*(2*norm.cdf(M) - 1) return np.sqrt((2*a*(1 + M**2)/M**2 + b)/(a + b)) @@ -338,17 +328,17 @@ def huber_const(M): # from https://jmlr.org/papers/volume14/aravkin13a/aravkin13 proc_resids = np.linalg.inv(sqrtm(Q)) @ (x_states[:,1:] - A @ x_states[:,:-1]) # all Q^(-1/2)(x_n - A x_{n-1}) meas_resids = np.linalg.inv(sqrtm(R)) @ (y.reshape(C.shape[0],-1) - C @ x_states) # all R^(-1/2)(y_n - C x_n) # Process terms: sum of J(proc_resids) - objective = 0.5*cvxpy.sum_squares(proc_resids) if huberM == float('inf') \ - else np.sqrt(2)*cvxpy.sum(cvxpy.abs(proc_resids)) if huberM < 1e-3 \ - else huber_const(huberM)*cvxpy.sum(cvxpy.huber(proc_resids, huberM)) # 1/2 l2^2, l1, or Huber + objective = 0.5*cvxpy.sum_squares(proc_resids) if proc_huberM == float('inf') \ + else np.sqrt(2)*cvxpy.sum(cvxpy.abs(proc_resids)) if proc_huberM < 1e-3 \ + else huber_const(proc_huberM)*cvxpy.sum(cvxpy.huber(proc_resids, proc_huberM)) # 1/2 l2^2, l1, or Huber # Measurement terms: sum of V(meas_resids) - objective += 0.5*cvxpy.sum_squares(meas_resids) if huberM == float('inf') \ - else np.sqrt(2)*cvxpy.sum(cvxpy.abs(meas_resids)) if huberM < 1e-3 \ - else huber_const(huberM)*cvxpy.sum(cvxpy.huber(meas_resids, huberM)) # CVXPY quirk: norm(, 1) != sum(abs()) for matrices + objective += 0.5*cvxpy.sum_squares(meas_resids) if meas_huberM == float('inf') \ + else np.sqrt(2)*cvxpy.sum(cvxpy.abs(meas_resids)) if meas_huberM < 1e-3 \ + else huber_const(meas_huberM)*cvxpy.sum(cvxpy.huber(meas_resids, meas_huberM)) # CVXPY quirk: norm(, 1) != sum(abs()) for matrices problem = cvxpy.Problem(cvxpy.Minimize(objective)) try: problem.solve(solver=cvxpy.CLARABEL); print("CLARABEL succeeded") - except cvxpy.error.SolverError: pass # Could try a different solver here, like SCS, but slows things down + except cvxpy.error.SolverError: pass # Could try another solver here, like SCS, but slows things down - if x_states.value is None: return np.full((A.shape[0], N), np.nan) # There can be solver failure, even without exception + if x_states.value is None: return np.full((A.shape[0], N), np.nan) # There can be solver failure, even without error return x_states.value diff --git a/pynumdiff/linear_model/_linear_model.py b/pynumdiff/linear_model/_linear_model.py index 96a5c33..e9b8dca 100644 --- a/pynumdiff/linear_model/_linear_model.py +++ b/pynumdiff/linear_model/_linear_model.py @@ -83,9 +83,8 @@ def lineardiff(x, dt, params=None, options=None, order=None, gamma=None, window_ :param str kernel: name of kernel to use for weighting and smoothing windows ('gaussian' or 'friedrichs') :param str solver: CVXPY solver to use, one of :code:`cvxpy.installed_solvers()` - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: warn("`params` and `options` parameters will be removed in a future version. 
Use `order`, " + diff --git a/pynumdiff/optimize/_optimize.py b/pynumdiff/optimize/_optimize.py index 689356a..1c21ea0 100644 --- a/pynumdiff/optimize/_optimize.py +++ b/pynumdiff/optimize/_optimize.py @@ -87,15 +87,23 @@ 'forwardbackward': {True, False}}, {'q': (1e-10, 1e10), 'r': (1e-10, 1e10)}), + # robustdiff: ({'order': {1, 2, 3}, # warning: order 1 hacks the loss function when tvgamma is used, tends to win but is usually suboptimal choice in terms of true RMSE + # 'log_q': [1., 4, 8, 12], # decimal after first entry ensure this is treated as float type + # 'log_r': [-1., 1, 4, 8], + # #'proc_huberM': [0., 2, 6], # 0 is l1 norm, 1.345 is Huber 95% "efficiency", 2 assumes about 5% outliers, + # 'meas_huberM': [0., 2, 6]}, # and 6 assumes basically no outliers -> l2 norm. Try (1 - norm.cdf(M))*2 to see outlier portion + # {'log_q': (-1, 18), + # 'log_r': (-5, 18), + # 'proc_huberM': (0, 6), + # 'meas_huberM': (0, 6)}), robustdiff: ({'order': {1, 2, 3}, # warning: order 1 hacks the loss function when tvgamma is used, tends to win but is usually suboptimal choice in terms of true RMSE 'log_q': [1., 4, 8, 12], # decimal after first entry ensure this is treated as float type 'log_r': [-1., 1, 4, 8], - #'proc_huberM': [0., 2, 6], # 0 is l1 norm, 1.345 is Huber 95% "efficiency", 2 assumes about 5% outliers, - 'meas_huberM': [0., 2, 6]}, # and 6 assumes basically no outliers -> l2 norm. Try (1 - norm.cdf(M))*2 to see outlier portion - {'log_q': (-1, 18), - 'log_r': (-5, 18), - 'proc_huberM': (0, 6), - 'meas_huberM': (0, 6)}), + #'qr_ratio': [10**k for k in range(-1, 16, 4)], + 'huberM': [0., 5, 10]}, # 0. so type is float. Good choices here really depend on the data scale + {'log_q': (0, 18), + 'log_r': (-5, 10), + 'huberM': (0, 20)}), # really only want to use quadratic when nearby; 20sigma is a huge distance lineardiff: ({'kernel': 'gaussian', 'order': 3, 'gamma': [1e-1, 1, 10, 100], @@ -132,27 +140,28 @@ def _objective_function(point, func, x, dt, singleton_params, categorical_params :return: float, cost of this objective at the point """ + # Short circuit if this hyperparam combo has already been queried, ~10% savings per #160 key = sha1((''.join(f"{v:.3e}" for v in point) + # This hash is stable across processes. 
Takes bytes ''.join(str(v) for k,v in sorted(categorical_params.items()))).encode()).digest() - if key in cache: return cache[key] # short circuit if this hyperparam combo has already been queried, ~10% savings per #160 + if key in cache: return cache[key] + # Query the differentiation method at this choice of hyperparameters point_params = {k:(v if search_space_types[k] == float else int(np.round(v))) for k,v in zip(search_space_types, point)} # point -> dict - # add back in the singletons we're not searching over + # add back in singletons and categorical choices the Nelder-Mead isn't searching over try: x_hat, dxdt_hat = func(x, dt, **point_params, **singleton_params, **categorical_params) # estimate x and dxdt except np.linalg.LinAlgError: cache[key] = 1e10; return 1e10 # some methods can fail numerically - # evaluate estimate - if dxdt_truth is not None: # then minimize ||dxdt_hat - dxdt_truth||^2 - if metric == 'rmse': - rms_rec_x, rms_x, rms_dxdt = evaluate.rmse(x, dt, x_hat, dxdt_hat, dxdt_truth=dxdt_truth, padding=padding) + # Evaluate estimate according to a loss function + if dxdt_truth is not None: + if metric == 'rmse': # minimize ||dxdt_hat - dxdt_truth||_2 + rms_dxdt = evaluate.rmse(dxdt_truth, dxdt_hat, padding=padding) cache[key] = rms_dxdt; return rms_dxdt elif metric == 'error_correlation': - ec = evaluate.error_correlation(dxdt_hat, dxdt_truth, padding=padding) + ec = evaluate.error_correlation(dxdt_truth, dxdt_hat, padding=padding) cache[key] = ec; return ec - else: # then minimize [ || integral(dxdt_hat) - x||^2 + gamma*TV(dxdt_hat) ] - rms_rec_x, rms_x, rms_dxdt = evaluate.rmse(x, dt, x_hat, dxdt_hat, dxdt_truth=None, padding=padding) - cost = rms_rec_x + tvgamma*evaluate.total_variation(dxdt_hat, padding=padding) + else: # then minimize sqrt{2*Mean(Huber((x_hat- x)/sigma))}*sigma + gamma*TV(dxdt_hat) + cost = evaluate.robust_rme(x, x_hat, padding=padding) + tvgamma*evaluate.total_variation(dxdt_hat, padding=padding) cache[key] = cost; return cost @@ -186,8 +195,6 @@ def optimize(func, x, dt, dxdt_truth=None, tvgamma=1e-2, search_space_updates={} """ if metric not in ['rmse','error_correlation']: raise ValueError('`metric` should either be `rmse` or `error_correlation`.') - if metric == 'error_correlation' and dxdt_truth is None: - raise ValueError('`metric` can only be `error_correlation` if `dxdt_truth` is given.') search_space, bounds = method_params_and_bounds[func] search_space.update(search_space_updates) # for things not given, use defaults diff --git a/pynumdiff/polynomial_fit/_polynomial_fit.py b/pynumdiff/polynomial_fit/_polynomial_fit.py index 758f105..33d56c1 100644 --- a/pynumdiff/polynomial_fit/_polynomial_fit.py +++ b/pynumdiff/polynomial_fit/_polynomial_fit.py @@ -19,9 +19,8 @@ def splinediff(x, _t, params=None, options={}, degree=3, s=None, num_iterations= until the smoothing condition is satisfied: :math:`\\sum_t (x[t] - \\text{spline}[t])^2 \\leq s` :param int num_iterations: how many times to apply smoothing - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. 
Use `order`, `s`, and " + @@ -61,9 +60,8 @@ def polydiff(x, dt, params=None, options=None, degree=None, window_size=None, st :param int step_size: step size for sliding :param str kernel: name of kernel to use for weighting and smoothing windows ('gaussian' or 'friedrichs') - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: warn("`params` and `options` parameters will be removed in a future version. Use `degree` " + @@ -115,9 +113,8 @@ def savgoldiff(x, dt, params=None, options=None, degree=None, window_size=None, :param int smoothing_win: size of the window used for gaussian smoothing, a good default is window_size, but smaller for high frequnecy data - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. Use `degree`, " + @@ -138,7 +135,6 @@ def savgoldiff(x, dt, params=None, options=None, degree=None, window_size=None, dxdt_hat = utility.convolutional_smoother(dxdt_hat, kernel) x_hat = utility.integrate_dxdt_hat(dxdt_hat, dt) - x0 = utility.estimate_integration_constant(x, x_hat) - x_hat = x_hat + x0 + x_hat += utility.estimate_integration_constant(x, x_hat) return x_hat, dxdt_hat diff --git a/pynumdiff/smooth_finite_difference/_smooth_finite_difference.py b/pynumdiff/smooth_finite_difference/_smooth_finite_difference.py index cb0d5c9..4d6cb10 100644 --- a/pynumdiff/smooth_finite_difference/_smooth_finite_difference.py +++ b/pynumdiff/smooth_finite_difference/_smooth_finite_difference.py @@ -19,9 +19,8 @@ def kerneldiff(x, dt, kernel='friedrichs', window_size=5, num_iterations=1): :param int window_size: filtering kernel size :param int num_iterations: how many times to apply mean smoothing - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if kernel in ['mean', 'gaussian', 'friedrichs']: kernel = getattr(utility, f"{kernel}_kernel")(window_size) @@ -50,9 +49,8 @@ def meandiff(x, dt, params=None, options={}, window_size=5, num_iterations=1): :param int window_size: filter window size :param int num_iterations: how many times to apply mean smoothing - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. 
Use `window_size` " + @@ -76,9 +74,8 @@ def mediandiff(x, dt, params=None, options={}, window_size=5, num_iterations=1): :param int window_size: filter window size :param int num_iterations: how many times to apply median smoothing - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. Use `window_size` " + @@ -103,9 +100,8 @@ def gaussiandiff(x, dt, params=None, options={}, window_size=5, num_iterations=1 :param int window_size: filter window size :param int num_iterations: how many times to apply gaussian smoothing - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. Use `window_size` " + @@ -130,9 +126,8 @@ def friedrichsdiff(x, dt, params=None, options={}, window_size=5, num_iterations :param int window_size: filter window size :param int num_iterations: how many times to apply smoothing - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. Use `window_size` " + @@ -158,9 +153,8 @@ def butterdiff(x, dt, params=None, options={}, filter_order=2, cutoff_freq=0.5, value is normalized to the range 0-1, where 1 is the Nyquist frequency. :param int num_iterations: how many times to apply smoothing - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. 
Use `filter_order`, " + diff --git a/pynumdiff/tests/test_diff_methods.py b/pynumdiff/tests/test_diff_methods.py index bf82cf0..211802c 100644 --- a/pynumdiff/tests/test_diff_methods.py +++ b/pynumdiff/tests/test_diff_methods.py @@ -51,7 +51,7 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs) (constant_acceleration, {'r':1e-3, 'q':1e4}), (constant_acceleration, [1e-3, 1e4]), (constant_jerk, {'r':1e-4, 'q':1e5}), (constant_jerk, [1e-4, 1e5]), (rtsdiff, {'order':2, 'log_qr_ratio':7, 'forwardbackward':True}), - #(robustdiff, {'order':3, 'qr_ratio':1e8}), # Add back later, once the design stabilizes + #(robustdiff, {'order':3, 'log_q':8, 'log_r':0}), # Add back when design stabilizes (velocity, {'gamma':0.5}), (velocity, [0.5]), (acceleration, {'gamma':1}), (acceleration, [1]), (jerk, {'gamma':10}), (jerk, [10]), @@ -109,8 +109,8 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs) [(-25, -25), (0, -1), (0, 0), (1, 1)], [(-25, -25), (1, 1), (0, 0), (1, 1)], [(-25, -25), (3, 3), (0, 0), (3, 3)]], - iterated_second_order: [[(-9, -10), (-25, -25), (0, -1), (0, 0)], - [(-9, -10), (-14, -14), (0, -1), (0, 0)], + iterated_second_order: [[(-7, -8), (-25, -25), (0, -1), (0, 0)], + [(-7, -8), (-14, -14), (0, -1), (0, 0)], [(-1, -1), (0, 0), (0, -1), (0, 0)], [(0, 0), (1, 0), (0, 0), (1, 0)], [(1, 1), (2, 2), (1, 1), (2, 2)], @@ -121,8 +121,8 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs) [(-25, -25), (-2, -2), (0, 0), (1, 1)], [(-25, -25), (1, 0), (0, 0), (1, 1)], [(-25, -25), (2, 2), (0, 0), (2, 2)]], - iterated_fourth_order: [[(-9, -10), (-25, -25), (0, -1), (0, 0)], - [(-9, -10), (-13, -13), (0, -1), (0, 0)], + iterated_fourth_order: [[(-7, -8), (-25, -25), (0, -1), (0, 0)], + [(-7, -8), (-13, -13), (0, -1), (0, 0)], [(-1, -1), (0, 0), (-1, -1), (0, 0)], [(0, -1), (1, 1), (0, 0), (1, 1)], [(1, 1), (2, 2), (1, 1), (2, 2)], @@ -133,8 +133,8 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs) [(-2, -2), (0, 0), (0, -1), (1, 1)], [(0, 0), (1, 1), (0, -1), (1, 1)], [(0, 0), (3, 3), (0, 0), (3, 3)]], - savgoldiff: [[(-9, -10), (-13, -14), (0, -1), (0, 0)], - [(-9, -10), (-13, -13), (0, -1), (0, 0)], + savgoldiff: [[(-7, -7), (-13, -14), (0, -1), (0, 0)], + [(-7, -7), (-13, -13), (0, -1), (0, 0)], [(-2, -2), (-1, -1), (0, -1), (0, 0)], [(0, -1), (0, 0), (0, 0), (1, 0)], [(1, 1), (2, 2), (1, 1), (2, 2)], @@ -151,7 +151,7 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs) [(0, 0), (1, 1), (0, 0), (1, 1)], [(1, 0), (2, 2), (1, 0), (2, 2)], [(1, 0), (3, 3), (1, 0), (3, 3)]], - spectraldiff: [[(-9, -10), (-14, -15), (-1, -1), (0, 0)], + spectraldiff: [[(-15, -15), (-14, -15), (0, -1), (0, 0)], [(0, 0), (1, 1), (0, 0), (1, 1)], [(1, 1), (1, 1), (1, 1), (1, 1)], [(0, 0), (1, 1), (0, 0), (1, 1)], @@ -181,14 +181,14 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs) [(0, 0), (1, 1), (0, 0), (1, 1)], [(1, 1), (2, 2), (1, 1), (2, 2)], [(1, 1), (3, 3), (1, 1), (3, 3)]], - iterative_velocity: [[(-9, -10), (-25, -25), (0, -1), (0, 0)], + iterative_velocity: [[(-7, -8), (-25, -25), (0, -1), (0, 0)], [(0, 0), (0, 0), (0, 0), (1, 0)], [(0, 0), (1, 0), (1, 0), (1, 0)], [(1, 0), (1, 1), (1, 0), (1, 1)], [(2, 1), (2, 2), (2, 1), (2, 2)], [(1, 1), (3, 3), (1, 1), (3, 3)]], - smooth_acceleration: [[(-9, -10), (-18, -18), (0, -1), (0, 0)], - [(-9, -9), (-10, -10), (-1, -1), (-1, -1)], + smooth_acceleration: [[(-7, -8), (-18, -18), (0, -1), (0, 0)], + [(-7, -7), (-10, 
-10), (-1, -1), (-1, -1)], [(-2, -2), (-1, -1), (-1, -1), (0, -1)], [(0, 0), (1, 0), (0, -1), (1, 0)], [(1, 1), (2, 2), (1, 1), (2, 2)], @@ -229,7 +229,7 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs) [(-12, -12), (-2, -2), (0, -1), (1, 1)], [(0, 0), (2, 2), (0, 0), (2, 2)], [(1, 1), (3, 3), (1, 1), (3, 3)]], - lineardiff: [[(-6, -6), (-5, -6), (0, -1), (0, 0)], + lineardiff: [[(-7, -8), (-14, -14), (0, -1), (0, 0)], [(0, 0), (2, 1), (0, 0), (2, 1)], [(1, 0), (2, 2), (1, 0), (2, 2)], [(1, 0), (2, 1), (1, 0), (2, 1)], diff --git a/pynumdiff/tests/test_optimize.py b/pynumdiff/tests/test_optimize.py index 7b8d480..1cc156f 100644 --- a/pynumdiff/tests/test_optimize.py +++ b/pynumdiff/tests/test_optimize.py @@ -25,30 +25,6 @@ def test_finite_difference(): assert params1['num_iterations'] == 5 assert params2['num_iterations'] == 1 -def test_mediandiff(): - params1, val1 = optimize(mediandiff, x, dt, dxdt_truth=dxdt_truth, search_space_updates={'num_iterations':1}, padding='auto') - params2, val2 = optimize(mediandiff, x, dt, tvgamma=tvgamma, search_space_updates={'num_iterations':1}, padding='auto') - assert params1['window_size'] == 5 - assert params2['window_size'] == 1 - -def test_meandiff(): - params1, val1 = optimize(meandiff, x, dt, dxdt_truth=dxdt_truth, search_space_updates={'num_iterations':1}, padding='auto') - params2, val2 = optimize(meandiff, x, dt, tvgamma=tvgamma, search_space_updates={'num_iterations':1}, padding='auto') - assert params1['window_size'] == 5 - assert params2['window_size'] == 1 - -def test_gaussiandiff(): - params1, val1 = optimize(gaussiandiff, x, dt, dxdt_truth=dxdt_truth, search_space_updates={'num_iterations':1}, padding='auto') - params2, val2 = optimize(gaussiandiff, x, dt, tvgamma=tvgamma, search_space_updates={'num_iterations':1}, padding='auto') - assert params1['window_size'] == 9 - assert params2['window_size'] == 1 - -def test_friedrichsdiff(): - params1, val1 = optimize(friedrichsdiff, x, dt, dxdt_truth=dxdt_truth, search_space_updates={'num_iterations':1}, padding='auto') - params2, val2 = optimize(friedrichsdiff, x, dt, tvgamma=tvgamma, search_space_updates={'num_iterations':1}, padding='auto') - assert params1['window_size'] == 9 - assert params2['window_size'] == 1 - def test_iterative_velocity(): params1, val1 = optimize(iterative_velocity, x, dt, dxdt_truth=dxdt_truth, search_space_updates={'num_iterations':1}, padding='auto') params2, val2 = optimize(iterative_velocity, x, dt, tvgamma=tvgamma, search_space_updates={'num_iterations':1}, padding='auto') @@ -56,26 +32,6 @@ def test_iterative_velocity(): np.testing.assert_almost_equal(params1['gamma'], 0.0001, decimal=4) np.testing.assert_almost_equal(params2['gamma'], 0.0001, decimal=4) -def test_velocity(): - try: import cvxpy - except: skip("could not import cvxpy, skipping test_velocity") - - params1, val1 = optimize(velocity, x, dt, dxdt_truth=dxdt_truth, padding='auto', maxiter=20) - params2, val2 = optimize(velocity, x, dt, tvgamma=tvgamma, padding='auto', maxiter=20) - - np.testing.assert_almost_equal(params1['gamma'], 0.0769, decimal=3) - np.testing.assert_almost_equal(params2['gamma'], 0.010, decimal=3) - -def test_acceleration(): - try: import cvxpy - except: pytest.skip("could not import cvxpy, skipping test_acceleration") - - params1, val1 = optimize(acceleration, x, dt, dxdt_truth=dxdt_truth, padding='auto', maxiter=20) - params2, val2 = optimize(acceleration, x, dt, tvgamma=tvgamma, padding='auto', maxiter=20) - - 
np.testing.assert_almost_equal(params1['gamma'], 0.147, decimal=3) - np.testing.assert_almost_equal(params2['gamma'], 0.0046, decimal=4) - def test_savgoldiff(): params1, val1 = optimize(savgoldiff, x, dt, dxdt_truth=dxdt_truth, padding='auto') params2, val2 = optimize(savgoldiff, x, dt, tvgamma=tvgamma, padding='auto') diff --git a/pynumdiff/tests/test_utils.py b/pynumdiff/tests/test_utils.py index 8a07e0e..5d35631 100644 --- a/pynumdiff/tests/test_utils.py +++ b/pynumdiff/tests/test_utils.py @@ -22,16 +22,22 @@ def test_integrate_dxdt_hat(): def test_estimate_integration_constant(): """For known simple functions, make sure the initial condition is as expected""" - for x,x_hat,c in [([1.0, 2.0, 3.0, 4.0, 5.0], [0.0, 1.0, 2.0, 3.0, 4.0], 1), # Perfect alignment case, xhat shifted by 1 - (np.ones(5)*10, np.ones(5)*5, 5), - ([0], [1], -1)]: + for x,x_hat,c in [(np.array([1.0, 2.0, 3.0, 4.0, 5.0]), np.array([0.0, 1.0, 2.0, 3.0, 4.0]), 1), # Perfect alignment case, xhat shifted by 1 + (np.ones(5)*10, np.ones(5)*5, 5), + (np.array([0]), np.array([1]), -1)]: x0 = utility.estimate_integration_constant(x, x_hat) assert np.allclose(x0, float(c), rtol=1e-3) - np.random.seed(42) # Noisy case. Seed for reproducibility - x0 = utility.estimate_integration_constant([1.0, 2.0, 3.0, 4.0, 5.0], - np.array([0.0, 1.0, 2.0, 3.0, 4.0]) + np.random.normal(0, 0.1, 5)) - assert 0.9 < x0 < 1.1 # The result should be close to 1.0, but not exactly due to noise + x_hat = np.sin(np.arange(400)*0.01) + x = x_hat + np.random.normal(0, 0.1, 400) + 1 # shift data by 1 + x0 = utility.estimate_integration_constant(x, x_hat, M=float('inf')) + assert 0.95 < x0 < 1.05 # The result should be close to 1.0, but not exactly due to noise + + x[100] = 100 # outlier case + x0 = utility.estimate_integration_constant(x, x_hat, M=0) + assert 0.95 < x0 < 1.05 + x0 = utility.estimate_integration_constant(x, x_hat, M=6) + assert 0.95 < x0 < 1.05 def test_convolutional_smoother(): @@ -63,19 +69,19 @@ def test_peakdet(request): pyplot.title('peakdet validataion') pyplot.show() - assert np.allclose(maxtab, [[0.473, 1.59843494], # these numbers validated by eye with --plot - [1.795, 1.90920786], - [3.314, -0.04585991], - [4.992, 0.74798665], - [6.345, 1.89597554], - [7.778, 0.57190318], - [9.424, 0.58764606]]) - assert np.allclose(mintab, [[1.096, 0.30361178], - [2.739, -1.12624328], - [4.072, -2.00254655], - [5.582, -0.31529832], - [7.135, -0.58327787], - [8.603, -1.71278265]]) + assert np.allclose(maxtab, [[0.447, 1.58575613], # these numbers validated by eye with --plot + [1.818, 1.91349239], + [3.316,-0.02740252], + [4.976, 0.74512778], + [6.338, 1.89861691], + [7.765, 0.57577842], + [9.402, 0.59450898]]) + assert np.allclose(mintab, [[1.139, 0.31325728], + [2.752,-1.12769567], + [4.098,-2.00326946], + [5.507,-0.31714122], + [7.211,-0.59708324], + [8.612,-1.7118216 ]]) def test_slide_function(): """Verify the slide function's weighting scheme calculates as expected""" @@ -90,6 +96,7 @@ def identity(x, dt): return x, 0 # should come back the same def test_simulations(request): + """Just sprint through running them all to make sure they go. 
Optionally plot with flag.""" if request.config.getoption("--plot"): from matplotlib import pyplot fig, axes = pyplot.subplots(2, 3, figsize=(18,7), constrained_layout=True) @@ -110,5 +117,13 @@ def test_simulations(request): ax.set_title(title, fontsize=18) if i == 5: ax.legend(loc='lower right', fontsize=12) -# def test_evaluate(): -# return + +def test_robust_rme(): + """Ensure the robust error metric is the same as RMSE for big M, and that it does + better in the presence of outliers""" + u = np.sin(np.arange(100)*0.1) + v = u + np.random.randn(100) + assert np.allclose(evaluate.rmse(u, v), evaluate.robust_rme(u, v, M=6)) + + v[40] = 100 # throw an outlier in there + assert evaluate.robust_rme(u, v, M=2) < evaluate.rmse(u, v) diff --git a/pynumdiff/total_variation_regularization/_total_variation_regularization.py b/pynumdiff/total_variation_regularization/_total_variation_regularization.py index 9645af6..1dc76d9 100644 --- a/pynumdiff/total_variation_regularization/_total_variation_regularization.py +++ b/pynumdiff/total_variation_regularization/_total_variation_regularization.py @@ -30,9 +30,8 @@ def iterative_velocity(x, dt, params=None, options=None, num_iterations=None, ga :code:`'large'` has simpler numerics but is more efficient for large-scale problems. :code:`'large'` is more readily modified for higher-order derivatives, since the implicit differentiation matrix is square. - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. Use `num_iterations`, " + @@ -65,9 +64,8 @@ def tvrdiff(x, dt, order, gamma, solver=None): :param str solver: Solver to use. Solver options include: 'MOSEK', 'CVXOPT', 'CLARABEL', 'ECOS'. In testing, 'MOSEK' was the most robust. If not given, fall back to CVXPY's default. - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ # Normalize for numerical consistency with convex solver mean = np.mean(x) @@ -119,9 +117,8 @@ def velocity(x, dt, params=None, options=None, gamma=None, solver=None): :param str solver: the solver CVXPY should use, 'MOSEK', 'CVXOPT', 'CLARABEL', 'ECOS', etc. In testing, 'MOSEK' was the most robust. If not given, fall back to CVXPY's default. - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. Use `gamma` " + @@ -148,9 +145,8 @@ def acceleration(x, dt, params=None, options=None, gamma=None, solver=None): :param str solver: the solver CVXPY should use, 'MOSEK', 'CVXOPT', 'CLARABEL', 'ECOS', etc. In testing, 'MOSEK' was the most robust. If not given, fall back to CVXPY's default. 
- :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. Use `gamma` " + @@ -177,9 +173,8 @@ def jerk(x, dt, params=None, options=None, gamma=None, solver=None): :param str solver: the solver CVXPY should use, 'MOSEK', 'CVXOPT', 'CLARABEL', 'ECOS', etc. In testing, 'MOSEK' was the most robust. If not given, fall back to CVXPY's default. - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. Use `gamma` " + @@ -208,9 +203,8 @@ def smooth_acceleration(x, dt, params=None, options=None, gamma=None, window_siz :param str solver: the solver CVXPY should use, 'MOSEK', 'CVXOPT', 'CLARABEL', 'ECOS', etc. In testing, 'MOSEK' was the most robust. If not given, fall back to CVXPY's default. - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. Use `gamma` " + @@ -246,9 +240,8 @@ def jerk_sliding(x, dt, params=None, options=None, gamma=None, solver=None, wind In testing, 'MOSEK' was the most robust. If not given, fall back to CVXPY's default. :param int window_size: how wide to make the kernel - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. 
Use `gamma` " + diff --git a/pynumdiff/utils/evaluate.py b/pynumdiff/utils/evaluate.py index c1e2e01..96f5c30 100644 --- a/pynumdiff/utils/evaluate.py +++ b/pynumdiff/utils/evaluate.py @@ -20,7 +20,7 @@ def plot(x, dt, x_hat, dxdt_hat, x_truth, dxdt_truth, xlim=None, show_error=True :param bool show_error: whether to show the rmse :param int markersize: marker size of noisy observations - :return: Display two plots + :return: (tuple) -- figure and axes """ t = np.arange(len(x))*dt if xlim is None: @@ -48,8 +48,8 @@ def plot(x, dt, x_hat, dxdt_hat, x_truth, dxdt_truth, xlim=None, show_error=True axes[1].legend(loc='lower right', fontsize=12) if show_error: - _, _, rms_dxdt = rmse(x, dt, x_hat, dxdt_hat, x_truth, dxdt_truth) - R_sqr = error_correlation(dxdt_hat, dxdt_truth) + rms_dxdt = rmse(dxdt_truth, dxdt_hat) + R_sqr = error_correlation(dxdt_truth, dxdt_hat) axes[1].text(0.05, 0.95, f"RMSE = {rms_dxdt:.2f}\n$R^2$ = {R_sqr:.2g}", transform=axes[1].transAxes, fontsize=15, verticalalignment='top') @@ -70,71 +70,68 @@ def plot_comparison(dt, dxdt_truth, dxdt_hat1, title1, dxdt_hat2, title2, dxdt_h axes[i].tick_params(axis='y', labelsize=15) axes[i].set_title(title, fontsize=18) if i==2: axes[i].legend(loc='lower right', fontsize=12) - _, _, rms_dxdt = rmse(np.zeros(dxdt_hat.shape), dt, np.zeros(dxdt_hat.shape), dxdt_hat, np.zeros(dxdt_hat.shape), dxdt_truth) - R_sqr = error_correlation(dxdt_hat, dxdt_truth) - axes[i].text(0.05, 0.95, f"RMSE = {rms_dxdt:.2f}\n$R^2$ = {R_sqr:.2g}", + rmse_dxdt = rmse(dxdt_truth, dxdt_hat) + R_sqr = error_correlation(dxdt_truth, dxdt_hat) + axes[i].text(0.05, 0.95, f"RMSE = {rmse_dxdt:.2f}\n$R^2$ = {R_sqr:.2g}", transform=axes[i].transAxes, fontsize=15, verticalalignment='top') fig.tight_layout() -def rmse(x, dt, x_hat, dxdt_hat, x_truth=None, dxdt_truth=None, padding=0): - """Evaluate x_hat based on RMSE, calculating different ones depending on whether :code:`dxdt_truth` - and :code:`x_truth` are known. +def robust_rme(x, x_hat, padding=0, M=6): + """Robustified/Huberized Root Mean Error metric, used to determine fit between smooth estimate and data. + Equals np.linalg.norm(x[s] - x_hat[s]) / np.sqrt(N) if M=float('inf'), and dang close for even M=6 or even 2. - :param np.array[float] x: data that was differentiated - :param float dt: step size - :param np.array[float] x_hat: estimated (smoothed) x - :param np.array[float] dxdt_hat: estimated xdot - :param np.array[float] x_truth: true value of x, if known - :param np.array[float] dxdt_truth: true value of dxdt, if known, optional + :param np.array[float] x: noisy data + :param np.array[float] x_hat: estimated smoothed signal, returned by differentiation algorithms in addition + to derivative :param int padding: number of snapshots on either side of the array to ignore when calculating the metric. If :code:`'auto'`, defaults to 2.5% of the size of x + :param float M: Huber loss parameter in units of ~1.4*mean absolute deviation, intended to approximate + standard deviation robustly. 
- :return: tuple[float, float, float] containing\n - - **rms_rec_x** -- RMS error between the integral of dxdt_hat and x - - **rms_x** -- RMS error between x_hat and x_truth, returns None if x_truth is None - - **rms_dxdt** -- RMS error between dxdt_hat and dxdt_truth, returns None if dxdt_hat is None + :return: **robust_rmse_x_hat** (float) -- RMS error between x_hat and data """ - if np.isnan(x_hat).any(): - return np.nan, np.nan, np.nan - if padding == 'auto': - padding = int(0.025*len(x)) - padding = max(padding, 1) + if padding == 'auto': padding = max([1, int(0.025*len(x))]) s = slice(padding, len(x)-padding) # slice out data we want to measure + N = s.stop - s.start + + sigma = stats.median_abs_deviation(x[s] - x_hat[s], scale='normal') # M is in units of this robust scatter metric + if sigma < 1e-6: sigma = 1 # guard against divide by zero + return np.sqrt(2*np.mean(utility.huber((x[s] - x_hat[s])/sigma, M))) * sigma - # RMS of dxdt and x_hat - root = np.sqrt(s.stop - s.start) - rms_dxdt = np.linalg.norm(dxdt_hat[s] - dxdt_truth[s]) / root if dxdt_truth is not None else None - rms_x = np.linalg.norm(x_hat[s] - x_truth[s]) / root if x_truth is not None else None - # RMS reconstructed x from integrating dxdt vs given raw x, available even in the absence of ground truth - rec_x = utility.integrate_dxdt_hat(dxdt_hat, dt) - x0 = utility.estimate_integration_constant(x, rec_x) - rec_x = rec_x + x0 - rms_rec_x = np.linalg.norm(rec_x[s] - x[s]) / root +def rmse(dxdt_truth, dxdt_hat, padding=0): + """True RMSE between vectors + + :param np.array[float] dxdt_truth: known true derivative + :param np.array[float] dxdt_hat: estimated derivative + :param int padding: number of snapshots on either side of the array to ignore when calculating + the metric. If :code:`'auto'`, defaults to 2.5% of the array length + + :return: **true_rmse_dxdt** (float) -- RMS error between dxdt_hat and dxdt_truth, returns None if dxdt_truth is None + """ + if padding == 'auto': padding = max([1, int(0.025*len(dxdt_truth))]) + s = slice(padding, len(dxdt_hat)-padding) # slice out data we want to measure + N = s.stop - s.start - return rms_rec_x, rms_x, rms_dxdt + return np.linalg.norm(dxdt_hat[s] - dxdt_truth[s]) / np.sqrt(N) if dxdt_truth is not None else None -def error_correlation(dxdt_hat, dxdt_truth, padding=0): - """Calculate the error correlation (pearsons correlation coefficient) between the estimated - dxdt and true dxdt +def error_correlation(dxdt_truth, dxdt_hat, padding=0): + """Calculate the error correlation (Pearson's correlation coefficient) between vectors - :param np.array[float] dxdt_hat: estimated xdot :param np.array[float] dxdt_truth: true value of dxdt, if known, optional + :param np.array[float] dxdt_hat: estimated xdot :param int padding: number of snapshots on either side of the array to ignore when calculating the metric.
If :code:`'auto'`, defaults to 2.5% of the size of x :return: (float) -- r-squared correlation coefficient """ - if padding == 'auto': - padding = int(0.025*len(dxdt_hat)) - padding = max(padding, 1) + if padding == 'auto': padding = max(1, int(0.025*len(dxdt_hat))) s = slice(padding, len(dxdt_hat)-padding) # slice out data we want to measure - errors = (dxdt_hat[s] - dxdt_truth[s]) - r = stats.linregress(dxdt_truth[s] - np.mean(dxdt_truth[s]), errors) - return r.rvalue**2 + + return stats.linregress(dxdt_truth[s], dxdt_hat[s] - dxdt_truth[s]).rvalue**2 def total_variation(x, padding=0): @@ -146,11 +143,7 @@ def total_variation(x, padding=0): :return: (float) -- total variation """ - if np.isnan(x).any(): - return np.nan - if padding == 'auto': - padding = int(0.025*len(x)) - padding = max(padding, 1) + if padding == 'auto': padding = max(1, int(0.025*len(x))) x = x[padding:len(x)-padding] return np.linalg.norm(x[1:]-x[:-1], 1)/len(x) # normalized version of what cvxpy.tv does diff --git a/pynumdiff/utils/utility.py b/pynumdiff/utils/utility.py index ce275da..f5b1a71 100644 --- a/pynumdiff/utils/utility.py +++ b/pynumdiff/utils/utility.py @@ -2,6 +2,7 @@ import numpy as np from scipy.integrate import cumulative_trapezoid from scipy.optimize import minimize +from scipy.stats import median_abs_deviation def hankel_matrix(x, num_delays, pad=False): # fixed delay step of 1 @@ -96,9 +97,14 @@ def peakdet(x, delta, t=None): return np.array(maxtab), np.array(mintab) -# Trapazoidal integration, with 0 first value so that the lengths match. See #88. +def huber(x, M): + """Huber loss function, for outlier-robust applications, `see here `_""" + absx = np.abs(x) + return np.where(absx <= M, 0.5*x**2, M*(absx - 0.5*M)) + + def integrate_dxdt_hat(dxdt_hat, _t): - """Wrapper for scipy.integrate.cumulative_trapezoid + """Wrapper for scipy.integrate.cumulative_trapezoid. Use 0 as first value so lengths match, see #88. :param np.array[float] dxdt_hat: estimate derivative of timeseries :param float _t: step size if given as a scalar or a vector of sample locations @@ -109,22 +115,30 @@ def integrate_dxdt_hat(dxdt_hat, _t): else cumulative_trapezoid(dxdt_hat, x=_t, initial=0) -# Optimization routine to estimate the integration constant. -def estimate_integration_constant(x, x_hat): +def estimate_integration_constant(x, x_hat, M=6): """Integration leaves an unknown integration constant. This function finds a best fit integration constant given x and x_hat (the integral of dxdt_hat) by optimizing :math:`\\min_c ||x - \\hat{x} + c||_2`. :param np.array[float] x: timeseries of measurements - :param np.array[float] x_hat: smoothed estiamte of x, for the purpose of this function this should have been determined - by integrate_dxdt_hat + :param np.array[float] x_hat: smoothed estimate of x + :param float M: robustifies constant estimation using Huber loss. The default is intended to capture the idea + of "six sigma": Assuming Gaussian inliers and M in units of standard deviation, the portion of inliers + beyond the Huber loss' transition is only about 1.97e-9. M here is in units of scaled median absolute deviation, + so scatter can be calculated and used to normalize data without being thrown off by outliers.
:return: **integration constant** (float) -- initial condition that best aligns x_hat with x """ - return minimize(lambda x0, x, xhat: np.linalg.norm(x - (x_hat+x0)), # fn to minimize in 1st argument - 0, args=(x, x_hat), method='SLSQP').x[0] # result is a vector, even if initial guess is just a scalar + if M == float('inf'): # calculates the constant to be mean(diff(x, x_hat)), equivalent to argmin_{x0} ||x_hat + x0 - x||_2^2 + return np.mean(x - x_hat) # Solves the L2 distance minimization + elif M < 0.1: # small M looks like L1 loss, and Huber gets too flat to work well + return np.median(x - x_hat) # Solves the L1 distance minimization + else: + sigma = median_abs_deviation(x - x_hat, scale='normal') # M is in units of this robust scatter metric + if sigma < 1e-6: sigma = 1 # guard against divide by zero + return minimize(lambda x0: np.mean(huber((x - (x_hat+x0))/sigma, M)), # fn to minimize in 1st argument + 0, method='SLSQP').x[0] # result is a vector, even if initial guess is just a scalar -# kernels def mean_kernel(window_size): """A uniform boxcar of total integral 1""" return np.ones(window_size)/window_size @@ -142,6 +156,7 @@ def friedrichs_kernel(window_size): ker = np.exp(-1/(1-x**2)) return ker / np.sum(ker) + def convolutional_smoother(x, kernel, num_iterations=1): """Perform smoothing by convolving x with a kernel.