diff --git a/pynumdiff/kalman_smooth/_kalman_smooth.py b/pynumdiff/kalman_smooth/_kalman_smooth.py
index de399d8..ee9f65f 100644
--- a/pynumdiff/kalman_smooth/_kalman_smooth.py
+++ b/pynumdiff/kalman_smooth/_kalman_smooth.py
@@ -1,12 +1,12 @@
 import numpy as np
 from warnings import warn
 from scipy.linalg import expm, sqrtm
-from scipy.stats import norm
 from time import time
-
 try:
     import cvxpy
 except ImportError:
     pass
+from pynumdiff.utils.utility import huber_const
+

 def kalman_filter(y, dt_or_t, xhat0, P0, A, Q, C, R, B=None, u=None, save_P=True):
     """Run the forward pass of a Kalman filter, with regular or irregular step size.
@@ -278,7 +278,7 @@ def robustdiff(x, dt, order, log_q, log_r, proc_huberM=6, meas_huberM=0):
     proper :math:`\\ell_1` normalization.

     Note that :code:`log_q` and :code:`proc_huberM` are coupled, as are :code:`log_r` and :code:`meas_huberM`, via the relation
-    :math:`\\text{Huber}(q^{-1/2}v, M) = q^{-1}\\text{Huber}(v, Mq^{-1/2})`, but these are still independent enough that for
+    :math:`\\text{Huber}(q^{-1/2}v, M) = q^{-1}\\text{Huber}(v, Mq^{1/2})`, but these are still independent enough that for
     the purposes of optimization we cannot collapse them. Nor can :code:`log_q` and :code:`log_r` be combined into
     :code:`log_qr_ratio` as in RTS smoothing without the addition of a new absolute scale parameter, because
     :math:`q` and :math:`r` interact with the distinct Huber :math:`M` parameters.
@@ -329,11 +329,6 @@ def convex_smooth(y, A, Q, C, R, B=None, u=None, proc_huberM=6, meas_huberM=0):
     x_states = cvxpy.Variable((A.shape[0], N)) # each column is [position, velocity, acceleration, ...] at step n
     control = isinstance(B, np.ndarray) and isinstance(u, np.ndarray) # whether there is a control input

-    def huber_const(M): # from https://jmlr.org/papers/volume14/aravkin13a/aravkin13a.pdf, with correction for missing sqrt
-        a = 2*np.exp(-M**2 / 2)/M # huber_const smoothly transitions Huber between 1-norm and 2-norm squared cases
-        b = np.sqrt(2*np.pi)*(2*norm.cdf(M) - 1)
-        return np.sqrt((2*a*(1 + M**2)/M**2 + b)/(a + b))
-
     # It is extremely important for run time that CVXPY expressions be in vectorized form
     proc_resids = np.linalg.inv(sqrtm(Q)) @ (x_states[:,1:] - A @ x_states[:,:-1] - (0 if not control else B @ u[1:].T)) # all Q^(-1/2)(x_n - (A x_{n-1} + B u_n))
     meas_resids = np.linalg.inv(sqrtm(R)) @ (y.reshape(C.shape[0],-1) - C @ x_states) # all R^(-1/2)(y_n - C x_n)
diff --git a/pynumdiff/optimize/_optimize.py b/pynumdiff/optimize/_optimize.py
index 125f3ae..eacef4a 100644
--- a/pynumdiff/optimize/_optimize.py
+++ b/pynumdiff/optimize/_optimize.py
@@ -65,8 +65,10 @@
                  {'sigma': (1e-2, 1e3), 'lmbd': (1e-3, 0.5)}),
     tvrdiff: ({'gamma': [1e-2, 1e-1, 1, 10, 100, 1000],
-               'order': {1, 2, 3}}, # warning: order 1 hacks the loss function when tvgamma is used, tends to win but is usually suboptimal choice in terms of true RMSE
-              {'gamma': (1e-4, 1e7)}),
+               'order': {1, 2, 3}, # warning: order 1 hacks the loss function when tvgamma is used, tends to win but is usually a suboptimal choice in terms of true RMSE
+               'huberM': [0., 1, 2, 6]}, # comb lower values more finely, because the scale of sigma is mad(x), bigger than the mad of the y-x residuals
+              {'gamma': (1e-4, 1e7),
+               'huberM': (0, 6)}),
     velocity: ({'gamma': [1e-2, 1e-1, 1, 10, 100, 1000]}, # Deprecated method
               {'gamma': (1e-4, 1e7)}),
     iterative_velocity: ({'scale': 'small', # Rare to optimize this one, because it's longer-running than convex version
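The corrected identity in the `robustdiff` docstring is easy to check numerically. A minimal sketch, using the same piecewise Huber definition as `pynumdiff.utils.utility.huber`:

```python
import numpy as np

def huber(x, M):  # same piecewise form as pynumdiff.utils.utility.huber
    absx = np.abs(x)
    return np.where(absx <= M, 0.5*x**2, M*(absx - 0.5*M))

q, M = 0.37, 6.0                  # arbitrary q > 0, M > 0
v = np.linspace(-10, 10, 1001)
lhs = huber(v/np.sqrt(q), M)
rhs = huber(v, M*np.sqrt(q))/q    # note the exponent: M*q^(1/2), not M*q^(-1/2)
assert np.allclose(lhs, rhs)
```

Both branches of the piecewise loss scale the same way, so the identity holds for every `v`, which is why `log_q` and `proc_huberM` stay coupled but cannot be collapsed into one parameter.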
diff --git a/pynumdiff/tests/test_diff_methods.py b/pynumdiff/tests/test_diff_methods.py
index 4597a89..025774d 100644
--- a/pynumdiff/tests/test_diff_methods.py
+++ b/pynumdiff/tests/test_diff_methods.py
@@ -57,7 +57,7 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
     (jerk, {'gamma':10}), (jerk, [10]),
     (iterative_velocity, {'num_iterations':5, 'gamma':0.05}), (iterative_velocity, [5, 0.05]),
     (smooth_acceleration, {'gamma':2, 'window_size':5}), (smooth_acceleration, [2, 5]),
-    (lineardiff, {'order':3, 'gamma':5, 'window_size':11, 'solver':'CLARABEL'}), (lineardiff, [3, 5, 11], {'solver':'CLARABEL'})
+    (lineardiff, {'order':3, 'gamma':0.01, 'window_size':11, 'solver':'CLARABEL'}), (lineardiff, [3, 0.01, 11], {'solver':'CLARABEL'})
 ]

 # All the testing methodology follows the exact same pattern; the only thing that changes is the
@@ -108,8 +108,8 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
                             [(-25, -25), (0, -1), (0, 0), (1, 1)],
                             [(-25, -25), (1, 1), (0, 0), (1, 1)],
                             [(-25, -25), (3, 3), (0, 0), (3, 3)]],
-    iterated_second_order: [[(-7, -8), (-25, -25), (0, -1), (0, 0)],
-                            [(-7, -8), (-14, -14), (0, -1), (0, 0)],
+    iterated_second_order: [[(-25, -25), (-25, -25), (0, -1), (0, 0)],
+                            [(-14, -14), (-14, -14), (0, -1), (0, 0)],
                             [(-1, -1), (0, 0), (0, -1), (0, 0)],
                             [(0, 0), (1, 0), (0, 0), (1, 0)],
                             [(1, 1), (2, 2), (1, 1), (2, 2)],
@@ -120,8 +120,8 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
                            [(-25, -25), (-2, -2), (0, 0), (1, 1)],
                            [(-25, -25), (1, 0), (0, 0), (1, 1)],
                            [(-25, -25), (2, 2), (0, 0), (2, 2)]],
-    iterated_fourth_order: [[(-7, -8), (-25, -25), (0, -1), (0, 0)],
-                            [(-7, -8), (-13, -13), (0, -1), (0, 0)],
+    iterated_fourth_order: [[(-25, -25), (-25, -25), (0, -1), (0, 0)],
+                            [(-14, -14), (-13, -13), (0, -1), (0, 0)],
                             [(-1, -1), (0, 0), (-1, -1), (0, 0)],
                             [(0, -1), (1, 1), (0, 0), (1, 1)],
                             [(1, 1), (2, 2), (1, 1), (2, 2)],
@@ -132,8 +132,8 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
                 [(-2, -2), (0, 0), (0, -1), (1, 1)],
                 [(0, 0), (1, 1), (0, -1), (1, 1)],
                 [(0, 0), (3, 3), (0, 0), (3, 3)]],
-    savgoldiff: [[(-7, -7), (-13, -14), (0, -1), (0, 0)],
-                 [(-7, -7), (-13, -13), (0, -1), (0, 0)],
+    savgoldiff: [[(-13, -14), (-13, -14), (0, -1), (0, 0)],
+                 [(-13, -13), (-13, -13), (0, -1), (0, 0)],
                  [(-2, -2), (-1, -1), (0, -1), (0, 0)],
                  [(0, -1), (0, 0), (0, 0), (1, 0)],
                  [(1, 1), (2, 2), (1, 1), (2, 2)],
@@ -164,16 +164,16 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
               [(1, 1), (3, 3), (1, 1), (3, 3)]],
     velocity: [[(-25, -25), (-18, -19), (0, -1), (1, 0)],
                [(-12, -12), (-11, -12), (-1, -1), (-1, -2)],
-               [(0, 0), (1, 0), (0, 0), (1, 0)],
+               [(0, -1), (1, 0), (0, -1), (1, 0)],
                [(0, -1), (1, 1), (0, 0), (1, 0)],
-               [(1, 1), (2, 2), (1, 1), (2, 2)],
-               [(1, 0), (3, 3), (1, 0), (3, 3)]],
-    acceleration: [[(-25, -25), (-18, -18), (0, -1), (0, 0)],
+               [(1, 0), (2, 2), (1, 0), (2, 2)],
+               [(0, 0), (3, 3), (0, 0), (3, 3)]],
+    acceleration: [[(-25, -25), (-18, -18), (0, -1), (1, 0)],
                    [(-10, -10), (-9, -9), (-1, -1), (0, -1)],
                    [(-10, -10), (-9, -10), (-1, -1), (0, -1)],
                    [(0, -1), (1, 0), (0, -1), (1, 0)],
-                   [(1, 1), (2, 2), (1, 1), (2, 2)],
-                   [(1, 1), (3, 3), (1, 1), (3, 3)]],
+                   [(1, 0), (2, 2), (1, 0), (2, 2)],
+                   [(0, 0), (3, 3), (0, 0), (3, 3)]],
     jerk: [[(-25, -25), (-18, -18), (-1, -1), (0, 0)],
            [(-9, -10), (-9, -9), (-1, -1), (0, 0)],
            [(-10, -10), (-9, -10), (-1, -1), (0, 0)],
@@ -186,8 +186,8 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
            [(1, 0), (1, 1), (1, 0), (1, 1)],
            [(2, 1), (2, 2), (2, 1), (2, 2)],
            [(1, 1), (3, 3), (1, 1), (3, 3)]],
-    smooth_acceleration: [[(-7, -8), (-18, -18), (0, -1), (0, 0)],
-                          [(-7, -7), (-10, -10), (-1, -1), (-1, -1)],
+    smooth_acceleration: [[(-25, -25), (-21, -21), (0, -1), (0, 0)],
+                          [(-10, -11), (-10, -10), (-1, -1), (-1, -1)],
                           [(-2, -2), (-1, -1), (-1, -1), (0, -1)],
                           [(0, 0), (1, 0), (0, -1), (1, 0)],
                           [(1, 1), (2, 2), (1, 1), (2, 2)],
@@ -222,12 +222,12 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
                  [(-7, -7), (-2, -2), (0, -1), (1, 1)],
                  [(0, 0), (2, 2), (0, 0), (2, 2)],
                  [(1, 1), (3, 3), (1, 1), (3, 3)]],
-    lineardiff: [[(-7, -8), (-14, -14), (0, -1), (0, 0)],
+    lineardiff: [[(-3, -4), (-3, -3), (0, -1), (1, 0)],
+                 [(-1, -2), (0, 0), (0, -1), (1, 0)],
+                 [(-1, -1), (0, 0), (0, -1), (1, 1)],
+                 [(-1, -2), (0, 0), (0, -1), (1, 1)],
                  [(0, 0), (2, 1), (0, 0), (2, 1)],
-                 [(1, 0), (2, 2), (1, 0), (2, 2)],
-                 [(1, 0), (2, 1), (1, 0), (2, 1)],
-                 [(1, 1), (2, 2), (1, 1), (2, 2)],
-                 [(1, 1), (3, 3), (1, 1), (3, 3)]]
+                 [(0, -1), (3, 3), (0, 0), (3, 3)]]
 }

 # Essentially run the cartesian product of [diff methods] x [test functions] through this one test
diff --git a/pynumdiff/total_variation_regularization/_total_variation_regularization.py b/pynumdiff/total_variation_regularization/_total_variation_regularization.py
index e21fe7d..384b1af 100644
--- a/pynumdiff/total_variation_regularization/_total_variation_regularization.py
+++ b/pynumdiff/total_variation_regularization/_total_variation_regularization.py
@@ -1,5 +1,6 @@
 import numpy as np
 from warnings import warn
+from scipy.stats import median_abs_deviation
 from pynumdiff.total_variation_regularization import _chartrand_tvregdiff
 from pynumdiff.utils import utility

@@ -53,7 +54,7 @@ def iterative_velocity(x, dt, params=None, options=None, num_iterations=None, ga
     return x_hat, dxdt_hat


-def tvrdiff(x, dt, order, gamma, solver=None):
+def tvrdiff(x, dt, order, gamma, huberM=float('inf'), solver=None):
     """Generalized total variation regularized derivatives. Use convex optimization (cvxpy) to solve
     for a total variation regularized derivative. Other convex-solver-based methods in this module call this function.

@@ -61,17 +62,20 @@
     :param float dt: step size
     :param int order: 1, 2, or 3, the derivative to regularize
     :param float gamma: regularization parameter
+    :param float huberM: Huber loss parameter, in units of the scaled median absolute deviation of the input
+        data :code:`x`. :math:`M = \\infty` reduces to squared :math:`\\ell_2` loss on the first (fidelity) cost
+        term, and :math:`M = 0` reduces to :math:`\\ell_1` loss.
     :param str solver: Solver to use. Solver options include: 'MOSEK', 'CVXOPT', 'CLARABEL', 'ECOS'.
-        In testing, 'MOSEK' was the most robust. If not given, fall back to CVXPY's default.
+        If not given, fall back to CVXPY's default.

     :return: - **x_hat** (np.array) -- estimated (smoothed) x
              - **dxdt_hat** (np.array) -- estimated derivative of x
     """
     # Normalize for numerical consistency with convex solver
-    mean = np.mean(x)
-    std = np.std(x)
-    if std == 0: std = 1 # safety guard
-    x = (x-mean)/std
+    mu = np.mean(x)
+    sigma = median_abs_deviation(x, scale='normal') # robust alternative to std()
+    if sigma == 0: sigma = 1 # safety guard
+    x = (x-mu)/sigma

     # Define the variables for the highest order derivative and the integration constants
     deriv_values = cvxpy.Variable(len(x)) # values of the order^th derivative, in which we're penalizing variation
@@ -84,10 +88,13 @@ def tvrdiff(x, dt, order, gamma, solver=None):
     for i in range(order):
         y = cvxpy.cumsum(y) + integration_constants[i]

+    # Compare the recursively integrated position to the noisy position. \ell_2 doesn't get scaled by 1/2 here,
+    # so cvxpy's doubled Huber is already the right scale, and \ell_1 should be scaled by 2\sqrt{2} to match.
+    fidelity_cost = cvxpy.sum_squares(y - x) if huberM == float('inf') \
+        else np.sqrt(8)*cvxpy.norm(y - x, 1) if huberM == 0 \
+        else utility.huber_const(huberM)*cvxpy.sum(cvxpy.huber(y - x, huberM*sigma))
     # Set up and solve the optimization problem
-    prob = cvxpy.Problem(cvxpy.Minimize(
-        # Compare the recursively integrated position to the noisy position, and add TVR penalty
-        cvxpy.sum_squares(y - x) + gamma*cvxpy.sum(cvxpy.tv(deriv_values)) ))
+    prob = cvxpy.Problem(cvxpy.Minimize(fidelity_cost + gamma*cvxpy.sum(cvxpy.tv(deriv_values)) ))
     prob.solve(solver=solver)

     # Recursively integrate the final derivative values to get back to the function and derivative values
@@ -102,7 +109,7 @@ def tvrdiff(x, dt, order, gamma, solver=None):
     dxdt_hat = (dxdt_hat[:-1] + dxdt_hat[1:])/2
     dxdt_hat = np.hstack((dxdt_hat, 2*dxdt_hat[-1] - dxdt_hat[-2])) # last value = penultimate value [-1] + diff between [-1] and [-2]

-    return x_hat*std+mean, dxdt_hat*std # derivative is linear, so scale derivative by std
+    return x_hat*sigma+mu, dxdt_hat*sigma # derivative is linear, so scale derivative by scatter


 def velocity(x, dt, params=None, options=None, gamma=None, solver=None):
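A usage sketch of the new `huberM` fidelity options on synthetic data (hypothetical values; assumes `tvrdiff` is exported from the subpackage like the other methods):

```python
import numpy as np
from pynumdiff.total_variation_regularization import tvrdiff

dt = 0.01
t = np.arange(0, 4, dt)
rng = np.random.default_rng(0)
x = np.sin(t) + 0.01*rng.standard_normal(len(t))
x[::50] += 2.0  # sparse spikes that a squared-loss fit would chase

x_hat2, dxdt2 = tvrdiff(x, dt, order=2, gamma=0.1)            # default huberM=inf: squared l2 fidelity
x_hath, dxdth = tvrdiff(x, dt, order=2, gamma=0.1, huberM=2)  # Huber fidelity: outliers pull only linearly
x_hat1, dxdt1 = tvrdiff(x, dt, order=2, gamma=0.1, huberM=0)  # l1 fidelity: most outlier-resistant
```

The `2*sqrt(2)` and `huber_const` factors in the fidelity term keep the three losses on a common scale, so a given `gamma` remains roughly comparable as `huberM` varies.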
diff --git a/pynumdiff/utils/evaluate.py b/pynumdiff/utils/evaluate.py
index 0af9d20..b755a4b 100644
--- a/pynumdiff/utils/evaluate.py
+++ b/pynumdiff/utils/evaluate.py
@@ -86,18 +86,16 @@ def robust_rme(u, v, padding=0, M=6):
     :param np.array[float] v: e.g. estimated smoothed signal, reconstructed from derivative
     :param int padding: number of snapshots on either side of the array to ignore when calculating the
         metric. If :code:`'auto'`, defaults to 2.5% of the size of inputs
-    :param float M: Huber loss parameter in units of ~1.4*mean absolute deviation, intended to approximate
-        standard deviation robustly.
+    :param float M: Huber loss parameter in units of ~1.4*median absolute deviation of the population of
+        residual errors, intended to approximate standard deviation robustly.

     :return: (float) -- Robust root mean error between u and v
     """
     if padding == 'auto':
         padding = max(1, int(0.025*len(u)))
     s = slice(padding, len(u)-padding) # slice out data we want to measure
-    N = s.stop - s.start
     sigma = stats.median_abs_deviation(u[s] - v[s], scale='normal') # M is in units of this robust scatter metric
-    if sigma < 1e-6: sigma = 1 # guard against divide by zero
-    return np.sqrt(2*np.mean(utility.huber((u[s] - v[s])/sigma, M))) * sigma
+    return np.sqrt(2*np.mean(utility.huber(u[s] - v[s], M*sigma)))


 def rmse(u, v, padding=0):
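For intuition on the rescaled return expression, a sketch contrasting `robust_rme` with plain `rmse` on data containing one gross outlier (synthetic data; both functions are defined in this module):

```python
import numpy as np
from pynumdiff.utils.evaluate import robust_rme, rmse

rng = np.random.default_rng(0)
u = np.sin(np.linspace(0, 4, 500))
v = u + 0.01*rng.standard_normal(500)
v[100] += 10.0  # one gross outlier

print(rmse(u, v))        # dominated by the single outlier's squared residual
print(robust_rme(u, v))  # default M=6: beyond 6*sigma the outlier contributes only linearly
```

Since `huber(r/s, M)*s**2 == huber(r, M*s)` for the piecewise Huber, moving `sigma` inside the call returns the same value as the old `(u-v)/sigma` form; only the divide-by-zero guard is dropped.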
diff --git a/pynumdiff/utils/utility.py b/pynumdiff/utils/utility.py
index 3117d95..c0c4af4 100644
--- a/pynumdiff/utils/utility.py
+++ b/pynumdiff/utils/utility.py
@@ -2,7 +2,7 @@
 import numpy as np
 from scipy.integrate import cumulative_trapezoid
 from scipy.optimize import minimize
-from scipy.stats import median_abs_deviation
+from scipy.stats import median_abs_deviation, norm


 def hankel_matrix(x, num_delays, pad=False): # fixed delay step of 1
@@ -102,6 +102,13 @@
     absx = np.abs(x)
     return np.where(absx <= M, 0.5*x**2, M*(absx - 0.5*M))

+def huber_const(M):
+    """Scale that makes :code:`sum(huber())` interpolate :math:`\\sqrt{2}\\|\\cdot\\|_1` and :math:`\\frac{1}{2}\\|\\cdot\\|_2^2`,
+    from https://jmlr.org/papers/volume14/aravkin13a/aravkin13a.pdf, with correction for missing sqrt"""
+    a = 2*np.exp(-M**2 / 2)/M
+    b = np.sqrt(2*np.pi)*(2*norm.cdf(M) - 1)
+    return np.sqrt((2*a*(1 + M**2)/M**2 + b)/(a + b))
+

 def integrate_dxdt_hat(dxdt_hat, dt_or_t):
     """Wrapper for scipy.integrate.cumulative_trapezoid. Use 0 as first value so lengths match, see #88.
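The interpolation claimed in `huber_const`'s docstring can be sanity-checked at its two limits: as `M -> inf`, `a -> 0` and the scale tends to 1; as `M -> 0`, the scale grows like `sqrt(2)/M` while `huber(x, M) ~ M*|x|`. A small sketch:

```python
import numpy as np
from pynumdiff.utils.utility import huber, huber_const

x = np.linspace(-3, 3, 7)
# Large M: scale ~ 1 and huber is 0.5*x**2 everywhere, recovering (1/2)||.||_2^2
assert np.isclose(huber_const(50.), 1.)
assert np.allclose(huber_const(50.)*huber(x, 50.), 0.5*x**2)
# Small M: the scaled loss approaches sqrt(2)*|x|, recovering sqrt(2)*||.||_1
assert np.allclose(huber_const(1e-4)*huber(x, 1e-4), np.sqrt(2)*np.abs(x), rtol=1e-3)
```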
@@ -123,21 +130,20 @@
     :param np.array[float] x: timeseries of measurements
     :param np.array[float] x_hat: smoothed estimate of x
-    :param float M: robustifies constant estimation using Huber loss. The default is intended to capture the idea
-        of "six sigma": Assuming Gaussian inliers and M in units of standard deviation, the portion of inliers
-        beyond the Huber loss' transition is only about 1.97e-9. M here is in units of scaled mean absolute deviation,
-        so scatter can be calculated and used to normalize data without being thrown off by outliers.
+    :param float M: constant estimation is robustified with the Huber loss. :code:`M` here is in units of the scaled
+        median absolute deviation of residuals, so scatter can be calculated and used to normalize without being
+        thrown off by outliers. The default is intended to capture the idea of "six sigma": Assuming Gaussian
+        :code:`x - x_hat` errors, the portion of inliers beyond the Huber loss' transition is only about 1.97e-9.

     :return: **integration constant** (float) -- initial condition that best aligns x_hat with x
     """
-    if M == float('inf'): # calculates the constant to be mean(diff(x, x_hat)), equivalent to argmin_{x0} ||x_hat + x0 - x||_2^2
-        return np.mean(x - x_hat) # Solves the L2 distance minimization
-    elif M < 0.1: # small M looks like L1 loss, and Huber gets too flat to work well
-        return np.median(x - x_hat) # Solves the L1 distance minimization
+    sigma = median_abs_deviation(x - x_hat, scale='normal') # M is in units of this robust scatter metric
+    if M == float('inf') or sigma < 1e-3: # If no scatter, then no outliers, so use L2
+        return np.mean(x - x_hat) # Solves the l2 distance minimization, argmin_{x0} ||x_hat + x0 - x||_2^2
+    elif M < 1e-3: # small M looks like l1 loss, and Huber gets too flat to work well
+        return np.median(x - x_hat) # Solves the l1 distance minimization, argmin_{x0} ||x_hat + x0 - x||_1
     else:
-        sigma = median_abs_deviation(x - x_hat, scale='normal') # M is in units of this robust scatter metric
-        if sigma < 1e-6: sigma = 1 # guard against divide by zero
-        return minimize(lambda x0: np.mean(huber((x - (x_hat+x0))/sigma, M)), # fn to minimize in 1st argument
+        return minimize(lambda x0: np.sum(huber(x - (x_hat+x0), M*sigma)), # fn to minimize in 1st argument
                         0, method='SLSQP').x[0] # result is a vector, even if initial guess is just a scalar
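Lastly, a usage sketch of the robustified constant estimation (synthetic data, hypothetical values):

```python
import numpy as np
from pynumdiff.utils.utility import estimate_integration_constant

rng = np.random.default_rng(0)
t = np.linspace(0, 4, 400)
x_hat = np.sin(t)                               # smoothed estimate, offset from the measurements
x = np.sin(t) + 1.7 + 0.01*rng.standard_normal(len(t))
x[::40] += 25.0                                 # heavy outliers that would drag a plain mean

print(np.mean(x - x_hat))                       # pulled well above the true offset of 1.7
print(estimate_integration_constant(x, x_hat))  # default M=6 stays close to 1.7
```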