diff --git a/pynumdiff/basis_fit/_basis_fit.py b/pynumdiff/basis_fit/_basis_fit.py index 81e1abe..5a22c48 100644 --- a/pynumdiff/basis_fit/_basis_fit.py +++ b/pynumdiff/basis_fit/_basis_fit.py @@ -19,9 +19,8 @@ def spectraldiff(x, dt, params=None, options=None, high_freq_cutoff=None, even_e :param bool pad_to_zero_dxdt: if True, extend the data with extra regions that smoothly force the derivative to zero before taking FFT. - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. Use `high_freq_cutoff`, " + @@ -52,27 +51,25 @@ def spectraldiff(x, dt, params=None, options=None, high_freq_cutoff=None, even_e if even_extension is True: x = np.hstack((x, x[::-1])) - # If odd, make N even, and pad x + # Form wavenumbers N = len(x) - - # Define the frequency range. k = np.concatenate((np.arange(N//2 + 1), np.arange(-N//2 + 1, 0))) if N % 2 == 0: k[N//2] = 0 # odd derivatives get the Nyquist element zeroed out - omega = k*2*np.pi/(dt*N) # turn wavenumbers into frequencies in radians/s - # Frequency based smoothing: remove signals with a frequency higher than high_freq_cutoff + # Filter to zero out higher wavenumbers discrete_cutoff = int(high_freq_cutoff*N/2) # Nyquist is at N/2 location, and we're cutting off as a fraction of that - omega[discrete_cutoff:N-discrete_cutoff] = 0 + filt = np.ones(k.shape); filt[discrete_cutoff:N-discrete_cutoff] = 0 + + # Smoothed signal + X = np.fft.fft(x) + x_hat = np.real(np.fft.ifft(filt * X)) + x_hat = x_hat[padding:L+padding] # Derivative = 90 deg phase shift - dxdt_hat = np.real(np.fft.ifft(1.0j * omega * np.fft.fft(x))) + omega = 2*np.pi/(dt*N) # factor of 2pi/T turns wavenumbers into frequencies in radians/s + dxdt_hat = np.real(np.fft.ifft(1j * k * omega * filt * X)) dxdt_hat = dxdt_hat[padding:L+padding] - # Integrate to get x_hat - x_hat = utility.integrate_dxdt_hat(dxdt_hat, dt) - x0 = utility.estimate_integration_constant(x[padding:L+padding], x_hat) - x_hat = x_hat + x0 - return x_hat, dxdt_hat @@ -88,9 +85,8 @@ def rbfdiff(x, _t, sigma=1, lmbd=0.01): :param float sigma: controls width of radial basis functions :param float lmbd: controls smoothness - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if np.isscalar(_t): t = np.arange(len(x))*_t diff --git a/pynumdiff/finite_difference/_finite_difference.py b/pynumdiff/finite_difference/_finite_difference.py index 59e8c2b..4fd926b 100644 --- a/pynumdiff/finite_difference/_finite_difference.py +++ b/pynumdiff/finite_difference/_finite_difference.py @@ -13,6 +13,9 @@ def finitediff(x, dt, num_iterations=1, order=2): :param int num_iterations: number of iterations. 
If >1, the derivative is integrated with trapezoidal rule, that result is finite-differenced again, and the cycle is repeated num_iterations-1 times :param int order: 1, 2, or 4, controls which finite differencing scheme to employ + + :return: - **x_hat** (np.array) -- original x if :code:`num_iterations=1`, else smoothed x that yielded dxdt_hat + - **dxdt_hat** (np.array) -- estimated derivative of x """ if num_iterations < 1: raise ValueError("num_iterations must be >0") if order not in [1, 2, 4]: raise ValueError("order must be 1, 2, or 4") @@ -59,7 +62,7 @@ def finitediff(x, dt, num_iterations=1, order=2): dxdt_hat /= dt # don't forget to scale by dt, can't skip it this time if num_iterations > 1: # We've lost a constant of integration in the above - x_hat += utility.estimate_integration_constant(x, x_hat) # uses least squares + x_hat += utility.estimate_integration_constant(x, x_hat) return x_hat, dxdt_hat @@ -75,9 +78,8 @@ def first_order(x, dt, params=None, options={}, num_iterations=1): :param int num_iterations: number of iterations. If >1, the derivative is integrated with trapezoidal rule, that result is finite-differenced again, and the cycle is repeated num_iterations-1 times - :return: tuple[np.array, np.array] of\n - - **x_hat** -- original x if :code:`num_iterations=1`, else smoothed x that yielded dxdt_hat - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- original x if :code:`num_iterations=1`, else smoothed x that yielded dxdt_hat + - **dxdt_hat** (np.array) -- estimated derivative of x """ warn("`first_order` in past releases was actually calculating a second-order FD. Use `second_order` to achieve " + "approximately the same behavior. Note that odd-order methods have asymmetrical stencils, which causes " + @@ -99,9 +101,8 @@ def second_order(x, dt, num_iterations=1): :param int num_iterations: number of iterations. If >1, the derivative is integrated with trapezoidal rule, that result is finite-differenced again, and the cycle is repeated num_iterations-1 times - :return: tuple[np.array, np.array] of\n - - **x_hat** -- original x if :code:`num_iterations=1`, else smoothed x that yielded dxdt_hat - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- original x if :code:`num_iterations=1`, else smoothed x that yielded dxdt_hat + - **dxdt_hat** (np.array) -- estimated derivative of x """ warn("`second_order` is deprecated. Call `finitediff` with order 2 instead.", DeprecationWarning) return finitediff(x, dt, num_iterations, 2) @@ -116,9 +117,8 @@ def fourth_order(x, dt, num_iterations=1): :param int num_iterations: number of iterations. If >1, the derivative is integrated with trapezoidal rule, that result is finite-differenced again, and the cycle is repeated num_iterations-1 times - :return: tuple[np.array, np.array] of\n - - **x_hat** -- original x if :code:`num_iterations=1`, else smoothed x that yielded dxdt_hat - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- original x if :code:`num_iterations=1`, else smoothed x that yielded dxdt_hat + - **dxdt_hat** (np.array) -- estimated derivative of x """ warn("`fourth_order` is deprecated. 
Call `finitediff` with order 4 instead.", DeprecationWarning) return finitediff(x, dt, num_iterations, 4) diff --git a/pynumdiff/kalman_smooth/_kalman_smooth.py b/pynumdiff/kalman_smooth/_kalman_smooth.py index 93266b6..31a9393 100644 --- a/pynumdiff/kalman_smooth/_kalman_smooth.py +++ b/pynumdiff/kalman_smooth/_kalman_smooth.py @@ -25,12 +25,11 @@ def kalman_filter(y, _t, xhat0, P0, A, Q, C, R, B=None, u=None, save_P=True): :param bool save_P: whether to save history of error covariance and a priori state estimates, used with rts smoothing but nonstandard to compute for ordinary filtering - :return: tuple[np.array, np.array, np.array, np.array] of\n - - **xhat_pre** -- a priori estimates of xhat, with axis=0 the batch dimension, so xhat[n] gets the nth step - - **xhat_post** -- a posteriori estimates of xhat - - **P_pre** -- a priori estimates of P - - **P_post** -- a posteriori estimates of P - if :code:`save_P` is :code:`True` else only **xhat_post** to save memory + :return: - **xhat_pre** (np.array) -- a priori estimates of xhat, with axis=0 the batch dimension, so xhat[n] gets the nth step + - **xhat_post** (np.array) -- a posteriori estimates of xhat + - **P_pre** (np.array) -- a priori estimates of P + - **P_post** (np.array) -- a posteriori estimates of P + if :code:`save_P` is :code:`True` else only **xhat_post** to save memory """ N = y.shape[0] m = xhat0.shape[0] # dimension of the state @@ -84,16 +83,15 @@ def rts_smooth(_t, A, xhat_pre, xhat_post, P_pre, P_post, compute_P_smooth=True) :param float or array[float] _t: This function supports variable step size. This parameter is either the constant step size if given as a single float, or sample locations if given as an array of same length as the state histories. :param np.array A: state transition matrix, in discrete time if constant dt, in continuous time if variable dt - :param list[np.array] xhat_pre: a priori estimates of xhat from a kalman_filter forward pass - :param list[np.array] xhat_post: a posteriori estimates of xhat from a kalman_filter forward pass - :param list[np.array] P_pre: a priori estimates of P from a kalman_filter forward pass - :param list[np.array] P_post: a posteriori estimates of P from a kalman_filter forward pass + :param np.array xhat_pre: a priori estimates of xhat from a kalman_filter forward pass + :param np.array xhat_post: a posteriori estimates of xhat from a kalman_filter forward pass + :param np.array P_pre: a priori estimates of P from a kalman_filter forward pass + :param np.array P_post: a posteriori estimates of P from a kalman_filter forward pass :param bool compute_P_smooth: it's extra work if you don't need it - :return: tuple[np.array, np.array] of\n - - **xhat_smooth** -- RTS smoothed xhat - - **P_smooth** -- RTS smoothed P - if :code:`compute_P_smooth` is :code:`True` else only **xhat_smooth** + :return: - **xhat_smooth** (np.array) -- RTS smoothed xhat + - **P_smooth** (np.array) -- RTS smoothed P estimates + if :code:`compute_P_smooth` is :code:`True` else only **xhat_smooth** """ xhat_smooth = np.empty(xhat_post.shape); xhat_smooth[-1] = xhat_post[-1] # preallocate arrays if compute_P_smooth: P_smooth = np.empty(P_post.shape); P_smooth[-1] = P_post[-1] @@ -123,9 +121,8 @@ def rtsdiff(x, _t, order, log_qr_ratio, forwardbackward): :param bool forwardbackward: indicates whether to run smoother forwards and backwards (usually achieves better estimate at end points) - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated 
derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if not np.isscalar(_t) and len(x) != len(_t): raise ValueError("If `_t` is given as array-like, must have same length as `x`.") @@ -185,9 +182,8 @@ def constant_velocity(x, dt, params=None, options=None, r=None, q=None, forwardb :param bool forwardbackward: indicates whether to run smoother forwards and backwards (usually achieves better estimate at end points) - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # boilerplate backwards compatibility code warn("`params` and `options` parameters will be removed in a future version. Use `r`, " + @@ -216,9 +212,8 @@ def constant_acceleration(x, dt, params=None, options=None, r=None, q=None, forw :param bool forwardbackward: indicates whether to run smoother forwards and backwards (usually achieves better estimate at end points) - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # boilerplate backwards compatibility code warn("`params` and `options` parameters will be removed in a future version. Use `r`, " + @@ -247,9 +242,8 @@ def constant_jerk(x, dt, params=None, options=None, r=None, q=None, forwardbackw :param bool forwardbackward: indicates whether to run smoother forwards and backwards (usually achieves better estimate at end points) - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # boilerplate backwards compatibility code warn("`params` and `options` parameters will be removed in a future version. Use `r`, " + @@ -290,13 +284,9 @@ def robustdiff(x, dt, order, log_q, log_r, proc_huberM=6, meas_huberM=0): :param float proc_huberM: quadratic-to-linear transition point for process loss :param float meas_huberM: quadratic-to-linear transition point for measurement loss - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ - #q = 1e4 # I found q too small worsened condition number of the Q matrix, so fixing it at a biggish value - #r = q/qr_ratio - A_c = np.diag(np.ones(order), 1) # continuous-time A just has 1s on the first diagonal (where 0th is main diagonal) Q_c = np.zeros(A_c.shape); Q_c[-1,-1] = 10**log_q # continuous-time uncertainty around the last derivative C = np.zeros((1, order+1)); C[0,0] = 1 # we measure only y = noisy x @@ -324,13 +314,13 @@ def convex_smooth(y, A, Q, C, R, proc_huberM, meas_huberM): :param float proc_huberM: Huber loss parameter. :math:`M=0` reduces to :math:`\\sqrt{2}\\ell_1`. :param float meas_huberM: Huber loss parameter. :math:`M=\\infty` reduces to :math:`\\frac{1}{2}\\ell_2^2`. 
- :return: np.array -- state estimates (state_dim x N) + :return: (np.array) -- state estimates (state_dim x N) """ N = len(y) x_states = cvxpy.Variable((A.shape[0], N)) # each column is [position, velocity, acceleration, ...] at step n def huber_const(M): # from https://jmlr.org/papers/volume14/aravkin13a/aravkin13a.pdf, with correction for missing sqrt - a = 2*np.exp(-M**2 / 2)/M + a = 2*np.exp(-M**2 / 2)/M # huber_const smoothly transitions Huber between 1-norm and 2-norm squared cases b = np.sqrt(2*np.pi)*(2*norm.cdf(M) - 1) return np.sqrt((2*a*(1 + M**2)/M**2 + b)/(a + b)) @@ -338,17 +328,17 @@ def huber_const(M): # from https://jmlr.org/papers/volume14/aravkin13a/aravkin13 proc_resids = np.linalg.inv(sqrtm(Q)) @ (x_states[:,1:] - A @ x_states[:,:-1]) # all Q^(-1/2)(x_n - A x_{n-1}) meas_resids = np.linalg.inv(sqrtm(R)) @ (y.reshape(C.shape[0],-1) - C @ x_states) # all R^(-1/2)(y_n - C x_n) # Process terms: sum of J(proc_resids) - objective = 0.5*cvxpy.sum_squares(proc_resids) if huberM == float('inf') \ - else np.sqrt(2)*cvxpy.sum(cvxpy.abs(proc_resids)) if huberM < 1e-3 \ - else huber_const(huberM)*cvxpy.sum(cvxpy.huber(proc_resids, huberM)) # 1/2 l2^2, l1, or Huber + objective = 0.5*cvxpy.sum_squares(proc_resids) if proc_huberM == float('inf') \ + else np.sqrt(2)*cvxpy.sum(cvxpy.abs(proc_resids)) if proc_huberM < 1e-3 \ + else huber_const(proc_huberM)*cvxpy.sum(cvxpy.huber(proc_resids, proc_huberM)) # 1/2 l2^2, l1, or Huber # Measurement terms: sum of V(meas_resids) - objective += 0.5*cvxpy.sum_squares(meas_resids) if huberM == float('inf') \ - else np.sqrt(2)*cvxpy.sum(cvxpy.abs(meas_resids)) if huberM < 1e-3 \ - else huber_const(huberM)*cvxpy.sum(cvxpy.huber(meas_resids, huberM)) # CVXPY quirk: norm(, 1) != sum(abs()) for matrices + objective += 0.5*cvxpy.sum_squares(meas_resids) if meas_huberM == float('inf') \ + else np.sqrt(2)*cvxpy.sum(cvxpy.abs(meas_resids)) if meas_huberM < 1e-3 \ + else huber_const(meas_huberM)*cvxpy.sum(cvxpy.huber(meas_resids, meas_huberM)) # CVXPY quirk: norm(, 1) != sum(abs()) for matrices problem = cvxpy.Problem(cvxpy.Minimize(objective)) try: problem.solve(solver=cvxpy.CLARABEL); print("CLARABEL succeeded") - except cvxpy.error.SolverError: pass # Could try a different solver here, like SCS, but slows things down + except cvxpy.error.SolverError: pass # Could try another solver here, like SCS, but slows things down - if x_states.value is None: return np.full((A.shape[0], N), np.nan) # There can be solver failure, even without exception + if x_states.value is None: return np.full((A.shape[0], N), np.nan) # There can be solver failure, even without error return x_states.value diff --git a/pynumdiff/linear_model/_linear_model.py b/pynumdiff/linear_model/_linear_model.py index 96a5c33..e9b8dca 100644 --- a/pynumdiff/linear_model/_linear_model.py +++ b/pynumdiff/linear_model/_linear_model.py @@ -83,9 +83,8 @@ def lineardiff(x, dt, params=None, options=None, order=None, gamma=None, window_ :param str kernel: name of kernel to use for weighting and smoothing windows ('gaussian' or 'friedrichs') :param str solver: CVXPY solver to use, one of :code:`cvxpy.installed_solvers()` - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: warn("`params` and `options` parameters will be removed in a future version. 
Use `order`, " + diff --git a/pynumdiff/optimize/_optimize.py b/pynumdiff/optimize/_optimize.py index 689356a..1c21ea0 100644 --- a/pynumdiff/optimize/_optimize.py +++ b/pynumdiff/optimize/_optimize.py @@ -87,15 +87,23 @@ 'forwardbackward': {True, False}}, {'q': (1e-10, 1e10), 'r': (1e-10, 1e10)}), + # robustdiff: ({'order': {1, 2, 3}, # warning: order 1 hacks the loss function when tvgamma is used, tends to win but is usually suboptimal choice in terms of true RMSE + # 'log_q': [1., 4, 8, 12], # decimal after first entry ensure this is treated as float type + # 'log_r': [-1., 1, 4, 8], + # #'proc_huberM': [0., 2, 6], # 0 is l1 norm, 1.345 is Huber 95% "efficiency", 2 assumes about 5% outliers, + # 'meas_huberM': [0., 2, 6]}, # and 6 assumes basically no outliers -> l2 norm. Try (1 - norm.cdf(M))*2 to see outlier portion + # {'log_q': (-1, 18), + # 'log_r': (-5, 18), + # 'proc_huberM': (0, 6), + # 'meas_huberM': (0, 6)}), robustdiff: ({'order': {1, 2, 3}, # warning: order 1 hacks the loss function when tvgamma is used, tends to win but is usually suboptimal choice in terms of true RMSE 'log_q': [1., 4, 8, 12], # decimal after first entry ensure this is treated as float type 'log_r': [-1., 1, 4, 8], - #'proc_huberM': [0., 2, 6], # 0 is l1 norm, 1.345 is Huber 95% "efficiency", 2 assumes about 5% outliers, - 'meas_huberM': [0., 2, 6]}, # and 6 assumes basically no outliers -> l2 norm. Try (1 - norm.cdf(M))*2 to see outlier portion - {'log_q': (-1, 18), - 'log_r': (-5, 18), - 'proc_huberM': (0, 6), - 'meas_huberM': (0, 6)}), + #'qr_ratio': [10**k for k in range(-1, 16, 4)], + 'huberM': [0., 5, 10]}, # 0. so type is float. Good choices here really depend on the data scale + {'log_q': (0, 18), + 'log_r': (-5, 10), + 'huberM': (0, 20)}), # really only want to use quadratic when nearby; 20sigma is a huge distance lineardiff: ({'kernel': 'gaussian', 'order': 3, 'gamma': [1e-1, 1, 10, 100], @@ -132,27 +140,28 @@ def _objective_function(point, func, x, dt, singleton_params, categorical_params :return: float, cost of this objective at the point """ + # Short circuit if this hyperparam combo has already been queried, ~10% savings per #160 key = sha1((''.join(f"{v:.3e}" for v in point) + # This hash is stable across processes. 
Takes bytes ''.join(str(v) for k,v in sorted(categorical_params.items()))).encode()).digest() - if key in cache: return cache[key] # short circuit if this hyperparam combo has already been queried, ~10% savings per #160 + if key in cache: return cache[key] + # Query the differentiation method at this choice of hyperparameters point_params = {k:(v if search_space_types[k] == float else int(np.round(v))) for k,v in zip(search_space_types, point)} # point -> dict - # add back in the singletons we're not searching over + # add back in singletons and categorical choices the Nelder-Mead isn't searching over try: x_hat, dxdt_hat = func(x, dt, **point_params, **singleton_params, **categorical_params) # estimate x and dxdt except np.linalg.LinAlgError: cache[key] = 1e10; return 1e10 # some methods can fail numerically - # evaluate estimate - if dxdt_truth is not None: # then minimize ||dxdt_hat - dxdt_truth||^2 - if metric == 'rmse': - rms_rec_x, rms_x, rms_dxdt = evaluate.rmse(x, dt, x_hat, dxdt_hat, dxdt_truth=dxdt_truth, padding=padding) + # Evaluate estimate according to a loss function + if dxdt_truth is not None: + if metric == 'rmse': # minimize ||dxdt_hat - dxdt_truth||_2 + rms_dxdt = evaluate.rmse(dxdt_truth, dxdt_hat, padding=padding) cache[key] = rms_dxdt; return rms_dxdt elif metric == 'error_correlation': - ec = evaluate.error_correlation(dxdt_hat, dxdt_truth, padding=padding) + ec = evaluate.error_correlation(dxdt_truth, dxdt_hat, padding=padding) cache[key] = ec; return ec - else: # then minimize [ || integral(dxdt_hat) - x||^2 + gamma*TV(dxdt_hat) ] - rms_rec_x, rms_x, rms_dxdt = evaluate.rmse(x, dt, x_hat, dxdt_hat, dxdt_truth=None, padding=padding) - cost = rms_rec_x + tvgamma*evaluate.total_variation(dxdt_hat, padding=padding) + else: # then minimize sqrt{2*Mean(Huber((x_hat- x)/sigma))}*sigma + gamma*TV(dxdt_hat) + cost = evaluate.robust_rme(x, x_hat, padding=padding) + tvgamma*evaluate.total_variation(dxdt_hat, padding=padding) cache[key] = cost; return cost @@ -186,8 +195,6 @@ def optimize(func, x, dt, dxdt_truth=None, tvgamma=1e-2, search_space_updates={} """ if metric not in ['rmse','error_correlation']: raise ValueError('`metric` should either be `rmse` or `error_correlation`.') - if metric == 'error_correlation' and dxdt_truth is None: - raise ValueError('`metric` can only be `error_correlation` if `dxdt_truth` is given.') search_space, bounds = method_params_and_bounds[func] search_space.update(search_space_updates) # for things not given, use defaults diff --git a/pynumdiff/polynomial_fit/_polynomial_fit.py b/pynumdiff/polynomial_fit/_polynomial_fit.py index 758f105..33d56c1 100644 --- a/pynumdiff/polynomial_fit/_polynomial_fit.py +++ b/pynumdiff/polynomial_fit/_polynomial_fit.py @@ -19,9 +19,8 @@ def splinediff(x, _t, params=None, options={}, degree=3, s=None, num_iterations= until the smoothing condition is satisfied: :math:`\\sum_t (x[t] - \\text{spline}[t])^2 \\leq s` :param int num_iterations: how many times to apply smoothing - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. 
Use `order`, `s`, and " + @@ -61,9 +60,8 @@ def polydiff(x, dt, params=None, options=None, degree=None, window_size=None, st :param int step_size: step size for sliding :param str kernel: name of kernel to use for weighting and smoothing windows ('gaussian' or 'friedrichs') - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: warn("`params` and `options` parameters will be removed in a future version. Use `degree` " + @@ -115,9 +113,8 @@ def savgoldiff(x, dt, params=None, options=None, degree=None, window_size=None, :param int smoothing_win: size of the window used for gaussian smoothing, a good default is window_size, but smaller for high frequnecy data - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. Use `degree`, " + @@ -138,7 +135,6 @@ def savgoldiff(x, dt, params=None, options=None, degree=None, window_size=None, dxdt_hat = utility.convolutional_smoother(dxdt_hat, kernel) x_hat = utility.integrate_dxdt_hat(dxdt_hat, dt) - x0 = utility.estimate_integration_constant(x, x_hat) - x_hat = x_hat + x0 + x_hat += utility.estimate_integration_constant(x, x_hat) return x_hat, dxdt_hat diff --git a/pynumdiff/smooth_finite_difference/_smooth_finite_difference.py b/pynumdiff/smooth_finite_difference/_smooth_finite_difference.py index cb0d5c9..4d6cb10 100644 --- a/pynumdiff/smooth_finite_difference/_smooth_finite_difference.py +++ b/pynumdiff/smooth_finite_difference/_smooth_finite_difference.py @@ -19,9 +19,8 @@ def kerneldiff(x, dt, kernel='friedrichs', window_size=5, num_iterations=1): :param int window_size: filtering kernel size :param int num_iterations: how many times to apply mean smoothing - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if kernel in ['mean', 'gaussian', 'friedrichs']: kernel = getattr(utility, f"{kernel}_kernel")(window_size) @@ -50,9 +49,8 @@ def meandiff(x, dt, params=None, options={}, window_size=5, num_iterations=1): :param int window_size: filter window size :param int num_iterations: how many times to apply mean smoothing - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. 
Use `window_size` " + @@ -76,9 +74,8 @@ def mediandiff(x, dt, params=None, options={}, window_size=5, num_iterations=1): :param int window_size: filter window size :param int num_iterations: how many times to apply median smoothing - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. Use `window_size` " + @@ -103,9 +100,8 @@ def gaussiandiff(x, dt, params=None, options={}, window_size=5, num_iterations=1 :param int window_size: filter window size :param int num_iterations: how many times to apply gaussian smoothing - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. Use `window_size` " + @@ -130,9 +126,8 @@ def friedrichsdiff(x, dt, params=None, options={}, window_size=5, num_iterations :param int window_size: filter window size :param int num_iterations: how many times to apply smoothing - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. Use `window_size` " + @@ -158,9 +153,8 @@ def butterdiff(x, dt, params=None, options={}, filter_order=2, cutoff_freq=0.5, value is normalized to the range 0-1, where 1 is the Nyquist frequency. :param int num_iterations: how many times to apply smoothing - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. 
Use `filter_order`, " + diff --git a/pynumdiff/tests/test_diff_methods.py b/pynumdiff/tests/test_diff_methods.py index bf82cf0..211802c 100644 --- a/pynumdiff/tests/test_diff_methods.py +++ b/pynumdiff/tests/test_diff_methods.py @@ -51,7 +51,7 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs) (constant_acceleration, {'r':1e-3, 'q':1e4}), (constant_acceleration, [1e-3, 1e4]), (constant_jerk, {'r':1e-4, 'q':1e5}), (constant_jerk, [1e-4, 1e5]), (rtsdiff, {'order':2, 'log_qr_ratio':7, 'forwardbackward':True}), - #(robustdiff, {'order':3, 'qr_ratio':1e8}), # Add back later, once the design stabilizes + #(robustdiff, {'order':3, 'log_q':8, 'log_r':0}), # Add back when design stabilizes (velocity, {'gamma':0.5}), (velocity, [0.5]), (acceleration, {'gamma':1}), (acceleration, [1]), (jerk, {'gamma':10}), (jerk, [10]), @@ -109,8 +109,8 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs) [(-25, -25), (0, -1), (0, 0), (1, 1)], [(-25, -25), (1, 1), (0, 0), (1, 1)], [(-25, -25), (3, 3), (0, 0), (3, 3)]], - iterated_second_order: [[(-9, -10), (-25, -25), (0, -1), (0, 0)], - [(-9, -10), (-14, -14), (0, -1), (0, 0)], + iterated_second_order: [[(-7, -8), (-25, -25), (0, -1), (0, 0)], + [(-7, -8), (-14, -14), (0, -1), (0, 0)], [(-1, -1), (0, 0), (0, -1), (0, 0)], [(0, 0), (1, 0), (0, 0), (1, 0)], [(1, 1), (2, 2), (1, 1), (2, 2)], @@ -121,8 +121,8 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs) [(-25, -25), (-2, -2), (0, 0), (1, 1)], [(-25, -25), (1, 0), (0, 0), (1, 1)], [(-25, -25), (2, 2), (0, 0), (2, 2)]], - iterated_fourth_order: [[(-9, -10), (-25, -25), (0, -1), (0, 0)], - [(-9, -10), (-13, -13), (0, -1), (0, 0)], + iterated_fourth_order: [[(-7, -8), (-25, -25), (0, -1), (0, 0)], + [(-7, -8), (-13, -13), (0, -1), (0, 0)], [(-1, -1), (0, 0), (-1, -1), (0, 0)], [(0, -1), (1, 1), (0, 0), (1, 1)], [(1, 1), (2, 2), (1, 1), (2, 2)], @@ -133,8 +133,8 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs) [(-2, -2), (0, 0), (0, -1), (1, 1)], [(0, 0), (1, 1), (0, -1), (1, 1)], [(0, 0), (3, 3), (0, 0), (3, 3)]], - savgoldiff: [[(-9, -10), (-13, -14), (0, -1), (0, 0)], - [(-9, -10), (-13, -13), (0, -1), (0, 0)], + savgoldiff: [[(-7, -7), (-13, -14), (0, -1), (0, 0)], + [(-7, -7), (-13, -13), (0, -1), (0, 0)], [(-2, -2), (-1, -1), (0, -1), (0, 0)], [(0, -1), (0, 0), (0, 0), (1, 0)], [(1, 1), (2, 2), (1, 1), (2, 2)], @@ -151,7 +151,7 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs) [(0, 0), (1, 1), (0, 0), (1, 1)], [(1, 0), (2, 2), (1, 0), (2, 2)], [(1, 0), (3, 3), (1, 0), (3, 3)]], - spectraldiff: [[(-9, -10), (-14, -15), (-1, -1), (0, 0)], + spectraldiff: [[(-15, -15), (-14, -15), (0, -1), (0, 0)], [(0, 0), (1, 1), (0, 0), (1, 1)], [(1, 1), (1, 1), (1, 1), (1, 1)], [(0, 0), (1, 1), (0, 0), (1, 1)], @@ -181,14 +181,14 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs) [(0, 0), (1, 1), (0, 0), (1, 1)], [(1, 1), (2, 2), (1, 1), (2, 2)], [(1, 1), (3, 3), (1, 1), (3, 3)]], - iterative_velocity: [[(-9, -10), (-25, -25), (0, -1), (0, 0)], + iterative_velocity: [[(-7, -8), (-25, -25), (0, -1), (0, 0)], [(0, 0), (0, 0), (0, 0), (1, 0)], [(0, 0), (1, 0), (1, 0), (1, 0)], [(1, 0), (1, 1), (1, 0), (1, 1)], [(2, 1), (2, 2), (2, 1), (2, 2)], [(1, 1), (3, 3), (1, 1), (3, 3)]], - smooth_acceleration: [[(-9, -10), (-18, -18), (0, -1), (0, 0)], - [(-9, -9), (-10, -10), (-1, -1), (-1, -1)], + smooth_acceleration: [[(-7, -8), (-18, -18), (0, -1), (0, 0)], + [(-7, -7), (-10, 
-10), (-1, -1), (-1, -1)], [(-2, -2), (-1, -1), (-1, -1), (0, -1)], [(0, 0), (1, 0), (0, -1), (1, 0)], [(1, 1), (2, 2), (1, 1), (2, 2)], @@ -229,7 +229,7 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs) [(-12, -12), (-2, -2), (0, -1), (1, 1)], [(0, 0), (2, 2), (0, 0), (2, 2)], [(1, 1), (3, 3), (1, 1), (3, 3)]], - lineardiff: [[(-6, -6), (-5, -6), (0, -1), (0, 0)], + lineardiff: [[(-7, -8), (-14, -14), (0, -1), (0, 0)], [(0, 0), (2, 1), (0, 0), (2, 1)], [(1, 0), (2, 2), (1, 0), (2, 2)], [(1, 0), (2, 1), (1, 0), (2, 1)], diff --git a/pynumdiff/tests/test_optimize.py b/pynumdiff/tests/test_optimize.py index 7b8d480..1cc156f 100644 --- a/pynumdiff/tests/test_optimize.py +++ b/pynumdiff/tests/test_optimize.py @@ -25,30 +25,6 @@ def test_finite_difference(): assert params1['num_iterations'] == 5 assert params2['num_iterations'] == 1 -def test_mediandiff(): - params1, val1 = optimize(mediandiff, x, dt, dxdt_truth=dxdt_truth, search_space_updates={'num_iterations':1}, padding='auto') - params2, val2 = optimize(mediandiff, x, dt, tvgamma=tvgamma, search_space_updates={'num_iterations':1}, padding='auto') - assert params1['window_size'] == 5 - assert params2['window_size'] == 1 - -def test_meandiff(): - params1, val1 = optimize(meandiff, x, dt, dxdt_truth=dxdt_truth, search_space_updates={'num_iterations':1}, padding='auto') - params2, val2 = optimize(meandiff, x, dt, tvgamma=tvgamma, search_space_updates={'num_iterations':1}, padding='auto') - assert params1['window_size'] == 5 - assert params2['window_size'] == 1 - -def test_gaussiandiff(): - params1, val1 = optimize(gaussiandiff, x, dt, dxdt_truth=dxdt_truth, search_space_updates={'num_iterations':1}, padding='auto') - params2, val2 = optimize(gaussiandiff, x, dt, tvgamma=tvgamma, search_space_updates={'num_iterations':1}, padding='auto') - assert params1['window_size'] == 9 - assert params2['window_size'] == 1 - -def test_friedrichsdiff(): - params1, val1 = optimize(friedrichsdiff, x, dt, dxdt_truth=dxdt_truth, search_space_updates={'num_iterations':1}, padding='auto') - params2, val2 = optimize(friedrichsdiff, x, dt, tvgamma=tvgamma, search_space_updates={'num_iterations':1}, padding='auto') - assert params1['window_size'] == 9 - assert params2['window_size'] == 1 - def test_iterative_velocity(): params1, val1 = optimize(iterative_velocity, x, dt, dxdt_truth=dxdt_truth, search_space_updates={'num_iterations':1}, padding='auto') params2, val2 = optimize(iterative_velocity, x, dt, tvgamma=tvgamma, search_space_updates={'num_iterations':1}, padding='auto') @@ -56,26 +32,6 @@ def test_iterative_velocity(): np.testing.assert_almost_equal(params1['gamma'], 0.0001, decimal=4) np.testing.assert_almost_equal(params2['gamma'], 0.0001, decimal=4) -def test_velocity(): - try: import cvxpy - except: skip("could not import cvxpy, skipping test_velocity") - - params1, val1 = optimize(velocity, x, dt, dxdt_truth=dxdt_truth, padding='auto', maxiter=20) - params2, val2 = optimize(velocity, x, dt, tvgamma=tvgamma, padding='auto', maxiter=20) - - np.testing.assert_almost_equal(params1['gamma'], 0.0769, decimal=3) - np.testing.assert_almost_equal(params2['gamma'], 0.010, decimal=3) - -def test_acceleration(): - try: import cvxpy - except: pytest.skip("could not import cvxpy, skipping test_acceleration") - - params1, val1 = optimize(acceleration, x, dt, dxdt_truth=dxdt_truth, padding='auto', maxiter=20) - params2, val2 = optimize(acceleration, x, dt, tvgamma=tvgamma, padding='auto', maxiter=20) - - 
np.testing.assert_almost_equal(params1['gamma'], 0.147, decimal=3) - np.testing.assert_almost_equal(params2['gamma'], 0.0046, decimal=4) - def test_savgoldiff(): params1, val1 = optimize(savgoldiff, x, dt, dxdt_truth=dxdt_truth, padding='auto') params2, val2 = optimize(savgoldiff, x, dt, tvgamma=tvgamma, padding='auto') diff --git a/pynumdiff/tests/test_utils.py b/pynumdiff/tests/test_utils.py index 8a07e0e..5d35631 100644 --- a/pynumdiff/tests/test_utils.py +++ b/pynumdiff/tests/test_utils.py @@ -22,16 +22,22 @@ def test_integrate_dxdt_hat(): def test_estimate_integration_constant(): """For known simple functions, make sure the initial condition is as expected""" - for x,x_hat,c in [([1.0, 2.0, 3.0, 4.0, 5.0], [0.0, 1.0, 2.0, 3.0, 4.0], 1), # Perfect alignment case, xhat shifted by 1 - (np.ones(5)*10, np.ones(5)*5, 5), - ([0], [1], -1)]: + for x,x_hat,c in [(np.array([1.0, 2.0, 3.0, 4.0, 5.0]), np.array([0.0, 1.0, 2.0, 3.0, 4.0]), 1), # Perfect alignment case, xhat shifted by 1 + (np.ones(5)*10, np.ones(5)*5, 5), + (np.array([0]), np.array([1]), -1)]: x0 = utility.estimate_integration_constant(x, x_hat) assert np.allclose(x0, float(c), rtol=1e-3) - np.random.seed(42) # Noisy case. Seed for reproducibility - x0 = utility.estimate_integration_constant([1.0, 2.0, 3.0, 4.0, 5.0], - np.array([0.0, 1.0, 2.0, 3.0, 4.0]) + np.random.normal(0, 0.1, 5)) - assert 0.9 < x0 < 1.1 # The result should be close to 1.0, but not exactly due to noise + x_hat = np.sin(np.arange(400)*0.01) + x = x_hat + np.random.normal(0, 0.1, 400) + 1 # shift data by 1 + x0 = utility.estimate_integration_constant(x, x_hat, M=float('inf')) + assert 0.95 < x0 < 1.05 # The result should be close to 1.0, but not exactly due to noise + + x[100] = 100 # outlier case + x0 = utility.estimate_integration_constant(x, x_hat, M=0) + assert 0.95 < x0 < 1.05 + x0 = utility.estimate_integration_constant(x, x_hat, M=6) + assert 0.95 < x0 < 1.05 def test_convolutional_smoother(): @@ -63,19 +69,19 @@ def test_peakdet(request): pyplot.title('peakdet validataion') pyplot.show() - assert np.allclose(maxtab, [[0.473, 1.59843494], # these numbers validated by eye with --plot - [1.795, 1.90920786], - [3.314, -0.04585991], - [4.992, 0.74798665], - [6.345, 1.89597554], - [7.778, 0.57190318], - [9.424, 0.58764606]]) - assert np.allclose(mintab, [[1.096, 0.30361178], - [2.739, -1.12624328], - [4.072, -2.00254655], - [5.582, -0.31529832], - [7.135, -0.58327787], - [8.603, -1.71278265]]) + assert np.allclose(maxtab, [[0.447, 1.58575613], # these numbers validated by eye with --plot + [1.818, 1.91349239], + [3.316,-0.02740252], + [4.976, 0.74512778], + [6.338, 1.89861691], + [7.765, 0.57577842], + [9.402, 0.59450898]]) + assert np.allclose(mintab, [[1.139, 0.31325728], + [2.752,-1.12769567], + [4.098,-2.00326946], + [5.507,-0.31714122], + [7.211,-0.59708324], + [8.612,-1.7118216 ]]) def test_slide_function(): """Verify the slide function's weighting scheme calculates as expected""" @@ -90,6 +96,7 @@ def identity(x, dt): return x, 0 # should come back the same def test_simulations(request): + """Just sprint through running them all to make sure they go. 
Optionally plot with flag.""" if request.config.getoption("--plot"): from matplotlib import pyplot fig, axes = pyplot.subplots(2, 3, figsize=(18,7), constrained_layout=True) @@ -110,5 +117,13 @@ def test_simulations(request): ax.set_title(title, fontsize=18) if i == 5: ax.legend(loc='lower right', fontsize=12) -# def test_evaluate(): -# return + +def test_robust_rme(): + """Ensure the robust error metric is the same as RMSE for big M, and that it does + better in the presence of outliers""" + u = np.sin(np.arange(100)*0.1) + v = u + np.random.randn(100) + assert np.allclose(evaluate.rmse(u, v), evaluate.robust_rme(u, v, M=6)) + + v[40] = 100 # throw an outlier in there + assert evaluate.robust_rme(u, v, M=2) < evaluate.rmse(u, v) diff --git a/pynumdiff/total_variation_regularization/_total_variation_regularization.py b/pynumdiff/total_variation_regularization/_total_variation_regularization.py index 9645af6..1dc76d9 100644 --- a/pynumdiff/total_variation_regularization/_total_variation_regularization.py +++ b/pynumdiff/total_variation_regularization/_total_variation_regularization.py @@ -30,9 +30,8 @@ def iterative_velocity(x, dt, params=None, options=None, num_iterations=None, ga :code:`'large'` has simpler numerics but is more efficient for large-scale problems. :code:`'large'` is more readily modified for higher-order derivatives, since the implicit differentiation matrix is square. - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. Use `num_iterations`, " + @@ -65,9 +64,8 @@ def tvrdiff(x, dt, order, gamma, solver=None): :param str solver: Solver to use. Solver options include: 'MOSEK', 'CVXOPT', 'CLARABEL', 'ECOS'. In testing, 'MOSEK' was the most robust. If not given, fall back to CVXPY's default. - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ # Normalize for numerical consistency with convex solver mean = np.mean(x) @@ -119,9 +117,8 @@ def velocity(x, dt, params=None, options=None, gamma=None, solver=None): :param str solver: the solver CVXPY should use, 'MOSEK', 'CVXOPT', 'CLARABEL', 'ECOS', etc. In testing, 'MOSEK' was the most robust. If not given, fall back to CVXPY's default. - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. Use `gamma` " + @@ -148,9 +145,8 @@ def acceleration(x, dt, params=None, options=None, gamma=None, solver=None): :param str solver: the solver CVXPY should use, 'MOSEK', 'CVXOPT', 'CLARABEL', 'ECOS', etc. In testing, 'MOSEK' was the most robust. If not given, fall back to CVXPY's default. 
- :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. Use `gamma` " + @@ -177,9 +173,8 @@ def jerk(x, dt, params=None, options=None, gamma=None, solver=None): :param str solver: the solver CVXPY should use, 'MOSEK', 'CVXOPT', 'CLARABEL', 'ECOS', etc. In testing, 'MOSEK' was the most robust. If not given, fall back to CVXPY's default. - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. Use `gamma` " + @@ -208,9 +203,8 @@ def smooth_acceleration(x, dt, params=None, options=None, gamma=None, window_siz :param str solver: the solver CVXPY should use, 'MOSEK', 'CVXOPT', 'CLARABEL', 'ECOS', etc. In testing, 'MOSEK' was the most robust. If not given, fall back to CVXPY's default. - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. Use `gamma` " + @@ -246,9 +240,8 @@ def jerk_sliding(x, dt, params=None, options=None, gamma=None, solver=None, wind In testing, 'MOSEK' was the most robust. If not given, fall back to CVXPY's default. :param int window_size: how wide to make the kernel - :return: tuple[np.array, np.array] of\n - - **x_hat** -- estimated (smoothed) x - - **dxdt_hat** -- estimated derivative of x + :return: - **x_hat** (np.array) -- estimated (smoothed) x + - **dxdt_hat** (np.array) -- estimated derivative of x """ if params != None: # Warning to support old interface for a while. Remove these lines along with params in a future release. warn("`params` and `options` parameters will be removed in a future version. 
Use `gamma` " + diff --git a/pynumdiff/utils/evaluate.py b/pynumdiff/utils/evaluate.py index c1e2e01..96f5c30 100644 --- a/pynumdiff/utils/evaluate.py +++ b/pynumdiff/utils/evaluate.py @@ -20,7 +20,7 @@ def plot(x, dt, x_hat, dxdt_hat, x_truth, dxdt_truth, xlim=None, show_error=True :param bool show_error: whether to show the rmse :param int markersize: marker size of noisy observations - :return: Display two plots + :return: (tuple) -- figure and axes """ t = np.arange(len(x))*dt if xlim is None: @@ -48,8 +48,8 @@ def plot(x, dt, x_hat, dxdt_hat, x_truth, dxdt_truth, xlim=None, show_error=True axes[1].legend(loc='lower right', fontsize=12) if show_error: - _, _, rms_dxdt = rmse(x, dt, x_hat, dxdt_hat, x_truth, dxdt_truth) - R_sqr = error_correlation(dxdt_hat, dxdt_truth) + rms_dxdt = rmse(dxdt_truth, dxdt_hat) + R_sqr = error_correlation(dxdt_truth, dxdt_hat) axes[1].text(0.05, 0.95, f"RMSE = {rms_dxdt:.2f}\n$R^2$ = {R_sqr:.2g}", transform=axes[1].transAxes, fontsize=15, verticalalignment='top') @@ -70,71 +70,68 @@ def plot_comparison(dt, dxdt_truth, dxdt_hat1, title1, dxdt_hat2, title2, dxdt_h axes[i].tick_params(axis='y', labelsize=15) axes[i].set_title(title, fontsize=18) if i==2: axes[i].legend(loc='lower right', fontsize=12) - _, _, rms_dxdt = rmse(np.zeros(dxdt_hat.shape), dt, np.zeros(dxdt_hat.shape), dxdt_hat, np.zeros(dxdt_hat.shape), dxdt_truth) - R_sqr = error_correlation(dxdt_hat, dxdt_truth) - axes[i].text(0.05, 0.95, f"RMSE = {rms_dxdt:.2f}\n$R^2$ = {R_sqr:.2g}", + rmse_dxdt = rmse(dxdt_truth, dxdt_hat) + R_sqr = error_correlation(dxdt_truth, dxdt_hat) + axes[i].text(0.05, 0.95, f"RMSE = {rmse_dxdt:.2f}\n$R^2$ = {R_sqr:.2g}", transform=axes[i].transAxes, fontsize=15, verticalalignment='top') fig.tight_layout() -def rmse(x, dt, x_hat, dxdt_hat, x_truth=None, dxdt_truth=None, padding=0): - """Evaluate x_hat based on RMSE, calculating different ones depending on whether :code:`dxdt_truth` - and :code:`x_truth` are known. +def robust_rme(x, x_hat, padding=0, M=6): + """Robustified/Huberized Root Mean Error metric, used to determine fit between smooth estimate and data. + Equals np.linalg.norm(x[s] - x_hat[s]) / np.sqrt(N) if M=float('inf'), and dang close for even M=6 or even 2. - :param np.array[float] x: data that was differentiated - :param float dt: step size - :param np.array[float] x_hat: estimated (smoothed) x - :param np.array[float] dxdt_hat: estimated xdot - :param np.array[float] x_truth: true value of x, if known - :param np.array[float] dxdt_truth: true value of dxdt, if known, optional + :param np.array[float] x: noisy data + :param np.array[float] x_hat: estimated smoothed signal, returned by differentiation algorithms in addition + to derivative :param int padding: number of snapshots on either side of the array to ignore when calculating the metric. If :code:`'auto'`, defaults to 2.5% of the size of x + :param float M: Huber loss parameter in units of ~1.4*mean absolute deviation, intended to approximate + standard deviation robustly. 
- :return: tuple[float, float, float] containing\n - - **rms_rec_x** -- RMS error between the integral of dxdt_hat and x - - **rms_x** -- RMS error between x_hat and x_truth, returns None if x_truth is None - - **rms_dxdt** -- RMS error between dxdt_hat and dxdt_truth, returns None if dxdt_hat is None + :return: **robust_rmse_x_hat** (float) -- RMS error between x_hat and data """ - if np.isnan(x_hat).any(): - return np.nan, np.nan, np.nan - if padding == 'auto': - padding = int(0.025*len(x)) - padding = max(padding, 1) + if padding == 'auto': padding = max([1, int(0.025*len(x))]) s = slice(padding, len(x)-padding) # slice out data we want to measure + N = s.stop - s.start + + sigma = stats.median_abs_deviation(x[s] - x_hat[s], scale='normal') # M is in units of this robust scatter metric + if sigma < 1e-6: sigma = 1 # guard against divide by zero + return np.sqrt(2*np.mean(utility.huber((x[s] - x_hat[s])/sigma, M))) * sigma - # RMS of dxdt and x_hat - root = np.sqrt(s.stop - s.start) - rms_dxdt = np.linalg.norm(dxdt_hat[s] - dxdt_truth[s]) / root if dxdt_truth is not None else None - rms_x = np.linalg.norm(x_hat[s] - x_truth[s]) / root if x_truth is not None else None - # RMS reconstructed x from integrating dxdt vs given raw x, available even in the absence of ground truth - rec_x = utility.integrate_dxdt_hat(dxdt_hat, dt) - x0 = utility.estimate_integration_constant(x, rec_x) - rec_x = rec_x + x0 - rms_rec_x = np.linalg.norm(rec_x[s] - x[s]) / root +def rmse(dxdt_truth, dxdt_hat, padding=0): + """True RMSE between vectors + + :param np.array[float] dxdt_truth: known true derivative + :param np.array[float] dxdt_hat: estimated derivative + :param int padding: number of snapshots on either side of the array to ignore when calculating + the metric. If :code:`'auto'`, defaults to 2.5% of the array length + + :return: **true_rmse_dxdt** (float) -- RMS error between dxdt_hat and dxdt_truth, returns None if dxdt_truth is None + """ + if padding == 'auto': padding = max([1, int(0.025*len(dxdt_truth))]) + s = slice(padding, len(dxdt_hat)-padding) # slice out data we want to measure + N = s.stop - s.start - return rms_rec_x, rms_x, rms_dxdt + return np.linalg.norm(dxdt_hat[s] - dxdt_truth[s]) / np.sqrt(N) if dxdt_truth is not None else None -def error_correlation(dxdt_hat, dxdt_truth, padding=0): - """Calculate the error correlation (pearsons correlation coefficient) between the estimated - dxdt and true dxdt +def error_correlation(dxdt_truth, dxdt_hat, padding=0): + """Calculate the error correlation (Pearson's correlation coefficient) between vectors - :param np.array[float] dxdt_hat: estimated xdot :param np.array[float] dxdt_truth: true value of dxdt, if known, optional + :param np.array[float] dxdt_hat: estimated xdot :param int padding: number of snapshots on either side of the array to ignore when calculating the metric.
If :code:`'auto'`, defaults to 2.5% of the size of x :return: (float) -- r-squared correlation coefficient """ - if padding == 'auto': - padding = int(0.025*len(dxdt_hat)) - padding = max(padding, 1) + if padding == 'auto': padding = max(1, int(0.025*len(dxdt_hat))) s = slice(padding, len(dxdt_hat)-padding) # slice out data we want to measure - errors = (dxdt_hat[s] - dxdt_truth[s]) - r = stats.linregress(dxdt_truth[s] - np.mean(dxdt_truth[s]), errors) - return r.rvalue**2 + + return stats.linregress(dxdt_truth[s], dxdt_hat[s] - dxdt_truth[s]).rvalue**2 def total_variation(x, padding=0): @@ -146,11 +143,7 @@ def total_variation(x, padding=0): :return: (float) -- total variation """ - if np.isnan(x).any(): - return np.nan - if padding == 'auto': - padding = int(0.025*len(x)) - padding = max(padding, 1) + if padding == 'auto': padding = max(1, int(0.025*len(x))) x = x[padding:len(x)-padding] return np.linalg.norm(x[1:]-x[:-1], 1)/len(x) # normalized version of what cvxpy.tv does diff --git a/pynumdiff/utils/utility.py b/pynumdiff/utils/utility.py index ce275da..f5b1a71 100644 --- a/pynumdiff/utils/utility.py +++ b/pynumdiff/utils/utility.py @@ -2,6 +2,7 @@ import numpy as np from scipy.integrate import cumulative_trapezoid from scipy.optimize import minimize +from scipy.stats import median_abs_deviation def hankel_matrix(x, num_delays, pad=False): # fixed delay step of 1 @@ -96,9 +97,14 @@ def peakdet(x, delta, t=None): return np.array(maxtab), np.array(mintab) -# Trapazoidal integration, with 0 first value so that the lengths match. See #88. +def huber(x, M): + """Huber loss function, for outlier-robust applications, `see here `_""" + absx = np.abs(x) + return np.where(absx <= M, 0.5*x**2, M*(absx - 0.5*M)) + + def integrate_dxdt_hat(dxdt_hat, _t): - """Wrapper for scipy.integrate.cumulative_trapezoid + """Wrapper for scipy.integrate.cumulative_trapezoid. Use 0 as first value so lengths match, see #88. :param np.array[float] dxdt_hat: estimate derivative of timeseries :param float _t: step size if given as a scalar or a vector of sample locations @@ -109,22 +115,30 @@ def integrate_dxdt_hat(dxdt_hat, _t): else cumulative_trapezoid(dxdt_hat, x=_t, initial=0) -# Optimization routine to estimate the integration constant. -def estimate_integration_constant(x, x_hat): +def estimate_integration_constant(x, x_hat, M=6): """Integration leaves an unknown integration constant. This function finds a best fit integration constant given x and x_hat (the integral of dxdt_hat) by optimizing :math:`\\min_c ||x - \\hat{x} + c||_2`. :param np.array[float] x: timeseries of measurements - :param np.array[float] x_hat: smoothed estiamte of x, for the purpose of this function this should have been determined - by integrate_dxdt_hat + :param np.array[float] x_hat: smoothed estimate of x + :param float M: robustifies constant estimation using Huber loss. The default is intended to capture the idea + of "six sigma": Assuming Gaussian inliers and M in units of standard deviation, the portion of inliers + beyond the Huber loss' transition is only about 1.97e-9. M here is in units of scaled median absolute deviation, + so scatter can be calculated and used to normalize data without being thrown off by outliers.
:return: **integration constant** (float) -- initial condition that best aligns x_hat with x """ - return minimize(lambda x0, x, xhat: np.linalg.norm(x - (x_hat+x0)), # fn to minimize in 1st argument - 0, args=(x, x_hat), method='SLSQP').x[0] # result is a vector, even if initial guess is just a scalar + if M == float('inf'): # calculates the constant to be mean(diff(x, x_hat)), equivalent to argmin_{x0} ||x_hat + x0 - x||_2^2 + return np.mean(x - x_hat) # Solves the L2 distance minimization + elif M < 0.1: # small M looks like L1 loss, and Huber gets too flat to work well + return np.median(x - x_hat) # Solves the L1 distance minimization + else: + sigma = median_abs_deviation(x - x_hat, scale='normal') # M is in units of this robust scatter metric + if sigma < 1e-6: sigma = 1 # guard against divide by zero + return minimize(lambda x0: np.mean(huber((x - (x_hat+x0))/sigma, M)), # fn to minimize in 1st argument + 0, method='SLSQP').x[0] # result is a vector, even if initial guess is just a scalar -# kernels def mean_kernel(window_size): """A uniform boxcar of total integral 1""" return np.ones(window_size)/window_size @@ -142,6 +156,7 @@ def friedrichs_kernel(window_size): ker = np.exp(-1/(1-x**2)) return ker / np.sum(ker) + def convolutional_smoother(x, kernel, num_iterations=1): """Perform smoothing by convolving x with a kernel.