-
Notifications
You must be signed in to change notification settings - Fork 95
0.3 kan #764
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: 0.3
Are you sure you want to change the base?
0.3 kan #764
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,275 @@ | ||
| """Create the infrastructure for a KAN layer""" | ||
| import torch | ||
| import numpy as np | ||
|
|
||
| from pina._src.model.spline import Spline | ||
| from pina._src.model.vectorized_spline import VectorizedSpline | ||
|
|
||
|
|
||
class KANBlock(torch.nn.Module):
    """Single KAN (Kolmogorov-Arnold Network) layer built from splines.

    Every (input, output) pair owns one spline. In :meth:`forward` the
    spline response and ``base_function`` applied to the input are added
    together and the result is summed into one value per output node.

    NOTE(review): ``mask`` is created here but is not applied anywhere in
    :meth:`forward` -- confirm whether that is intended.
    """

    def __init__(self, k, input_dimensions, output_dimensions, inner_nodes,
                 num=3, grid_eps=0.1, grid_range=(-1, 1), grid_extension=True,
                 noise_scale=0.1, base_function=torch.nn.SiLU(),
                 scale_base_mu=0.0, scale_base_sigma=1.0, scale_sp=1.0,
                 sparse_init=True, sp_trainable=True, sb_trainable=True,
                 vectorized=True):
        """
        Initialize the KAN layer.

        :param k: Value passed as ``order`` to the underlying splines; also
            the number of knots padded on each side when a grid is extended.
        :param input_dimensions: Number of input features.
        :param output_dimensions: Number of output nodes.
        :param inner_nodes: Stored on the instance; not used by this class.
        :param num: Number of intervals in the initial grid (excluding any
            extension nodes); ``num + 1`` equally spaced knots are created.
        :param grid_eps: Blend weight used by
            :meth:`update_grid_from_samples`: ``grid_eps * uniform +
            (1 - grid_eps) * adaptive``.
        :param grid_range: Pair ``(lower, upper)`` bounding the initial grid.
        :param grid_extension: Whether the grid-update methods pad ``k``
            extra knots on each side of a new grid.
        :param noise_scale: Accepted for interface compatibility; currently
            unused.
        :param base_function: Callable applied to the inputs and added to
            the spline output in :meth:`forward`.
        :param scale_base_mu: Accepted; currently unused.
        :param scale_base_sigma: Accepted; currently unused.
        :param scale_sp: Accepted; currently unused.
        :param sparse_init: If true, ``mask`` comes from :meth:`sparse_mask`;
            otherwise it is all ones.
        :param sp_trainable: Accepted; currently unused.
        :param sb_trainable: Accepted; currently unused.
        :param vectorized: If true (default) a single ``VectorizedSpline``
            holds every spline; otherwise one ``Spline`` module is created
            per (output, input) pair.
        """
        super().__init__()
        self.k = k
        self.input_dimensions = input_dimensions
        self.output_dimensions = output_dimensions
        self.inner_nodes = inner_nodes
        self.num = num
        self.grid_eps = grid_eps
        self.grid_range = grid_range
        self.grid_extension = grid_extension
        self.vec = vectorized

        # Kept as a frozen Parameter (not a buffer) so that existing
        # ``parameters()`` / ``state_dict()`` layouts do not change.
        if sparse_init:
            mask = self.sparse_mask(input_dimensions, output_dimensions)
        else:
            mask = torch.ones(input_dimensions, output_dimensions)
        self.mask = torch.nn.Parameter(mask, requires_grad=False)

        # NOTE(review): the splines receive the plain, non-extended knots;
        # ``grid_extension`` only takes effect in the grid-update methods.
        knots = torch.linspace(
            grid_range[0], grid_range[1], steps=self.num + 1
        )
        n_control_points = len(knots) - self.k
        n_splines = self.input_dimensions * self.output_dimensions

        if self.vec:
            # One row of control points per spline, drawn one row at a time
            # like the per-spline draws of the non-vectorised branch below.
            control_points = torch.stack(
                [torch.randn(n_control_points) for _ in range(n_splines)]
            )
            self.spline_q = VectorizedSpline(
                order=self.k,
                knots=knots,
                control_points=control_points,
            )
        else:
            # Indexed as ``spline_q[output_index][input_index]``.
            self.spline_q = torch.nn.ModuleList([
                torch.nn.ModuleList([
                    Spline(
                        order=self.k,
                        knots=knots,
                        control_points=torch.randn(n_control_points),
                    )
                    for _ in range(self.input_dimensions)
                ])
                for _ in range(self.output_dimensions)
            ])

        self.base_function = base_function

    @staticmethod
    def sparse_mask(in_dimensions: int, out_dimensions: int) -> torch.Tensor:
        """
        Build a sparse 0/1 connectivity mask.

        Inputs and outputs are placed at the midpoints of equal cells of
        ``[0, 1]``. Every input is connected to its nearest output and every
        output to its nearest input.

        :param in_dimensions: Number of inputs.
        :param out_dimensions: Number of outputs.
        :return: Tensor of shape ``(in_dimensions, out_dimensions)`` holding
            ones at connected pairs and zeros elsewhere.
        """
        in_coord = (
            torch.arange(in_dimensions) / in_dimensions
            + 1 / (2 * in_dimensions)
        )
        out_coord = (
            torch.arange(out_dimensions) / out_dimensions
            + 1 / (2 * out_dimensions)
        )

        # dist_mat[o, i] = |out_coord[o] - in_coord[i]|
        dist_mat = torch.abs(out_coord[:, None] - in_coord[None, :])
        nearest_out = torch.argmin(dist_mat, dim=0)  # one entry per input
        nearest_in = torch.argmin(dist_mat, dim=1)   # one entry per output

        rows = torch.cat([torch.arange(in_dimensions), nearest_in])
        cols = torch.cat([nearest_out, torch.arange(out_dimensions)])

        mask = torch.zeros(in_dimensions, out_dimensions)
        mask[rows, cols] = 1.
        return mask

    @staticmethod
    def _as_tensor(x):
        """Return ``x.tensor`` when *x* exposes that attribute, else *x*.

        Used to unwrap a LabelTensor into a regular tensor before spline
        operations.

        NOTE(review): reviewers asked what this unwrapping is for and
        whether it is needed -- TODO confirm.
        """
        return getattr(x, 'tensor', x)

    @staticmethod
    def _extend_grid(grid: torch.Tensor, k: int) -> torch.Tensor:
        """Pad *grid* with ``k`` extra knots on each side of dimension 1.

        The spacing is the mean knot spacing of each row, computed once
        before padding.
        """
        h = (grid[:, [-1]] - grid[:, [0]]) / (grid.shape[1] - 1)
        for _ in range(k):
            grid = torch.cat([grid[:, [0]] - h, grid], dim=1)
            grid = torch.cat([grid, grid[:, [-1]] + h], dim=1)
        return grid

    def _evaluate_spline(self, x: torch.Tensor) -> torch.Tensor:
        """Evaluate ``self.spline`` at *x* via its basis and control points.

        The contraction ``"bil,iol->bio"`` requires a rank-3 basis and
        rank-3 control points; the result is rank 3.
        """
        basis = self.spline.basis(x, self.spline.k, self.spline.knots)
        return torch.einsum(
            "bil,iol->bio", basis, self.spline.control_points
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward pass through the KAN layer.

        Each input contributes ``base(x) + spline(x)``; contributions are
        then summed for each output node.

        :param x: Input tensor, or an object exposing a ``tensor``
            attribute. Indexed as ``x[:, p]`` for input ``p``, so it is
            assumed to be ``(batch, input_dimensions)`` -- TODO confirm.
        :return: The layer output; the loop branch stacks one entry per
            output node along dimension 1.
        """
        x_tensor = self._as_tensor(x)

        if self.vec:
            # Call the module (not ``.forward``) so registered hooks run.
            y = self.spline_q(x_tensor)
            y = y.reshape(
                y.shape[0],
                y.shape[1],
                self.output_dimensions,
                self.input_dimensions,
            )
            base_out = self.base_function(x_tensor)
            # NOTE(review): the base term is broadcast over every
            # (output, input) slot and then both axis 3 and axis 1 are
            # summed, so it is not weighted like in the loop branch below,
            # where it is added once per input -- confirm this is intended.
            y = y + base_out[:, :, None, None]
            return y.sum(dim=3).sum(dim=1)

        per_output = []
        for q in range(self.output_dimensions):
            terms = [
                self.spline_q[q][p](x_tensor[:, p])
                + self.base_function(x_tensor[:, p])
                for p in range(self.input_dimensions)
            ]
            per_output.append(torch.stack(terms, dim=1).sum(dim=1))
        return torch.stack(per_output, dim=1)

    def update_grid_from_samples(self, x: torch.Tensor, mode: str = 'sample'):
        """
        Update grid from input samples to better fit data distribution.

        Based on the PyKAN implementation but with boundary preservation:
        the outermost knots are never moved inside ``grid_range``.

        NOTE(review): this method reads and writes ``self.spline``, which
        ``__init__`` does not create (it sets ``spline_q``). As written it
        raises ``AttributeError`` unless ``spline`` is assigned elsewhere --
        TODO reconcile with the spline objects built in ``__init__``.

        :param x: Samples, or an object exposing a ``tensor`` attribute.
            Assumed ``(batch_size, input_dimensions)`` -- TODO confirm.
        :param mode: ``'grid'`` refits on points taken from a grid with
            twice as many intervals; any other value refits on the samples
            augmented with the two ``grid_range`` boundary points.
        """
        x_tensor = self._as_tensor(x)

        with torch.no_grad():
            batch_size = x_tensor.shape[0]
            x_sorted = torch.sort(x_tensor, dim=0)[0]

            # Current number of intervals, excluding extension knots.
            num_interval = self.spline.knots.shape[1] - 1
            if self.grid_extension:
                num_interval -= 2 * self.k

            def get_grid(num_intervals: int):
                """PyKAN-style grid creation with boundary preservation."""
                ids = [
                    int(batch_size * i / num_intervals)
                    for i in range(num_intervals)
                ] + [-1]
                # Rows become input dimensions: (inputs, num_intervals + 1).
                grid_adaptive = x_sorted[ids, :].transpose(0, 1)

                original_min = self.grid_range[0]
                original_max = self.grid_range[1]

                # Do not let the grid shrink inside the original domain.
                grid_adaptive[:, 0] = torch.min(
                    grid_adaptive[:, 0],
                    torch.full_like(grid_adaptive[:, 0], original_min),
                )
                grid_adaptive[:, -1] = torch.max(
                    grid_adaptive[:, -1],
                    torch.full_like(grid_adaptive[:, -1], original_max),
                )

                margin = 0.0
                h = (
                    grid_adaptive[:, [-1]] - grid_adaptive[:, [0]]
                    + 2 * margin
                ) / num_intervals
                steps = torch.arange(
                    num_intervals + 1,
                    device=x_tensor.device,
                    dtype=x_tensor.dtype,
                )[None, :]
                grid_uniform = grid_adaptive[:, [0]] - margin + h * steps

                return (
                    self.grid_eps * grid_uniform
                    + (1 - self.grid_eps) * grid_adaptive
                )

            new_grid = get_grid(num_interval)

            if mode == 'grid':
                # Denser sampling: (2 * num_interval + 1, input_dimensions).
                x_augmented = get_grid(2 * num_interval).transpose(0, 1)
            else:
                # Samples plus the two boundary points, re-sorted, so the
                # refit also sees the edges of the original domain.
                boundary_points = torch.tensor(
                    [[self.grid_range[0]], [self.grid_range[1]]],
                    device=x_tensor.device,
                    dtype=x_tensor.dtype,
                ).expand(-1, self.input_dimensions)
                x_augmented = torch.cat([x_sorted, boundary_points], dim=0)
                x_augmented = torch.sort(x_augmented, dim=0)[0]

            # Evaluate the current spline before its knots are replaced.
            y_eval = self._evaluate_spline(x_augmented)

            if self.grid_extension:
                new_grid = self._extend_grid(new_grid, self.k)

            # Update grid and refit coefficients.
            self.spline.knots = new_grid

            try:
                self.spline.compute_control_points(x_augmented, y_eval)
            except Exception as e:
                # Deliberate best effort: keep the new grid, report failure.
                print(f"Warning: Failed to update coefficients during grid refinement: {e}")

    def update_grid_resolution(self, new_num: int):
        """
        Update grid resolution to a new number of intervals.

        The current spline is sampled on ``2 * new_num`` equally spaced
        points of ``grid_range``, a new uniform grid with ``new_num``
        intervals is installed, and the control points are refit on those
        samples.

        NOTE(review): relies on ``self.spline``, which ``__init__`` does not
        create -- TODO reconcile.

        :param new_num: New number of grid intervals; stored in ``self.num``.
        """
        with torch.no_grad():
            device = self.spline.knots.device

            # Sample the current spline function on a dense grid.
            x_eval = torch.linspace(
                self.grid_range[0],
                self.grid_range[1],
                steps=2 * new_num,
                device=device,
            )
            x_eval = x_eval.unsqueeze(1).expand(-1, self.input_dimensions)
            y_eval = self._evaluate_spline(x_eval)

            # Update num and create a new grid, one row per input.
            self.num = new_num
            new_grid = torch.linspace(
                self.grid_range[0],
                self.grid_range[1],
                steps=self.num + 1,
                device=device,
            )
            new_grid = new_grid[None, :].expand(
                self.input_dimensions, self.num + 1
            )

            if self.grid_extension:
                new_grid = self._extend_grid(new_grid, self.k)

            # Install the new grid and re-compute the control points.
            self.spline.knots = new_grid
            self.spline.compute_control_points(x_eval, y_eval)

    def get_grid_statistics(self):
        """
        Get statistics about the current grid for debugging/analysis.

        NOTE(review): reads ``self.spline.knots``; ``__init__`` does not
        create ``self.spline`` -- TODO reconcile.

        :return: Dict with keys ``'grid_shape'``, ``'grid_min'``,
            ``'grid_max'``, ``'grid_range'`` (max minus min) and
            ``'num_intervals'`` (extension knots excluded).
        """
        knots = self.spline.knots
        # Use the block's own flag, as update_grid_from_samples does.
        n_extension = 2 * self.k if self.grid_extension else 0
        return {
            'grid_shape': knots.shape,
            'grid_min': knots.min().item(),
            'grid_max': knots.max().item(),
            'grid_range': (knots.max() - knots.min()).mean().item(),
            'num_intervals': knots.shape[1] - 1 - n_extension,
        }
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Doc is incomplete