Skip to content

Commit a3634dd

Browse files
Copilot and ricardoV94
authored and committed
Refactor newaxis handling: move to __getitem__ level, unify with Subtensor approach
Co-authored-by: ricardoV94 <28983449+ricardoV94@users.noreply.github.com>
1 parent 737b8cb commit a3634dd

File tree

2 files changed

+75
-93
lines changed

2 files changed

+75
-93
lines changed

pytensor/tensor/subtensor.py

Lines changed: 28 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -2612,16 +2612,12 @@ def make_node(self, x, *inputs):
26122612
if len(inputs) != len(expected_inputs):
26132613
raise ValueError(f"Expected {len(expected_inputs)} inputs but got {len(inputs)}")
26142614

2615-
# Build explicit_indices for shape inference
2615+
# Build explicit_indices for shape inference (newaxis handled by __getitem__)
26162616
explicit_indices = []
2617-
new_axes = []
26182617
input_idx = 0
26192618

26202619
for i, entry in enumerate(idx_list):
2621-
if entry is np.newaxis:
2622-
new_axes.append(len(explicit_indices))
2623-
explicit_indices.append(np.newaxis)
2624-
elif isinstance(entry, slice):
2620+
if isinstance(entry, slice):
26252621
# Reconstruct slice with actual values from inputs
26262622
if entry.start is not None and isinstance(entry.start, Type):
26272623
start_val = inputs[input_idx]
@@ -2655,7 +2651,7 @@ def make_node(self, x, *inputs):
26552651
)
26562652

26572653
# Check static shape aligned
2658-
axis = len(explicit_indices) - len(new_axes)
2654+
axis = len(explicit_indices)
26592655
indexed_shape = x.type.shape[axis : axis + inp.type.ndim]
26602656
for j, (indexed_length, indexer_length) in enumerate(
26612657
zip(indexed_shape, inp.type.shape)
@@ -2681,25 +2677,20 @@ def make_node(self, x, *inputs):
26812677
else:
26822678
raise ValueError(f"Invalid entry in idx_list: {entry}")
26832679

2684-
if (len(explicit_indices) - len(new_axes)) > x.type.ndim:
2680+
if len(explicit_indices) > x.type.ndim:
26852681
raise IndexError(
2686-
f"too many indices for array: tensor is {x.type.ndim}-dimensional, but {len(explicit_indices) - len(new_axes)} were indexed"
2682+
f"too many indices for array: tensor is {x.type.ndim}-dimensional, but {len(explicit_indices)} were indexed"
26872683
)
26882684

2689-
# Perform basic and advanced indexing shape inference separately
2685+
# Perform basic and advanced indexing shape inference separately (no newaxis)
26902686
basic_group_shape = []
26912687
advanced_indices = []
26922688
adv_group_axis = None
26932689
last_adv_group_axis = None
2694-
expanded_x_shape = tuple(
2695-
np.insert(np.array(x.type.shape, dtype=object), 1, new_axes)
2696-
)
26972690
for i, (idx, dim_length) in enumerate(
2698-
zip_longest(explicit_indices, expanded_x_shape, fillvalue=slice(None))
2691+
zip_longest(explicit_indices, x.type.shape, fillvalue=slice(None))
26992692
):
2700-
if idx is np.newaxis:
2701-
basic_group_shape.append(1) # New-axis
2702-
elif isinstance(idx, slice):
2693+
if isinstance(idx, slice):
27032694
basic_group_shape.append(slice_static_length(idx, dim_length))
27042695
else: # TensorType (advanced index)
27052696
# Keep track of advanced group axis
@@ -2752,16 +2743,14 @@ def is_bool_index(idx):
27522743
or getattr(idx, "dtype", None) == "bool"
27532744
)
27542745

2755-
# Reconstruct the full indices from idx_list and inputs (like perform method)
2746+
# Reconstruct the full indices from idx_list and inputs (newaxis handled by __getitem__)
27562747
inputs = node.inputs[1:]
27572748

27582749
full_indices = []
27592750
input_idx = 0
27602751

27612752
for entry in self.idx_list:
2762-
if entry is np.newaxis:
2763-
full_indices.append(np.newaxis)
2764-
elif isinstance(entry, slice):
2753+
if isinstance(entry, slice):
27652754
# Reconstruct slice from idx_list and inputs
27662755
if entry.start is not None and isinstance(entry.start, Type):
27672756
start_val = inputs[input_idx]
@@ -2794,8 +2783,6 @@ def is_bool_index(idx):
27942783
for idx in full_indices:
27952784
if isinstance(idx, slice):
27962785
index_shapes.append(idx)
2797-
elif idx is np.newaxis:
2798-
index_shapes.append(idx)
27992786
elif hasattr(idx, 'type'):
28002787
# Mixed bool indexes are converted to nonzero entries
28012788
shape0_op = Shape_i(0)
@@ -2837,17 +2824,15 @@ def is_bool_index(idx):
28372824
def perform(self, node, inputs, out_):
28382825
(out,) = out_
28392826

2840-
# Reconstruct the full tuple of indices from idx_list and inputs
2827+
# Reconstruct the full tuple of indices from idx_list and inputs (newaxis handled by __getitem__)
28412828
x = inputs[0]
28422829
tensor_inputs = inputs[1:]
28432830

28442831
full_indices = []
28452832
input_idx = 0
28462833

28472834
for entry in self.idx_list:
2848-
if entry is np.newaxis:
2849-
full_indices.append(np.newaxis)
2850-
elif isinstance(entry, slice):
2835+
if isinstance(entry, slice):
28512836
# Reconstruct slice from idx_list and inputs
28522837
if entry.start is not None and isinstance(entry.start, Type):
28532838
start_val = tensor_inputs[input_idx]
@@ -2938,7 +2923,7 @@ def non_consecutive_adv_indexing(node: Apply) -> bool:
29382923
bool
29392924
True if the advanced indexing is non-consecutive, False otherwise.
29402925
"""
2941-
# Reconstruct the full indices from idx_list and inputs to check consecutivity
2926+
# Reconstruct the full indices from idx_list and inputs to check consecutivity (newaxis handled by __getitem__)
29422927
op = node.op
29432928
tensor_inputs = node.inputs[1:]
29442929

@@ -2948,8 +2933,6 @@ def non_consecutive_adv_indexing(node: Apply) -> bool:
29482933
for entry in op.idx_list:
29492934
if isinstance(entry, slice):
29502935
full_indices.append(slice(None)) # Represent as basic slice
2951-
elif entry is np.newaxis:
2952-
full_indices.append(np.newaxis)
29532936
elif isinstance(entry, Type):
29542937
# This is a numerical index - get from inputs
29552938
if input_idx < len(tensor_inputs):
@@ -3035,14 +3018,12 @@ def make_node(self, x, y, *inputs):
30353018
def perform(self, node, inputs, out_):
30363019
x, y, *tensor_inputs = inputs
30373020

3038-
# Reconstruct the full tuple of indices from idx_list and inputs
3021+
# Reconstruct the full tuple of indices from idx_list and inputs (newaxis handled by __getitem__)
30393022
full_indices = []
30403023
input_idx = 0
30413024

30423025
for entry in self.idx_list:
3043-
if entry is np.newaxis:
3044-
full_indices.append(np.newaxis)
3045-
elif isinstance(entry, slice):
3026+
if isinstance(entry, slice):
30463027
# Reconstruct slice from idx_list and inputs
30473028
if entry.start is not None and isinstance(entry.start, Type):
30483029
start_val = tensor_inputs[input_idx]
@@ -3154,7 +3135,7 @@ def non_consecutive_adv_indexing(node: Apply) -> bool:
31543135
bool
31553136
True if the advanced indexing is non-consecutive, False otherwise.
31563137
"""
3157-
# Reconstruct the full indices from idx_list and inputs to check consecutivity
3138+
# Reconstruct the full indices from idx_list and inputs to check consecutivity (newaxis handled by __getitem__)
31583139
op = node.op
31593140
tensor_inputs = node.inputs[2:] # Skip x and y
31603141

@@ -3164,8 +3145,6 @@ def non_consecutive_adv_indexing(node: Apply) -> bool:
31643145
for entry in op.idx_list:
31653146
if isinstance(entry, slice):
31663147
full_indices.append(slice(None)) # Represent as basic slice
3167-
elif entry is np.newaxis:
3168-
full_indices.append(np.newaxis)
31693148
elif isinstance(entry, Type):
31703149
# This is a numerical index - get from inputs
31713150
if input_idx < len(tensor_inputs):
@@ -3180,6 +3159,9 @@ def advanced_subtensor(x, *args):
31803159
31813160
This function converts the arguments to work with the new AdvancedSubtensor
31823161
interface that separates slice structure from variable inputs.
3162+
3163+
Note: newaxis (None) should be handled by __getitem__ using dimshuffle
3164+
before calling this function.
31833165
"""
31843166
# Convert args using as_index_variable (like original AdvancedSubtensor did)
31853167
processed_args = tuple(map(as_index_variable, args))
@@ -3189,9 +3171,7 @@ def advanced_subtensor(x, *args):
31893171
input_vars = []
31903172

31913173
for arg in processed_args:
3192-
if isinstance(arg.type, NoneTypeT):
3193-
idx_list.append(np.newaxis)
3194-
elif isinstance(arg.type, SliceType):
3174+
if isinstance(arg.type, SliceType):
31953175
# Handle SliceType - extract components and structure
31963176
if isinstance(arg, Constant):
31973177
# Constant slice
@@ -3218,15 +3198,19 @@ def advanced_subtensor(x, *args):
32183198
# Other slice case
32193199
idx_list.append(slice(None))
32203200
else:
3221-
# Tensor index
3201+
# Tensor index (should not be NoneType since newaxis handled in __getitem__)
32223202
idx_list.append(index_vars_to_types(arg))
32233203
input_vars.append(arg)
32243204

32253205
return AdvancedSubtensor(idx_list).make_node(x, *input_vars).outputs[0]
32263206

32273207

32283208
def advanced_inc_subtensor(x, y, *args, **kwargs):
3229-
"""Create an AdvancedIncSubtensor operation for incrementing."""
3209+
"""Create an AdvancedIncSubtensor operation for incrementing.
3210+
3211+
Note: newaxis (None) should be handled by __getitem__ using dimshuffle
3212+
before calling this function.
3213+
"""
32303214
# Convert args using as_index_variable (like original AdvancedIncSubtensor would)
32313215
processed_args = tuple(map(as_index_variable, args))
32323216

@@ -3235,9 +3219,7 @@ def advanced_inc_subtensor(x, y, *args, **kwargs):
32353219
input_vars = []
32363220

32373221
for arg in processed_args:
3238-
if isinstance(arg.type, NoneTypeT):
3239-
idx_list.append(np.newaxis)
3240-
elif isinstance(arg.type, SliceType):
3222+
if isinstance(arg.type, SliceType):
32413223
# Handle SliceType - extract components and structure
32423224
if isinstance(arg, Constant):
32433225
# Constant slice
@@ -3264,7 +3246,7 @@ def advanced_inc_subtensor(x, y, *args, **kwargs):
32643246
# Other slice case
32653247
idx_list.append(slice(None))
32663248
else:
3267-
# Tensor index
3249+
# Tensor index (should not be NoneType since newaxis handled in __getitem__)
32683250
idx_list.append(index_vars_to_types(arg))
32693251
input_vars.append(arg)
32703252

pytensor/tensor/variable.py

Lines changed: 47 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -539,55 +539,55 @@ def is_empty_array(val):
539539
else:
540540
advanced = True
541541

542-
if advanced:
543-
return pt.subtensor.advanced_subtensor(self, *args)
544-
else:
545-
if np.newaxis in args or NoneConst in args:
546-
# `np.newaxis` (i.e. `None`) in NumPy indexing mean "add a new
547-
# broadcastable dimension at this location". Since PyTensor adds
548-
# new broadcastable dimensions via the `DimShuffle` `Op`, the
549-
# following code uses said `Op` to add one of the new axes and
550-
# then uses recursion to apply any other indices and add any
551-
# remaining new axes.
552-
553-
counter = 0
554-
pattern = []
555-
new_args = []
556-
for arg in args:
557-
if arg is np.newaxis or arg is NoneConst:
558-
pattern.append("x")
559-
new_args.append(slice(None, None, None))
560-
else:
561-
pattern.append(counter)
562-
counter += 1
563-
new_args.append(arg)
564-
565-
pattern.extend(list(range(counter, self.ndim)))
566-
567-
view = self.dimshuffle(pattern)
568-
full_slices = True
569-
for arg in new_args:
570-
# We can't do arg == slice(None, None, None) as in
571-
# Python 2.7, this call __lt__ if we have a slice
572-
# with some symbolic variable.
573-
if not (
574-
isinstance(arg, slice)
575-
and (arg.start is None or arg.start is NoneConst)
576-
and (arg.stop is None or arg.stop is NoneConst)
577-
and (arg.step is None or arg.step is NoneConst)
578-
):
579-
full_slices = False
580-
if full_slices:
581-
return view
542+
# Handle newaxis (None) for both basic and advanced indexing
543+
if np.newaxis in args or NoneConst in args:
544+
# `np.newaxis` (i.e. `None`) in NumPy indexing mean "add a new
545+
# broadcastable dimension at this location". Since PyTensor adds
546+
# new broadcastable dimensions via the `DimShuffle` `Op`, the
547+
# following code uses said `Op` to add one of the new axes and
548+
# then uses recursion to apply any other indices and add any
549+
# remaining new axes.
550+
551+
counter = 0
552+
pattern = []
553+
new_args = []
554+
for arg in args:
555+
if arg is np.newaxis or arg is NoneConst:
556+
pattern.append("x")
557+
new_args.append(slice(None, None, None))
582558
else:
583-
return view.__getitem__(tuple(new_args))
559+
pattern.append(counter)
560+
counter += 1
561+
new_args.append(arg)
562+
563+
pattern.extend(list(range(counter, self.ndim)))
564+
565+
view = self.dimshuffle(pattern)
566+
full_slices = True
567+
for arg in new_args:
568+
# We can't do arg == slice(None, None, None) as in
569+
# Python 2.7, this call __lt__ if we have a slice
570+
# with some symbolic variable.
571+
if not (
572+
isinstance(arg, slice)
573+
and (arg.start is None or arg.start is NoneConst)
574+
and (arg.stop is None or arg.stop is NoneConst)
575+
and (arg.step is None or arg.step is NoneConst)
576+
):
577+
full_slices = False
578+
if full_slices:
579+
return view
584580
else:
585-
return pt.subtensor.Subtensor(args)(
586-
self,
587-
*pt.subtensor.get_slice_elements(
588-
args, lambda entry: isinstance(entry, Variable)
589-
),
590-
)
581+
return view.__getitem__(tuple(new_args))
582+
elif advanced:
583+
return pt.subtensor.advanced_subtensor(self, *args)
584+
else:
585+
return pt.subtensor.Subtensor(args)(
586+
self,
587+
*pt.subtensor.get_slice_elements(
588+
args, lambda entry: isinstance(entry, Variable)
589+
),
590+
)
591591

592592
def __setitem__(self, key, value):
593593
raise TypeError(

0 commit comments

Comments (0)