Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
e20b6f0
Added file with unit tests for slicer averagers based on constructed …
ehewins Aug 23, 2023
2743ffb
Initial version of averagers with cartesian ROI, and made correspondi…
ehewins Aug 31, 2023
aacf1d7
Changed the binning/weighting process for _Slab to make the fractiona…
ehewins Sep 1, 2023
62e85b4
Refactoring to test SlabX, SlabY, SectorQ and SectorPhi rather than t…
ehewins Sep 4, 2023
a0f365c
Restructured ROI classes and added DirectionalAverage class, which of…
ehewins Sep 7, 2023
7b6188a
Updated the unit tests to suit the new_manipulations.py implementation.
ehewins Sep 7, 2023
acc2bc7
Added dedicated WedgeQ and WedgePhi classes, plus corresponding unit …
ehewins Sep 8, 2023
204bc63
Added documentation to new manipulations module
ehewins Sep 17, 2023
6dccdeb
Replaced python logical_and with numpy logical_and for speed
ehewins Sep 17, 2023
adff0d2
Removed some superfluous logical_and checks. Both arrays should have …
ehewins Sep 17, 2023
078b2dd
Forgot to remove 'angles + np.pi' from SectorQ call, no longer needed…
ehewins Sep 18, 2023
85cd0ef
Added unit tests for DirectionalAverage class
ehewins Sep 22, 2023
31eae90
Move averaging tests from data loader to manipulations folder
krzywon Oct 16, 2023
3469460
Move files used in averaging tests
krzywon Oct 16, 2023
dbacebd
Create and apply interval type enum to remove hard-coded strings
krzywon Oct 16, 2023
9941521
Allow for non-linear bin spacings in the directional averaging
krzywon Oct 16, 2023
8eba5a9
Update unit tests to account for new bin widths
krzywon Oct 16, 2023
53965a4
Rename manipulations_new to averaging and update internal references …
krzywon Oct 16, 2023
bb66e0a
Add deprecation warning that is triggered on import of manipulations.py
krzywon Oct 16, 2023
82af368
Use Enum instead of StrEnum to ensure backwards compatibility with py…
krzywon Oct 16, 2023
953a89b
Move 2D data restructure function to data_info where it is more seman…
krzywon Oct 24, 2023
053c3af
Grammar
lucas-wilkins Oct 24, 2023
5ea8793
Revert removal of reader2d_converter from manipulations
krzywon Oct 25, 2023
0296c49
Update deprecation messages
krzywon Oct 25, 2023
db3147b
Port RingCut from manipulations to averaging
krzywon Oct 25, 2023
044b1e6
Port Boxcut from manipulations to averaging
krzywon Oct 25, 2023
4900864
Update documentation in manipulations to point to new test location
krzywon Oct 25, 2023
7aa9bf3
Move Sectorcut to averaging from manipulations
krzywon Oct 25, 2023
2d2874b
Use unmasked data for masking purposes
krzywon Oct 25, 2023
14a8f12
Fix issue where sector cut only masked one half of region
krzywon Oct 26, 2023
07f4bed
Type hinting
lucas-wilkins Dec 6, 2023
cc05ea7
Fix for first bug
lucas-wilkins Dec 7, 2023
91da04a
[pre-commit.ci lite] apply automatic fixes for ruff linting errors
pre-commit-ci-lite[bot] Oct 27, 2025
3a13b2a
Merge branch 'master' into 46-manipulations-dot-py-rewrite
dehoni Nov 12, 2025
2c3a2a4
test must provide coordinate arrays for binning and can only check th…
dehoni Nov 13, 2025
78d1f53
Test_no_limits_on_an_axis must provide arrays for binning.
dehoni Nov 13, 2025
8be766c
Merge branch 'master' into 46-manipulations-dot-py-rewrite
dehoni Nov 14, 2025
fdfefd9
trying to appease codescene
dehoni Nov 14, 2025
81e8e69
[pre-commit.ci lite] apply automatic fixes for ruff linting errors
pre-commit-ci-lite[bot] Nov 14, 2025
1c5bef6
rewrite to address codescene review comments
dehoni Nov 15, 2025
930e73d
fixing merge conflict
dehoni Nov 15, 2025
f02ba2a
[pre-commit.ci lite] apply automatic fixes for ruff linting errors
pre-commit-ci-lite[bot] Nov 15, 2025
bb4af70
reduce complexity in averaging_tests
dehoni Nov 15, 2025
d88e74e
fix merge conflict
dehoni Nov 15, 2025
6de1232
[pre-commit.ci lite] apply automatic fixes for ruff linting errors
pre-commit-ci-lite[bot] Nov 15, 2025
726143a
further reducing code complexity and refactoring
dehoni Nov 15, 2025
ada3e09
merge diverged branch
dehoni Nov 15, 2025
493a396
[pre-commit.ci lite] apply automatic fixes for ruff linting errors
pre-commit-ci-lite[bot] Nov 15, 2025
4097685
further reducing code complexity
dehoni Nov 15, 2025
bca114b
[pre-commit.ci lite] apply automatic fixes for ruff linting errors
pre-commit-ci-lite[bot] Nov 15, 2025
b8a926f
clean up utest_averaging_directional.py
dehoni Nov 15, 2025
4cede2a
clean up utest_averaging_directional.py
dehoni Nov 15, 2025
520a3ce
[pre-commit.ci lite] apply automatic fixes for ruff linting errors
pre-commit-ci-lite[bot] Nov 15, 2025
704ec2f
include offset center of ROI fixes sasdata issue 22
dehoni Nov 18, 2025
d328eef
[pre-commit.ci lite] apply automatic fixes for ruff linting errors
pre-commit-ci-lite[bot] Nov 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
669 changes: 669 additions & 0 deletions sasdata/data_util/averaging.py

Large diffs are not rendered by default.

216 changes: 216 additions & 0 deletions sasdata/data_util/binning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
import numpy as np
from numpy.typing import ArrayLike

from sasdata.data_util.interval import IntervalType


class DirectionalAverage:
    """
    Average along one coordinate axis of 2D data and return data for a 1D plot.
    This can also be thought of as a projection onto the major axis: 2D -> 1D.

    This class operates on a decomposed Data2D object, and returns data needed
    to construct a Data1D object. The class is instantiated with two arrays of
    orthogonal coordinate data (depending on the coordinate system, these may
    have undergone some pre-processing) and two corresponding two-element
    tuples/lists defining the lower and upper limits on the Region of Interest
    (ROI) for each coordinate axis. One of these axes is averaged along, and
    the other is divided into bins and becomes the dependent variable of the
    eventual 1D plot. These are called the minor and major axes respectively.
    When a class instance is called, it is passed the intensity and error data
    from the original Data2D object. These should not have undergone any
    coordinate system dependent pre-processing.

    Note that the old version of manipulations.py had an option for logarithmic
    binning which was only used by SectorQ. This functionality is never called
    upon by SasView however, so I haven't implemented it here (yet).
    """

    def __init__(self,
                 major_axis: ArrayLike,
                 minor_axis: ArrayLike,
                 lims: tuple[tuple[float, float] | None, tuple[float, float] | None] | None = None,
                 nbins: int = 100):
        """
        Set up direction of averaging, limits on the ROI, & the number of bins.

        :param major_axis: Coordinate data for axis onto which the 2D data is
                           projected.
        :param minor_axis: Coordinate data for the axis perpendicular to the
                           major axis.
        :param lims: Tuple (major_lims, minor_lims). Each element may be a
                     2-tuple or None.
        :param nbins: The number of bins the major axis is divided up into.
        :raises ValueError: if the coordinate inputs are not array-like, their
            lengths differ, or `lims` is malformed.
        :raises TypeError: if `nbins` cannot be converted to an integer.
        """

        # Step 1: quick checks and parsing
        self._validate_coordinate_arrays(major_axis, minor_axis)
        major_lims, minor_lims = self._parse_lims(lims)
        self.nbins = self._coerce_nbins(nbins)

        # Step 2: assign arrays and check sizes
        self.major_axis, self.minor_axis = self._assign_axes_and_check_lengths(major_axis, minor_axis)

        # Step 3: set final limits and compute bin limits
        self.major_lims, self.minor_lims = self._set_default_lims_and_bin_limits(major_lims, minor_lims)

    def _validate_coordinate_arrays(self, major_axis, minor_axis) -> None:
        """Ensure both major and minor coordinate inputs are array-like."""
        # Anything numpy can treat as an array exposes __array__; plain
        # scalars and None do not, and are rejected here.
        if any(not hasattr(coordinate_data, "__array__") for
               coordinate_data in (major_axis, minor_axis)):
            msg = "Must provide major & minor coordinate arrays for binning."
            raise ValueError(msg)

    def _parse_lims(self, lims):
        """
        Validate the lims parameter and return (major_lims, minor_lims).

        Accepts None or a 2-tuple (major_lims, minor_lims). Each of the two
        elements may be None or a 2-tuple of floats.

        :raises ValueError: if lims is neither None nor a 2-element sequence.
        """
        if lims is None:
            return None, None

        if not (isinstance(lims, (list, tuple)) and len(lims) == 2):
            msg = "Parameter 'lims' must be a 2-tuple (major_lims, minor_lims) or None."
            raise ValueError(msg)

        major_lims, minor_lims = lims
        return major_lims, minor_lims

    def _coerce_nbins(self, nbins):
        """Coerce nbins to int, raising TypeError if conversion is impossible."""
        try:
            return int(nbins)
        except (TypeError, ValueError) as exc:
            # Narrow except + explicit chaining: the original conversion
            # failure stays visible in the traceback instead of being lost.
            msg = f"Parameter 'nbins' must be convertible to an integer via int(), got type {type(nbins)} (={nbins})"
            raise TypeError(msg) from exc

    def _assign_axes_and_check_lengths(self, major_axis, minor_axis):
        """Convert the axes to numpy arrays and check they have equal length.

        :return: (major_array, minor_array) as numpy arrays.
        :raises ValueError: if the two arrays differ in size.
        """
        major_arr = np.asarray(major_axis)
        minor_arr = np.asarray(minor_axis)
        if major_arr.size != minor_arr.size:
            msg = "Major and minor axes must have same length"
            raise ValueError(msg)
        return major_arr, minor_arr

    def _set_default_lims_and_bin_limits(self, major_lims, minor_lims):
        """
        Determine final major and minor limits (using data min/max if None)
        and compute self.bin_limits based on major_lims and self.nbins.

        :return: (major_lims_final, minor_lims_final).
        """
        # Major limits default to the full extent of the data.
        if major_lims is None:
            major_lims_final = (self.major_axis.min(), self.major_axis.max())
        else:
            major_lims_final = major_lims

        # Minor limits likewise default to the full extent of the data.
        if minor_lims is None:
            minor_lims_final = (self.minor_axis.min(), self.minor_axis.max())
        else:
            minor_lims_final = minor_lims

        # nbins bins need nbins + 1 boundary points.
        self.bin_limits = np.linspace(major_lims_final[0], major_lims_final[1], self.nbins + 1)

        return major_lims_final, minor_lims_final

    @property
    def bin_widths(self) -> np.ndarray:
        """Return a numpy array of all bin widths, regardless of the point spacings."""
        # Single vectorized pass; equivalent to
        # [self.bin_width_n(i) for i in range(self.nbins)].
        return np.diff(self.bin_limits)

    def bin_width_n(self, bin_number: int) -> float:
        """Calculate the bin width for the nth bin.

        :param bin_number: The starting array index of the bin between 0 and self.nbins - 1.
        :return: The bin width, as a float.
        """
        lower, upper = self.get_bin_interval(bin_number)
        return upper - lower

    def get_bin_interval(self, bin_number: int) -> tuple[float, float]:
        """
        Return the lower and upper limits defining a bin, given its index.

        :param bin_number: The index of the bin (between 0 and self.nbins - 1)
        :return: A tuple of the interval limits as (lower, upper).
        """
        # Ensure bin_number is an integer and not a float or a string representation
        bin_number = int(bin_number)
        return self.bin_limits[bin_number], self.bin_limits[bin_number+1]

    def get_bin_index(self, value):
        """
        Return the index of the bin to which the supplied value belongs.

        :param value: A coordinate value from somewhere along the major axis.
        """
        # NOTE(review): this linear mapping is only consistent with
        # self.bin_limits while those limits are evenly spaced (as produced by
        # np.linspace in __init__). If non-linear bin spacings are ever
        # introduced, this should switch to searching bin_limits directly.
        numerator = value - self.major_lims[0]
        denominator = self.major_lims[1] - self.major_lims[0]
        bin_index = int(np.floor(self.nbins * numerator / denominator))

        # Bins are indexed from 0 to nbins-1, so this check protects against
        # out-of-range indices when value == self.major_lims[1]
        if bin_index == self.nbins:
            bin_index -= 1

        return bin_index

    def compute_weights(self):
        """
        Return weights array for the contribution of each datapoint to each bin

        Each row of the weights array corresponds to the bin with the same
        index.
        """
        major_weights = np.zeros((self.nbins, self.major_axis.size))
        closed = IntervalType.CLOSED
        for m in range(self.nbins):
            # Include the value at the end of the binning range, but in
            # general use half-open intervals so each value belongs in only
            # one bin.
            if m == self.nbins - 1:
                interval = closed
            else:
                interval = IntervalType.HALF_OPEN
            bin_start, bin_end = self.get_bin_interval(bin_number=m)
            major_weights[m] = interval.weights_for_interval(array=self.major_axis,
                                                             l_bound=bin_start,
                                                             u_bound=bin_end)
        # The minor axis is never binned, so a single closed interval over the
        # whole ROI suffices.
        minor_weights = closed.weights_for_interval(array=self.minor_axis,
                                                    l_bound=self.minor_lims[0],
                                                    u_bound=self.minor_lims[1])
        return major_weights * minor_weights

    def __call__(self, data, err_data):
        """
        Compute the directional average of the supplied intensity & error data.

        :param data: intensity data from the original Data2D object.
        :param err_data: the corresponding errors for the intensity data.
        :return: (x_axis_values, intensity, errors) arrays restricted to bins
            with a finite averaged intensity.
        :raises ValueError: if no data points fall inside the ROI.
        """
        weights = self.compute_weights()

        x_axis_values = np.sum(weights * self.major_axis, axis=1)
        intensity = np.sum(weights * data, axis=1)
        errs_squared = np.sum((weights * err_data)**2, axis=1)

        bin_counts = np.sum(weights, axis=1)
        # Fail fast when the ROI caught no data at all.
        if not np.any(bin_counts > 0):
            raise ValueError("Average Error: No bins inside ROI to average...")

        errors = np.sqrt(errs_squared)
        # Empty bins divide by zero here and become nan/inf; they are
        # filtered out by the finite mask below.
        x_axis_values /= bin_counts
        intensity /= bin_counts
        errors /= bin_counts

        finite = np.isfinite(intensity)
        if not finite.any():
            msg = "Average Error: No points inside ROI to average..."
            raise ValueError(msg)

        return x_axis_values[finite], intensity[finite], errors[finite]
35 changes: 35 additions & 0 deletions sasdata/data_util/interval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from enum import Enum, auto

import numpy as np


class IntervalType(Enum):
    """Kinds of 1D interval used when weighting coordinate data for binning.

    HALF_OPEN includes only the lower bound ([l, u)); CLOSED includes both
    bounds ([l, u]).
    """

    HALF_OPEN = auto()
    CLOSED = auto()

    def weights_for_interval(self, array, l_bound, u_bound):
        """
        Weight coordinate data by position relative to a specified interval.

        :param array: the array for which the weights are calculated
        :param l_bound: value defining the lower limit of the region of interest
        :param u_bound: value defining the upper limit of the region of interest
        :return: an integer array of ones (inside interval) and zeros (outside).

        If and when fractional binning is implemented (ask Lucas), this function
        will be changed so that instead of outputting zeros and ones, it gives
        fractional values instead. These will depend on how close the array value
        is to being within the interval defined.
        """

        # Whether the endpoint should be included depends on circumstance.
        # Half-open is used when binning the major axis (except for the final bin)
        # and closed used for the minor axis and the final bin of the major axis.
        # Compare enum members by identity rather than via lower-cased names;
        # the final branch only fires if a new member is added without
        # updating this method.
        if self is IntervalType.HALF_OPEN:
            in_range = np.logical_and(l_bound <= array, array < u_bound)
        elif self is IntervalType.CLOSED:
            in_range = np.logical_and(l_bound <= array, array <= u_bound)
        else:
            msg = f"Unrecognised interval_type: {self.name}"
            raise ValueError(msg)

        return np.asarray(in_range, dtype=int)
Loading
Loading