Multiplatform tests for qwix.

Qwix Developers · copybara-github · commit 1eaff084b2a1 · 2025-10-07T18:19:45.000-07:00
PiperOrigin-RevId: 816464117
diff --git a/tests/core/ragged_dot_qt_test.py b/tests/core/ragged_dot_qt_test.py
@@ -57,8 +57,8 @@ class RaggedDotQtTest(parameterized.TestCase):
   @parameterized.named_parameters(
       dict(
           testcase_name="fp8",
-          lhs_qtype=jnp.float8_e4m3,
-          rhs_qtype=jnp.float8_e4m3,
+          lhs_qtype=jnp.float8_e4m3fn,
+          rhs_qtype=jnp.float8_e4m3fn,
           expected_mae_fq_out=1e-6,
           expected_mae_fq_dlhs=1e-6,
           expected_mae_fq_drhs=1e-6,
@@ -68,9 +68,9 @@ class RaggedDotQtTest(parameterized.TestCase):
       ),
       dict(
           testcase_name="fp8_bwd",
-          lhs_qtype=jnp.float8_e4m3,
-          rhs_qtype=jnp.float8_e4m3,
-          bwd_qtype=jnp.float8_e4m3,
+          lhs_qtype=jnp.float8_e4m3fn,
+          rhs_qtype=jnp.float8_e4m3fn,
+          bwd_qtype=jnp.float8_e4m3fn,
           expected_mae_fq_out=1e-6,
           expected_mae_fq_dlhs=0.03,
           expected_mae_fq_drhs=0.03,
@@ -154,8 +154,8 @@ def test_traced_group_sizes(self):
     lhs = jax.random.normal(jax.random.key(0), (256, 64), jnp.float32)
     rhs = jax.random.normal(jax.random.key(1), (8, 64, 128), jnp.float32)
     config = ragged_dot_qt.RaggedDotQtConfig(
-        lhs_qtype=jnp.float8_e4m3,
-        rhs_qtype=jnp.float8_e4m3,
+        lhs_qtype=jnp.float8_e4m3fn,
+        rhs_qtype=jnp.float8_e4m3fn,
     )
 
     @jax.jit
diff --git a/tests/core/ragged_dot_test.py b/tests/core/ragged_dot_test.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import logging
 from unittest import mock
 from absl.testing import absltest
 from absl.testing import parameterized
@@ -21,51 +22,156 @@
 from qwix._src.core import ragged_dot
 
 
-def mae(a, b):
+def rel_mae(a, b):
   assert a.dtype == b.dtype and a.shape == b.shape
   return jnp.abs(a - b).mean() / jnp.abs(a).mean()
 
 
 class RaggedDotTest(parameterized.TestCase):
 
+  def setUp(self):
+    super().setUp()
+    self._random_key = jax.random.key(42)
+
+  def _make_array(self, shape, asymmetric=False):
+    self._random_key, key = jax.random.split(self._random_key)
+    if asymmetric:
+      return jax.random.uniform(key, shape, jnp.float32)
+    return jax.random.normal(key, shape, jnp.float32)
+
   @parameterized.named_parameters(
       dict(
-          testcase_name='no_channelwise',
-          lhs_how=qarray.HowToQuantize(qtype=jnp.int8, channelwise_axes=[]),
-          rhs_how=qarray.HowToQuantize(qtype=jnp.int8, channelwise_axes=[]),
+          testcase_name='int8',
+          lhs_shape=(128, 256),
+          lhs_how=qarray.HowToQuantize(qtype=jnp.int8),
+          rhs_shape=(4, 256, 64),
+          rhs_how=qarray.HowToQuantize(qtype=jnp.int8),
+          group_sizes=(64, 32, 16, 16),
+          expected_mae=0.03,
+      ),
+      dict(
+          testcase_name='lhs_asymmetric',
+          lhs_shape=(128, 256),
+          lhs_how=qarray.HowToQuantize(
+              qtype=jnp.int8,
+              calibration_method='minmax',
+          ),
+          rhs_shape=(4, 256, 64),
+          rhs_how=qarray.HowToQuantize(
+              qtype=jnp.int8,
+              calibration_method='absmax',
+          ),
+          group_sizes=(50, 50, 28, 0),
+          expected_mae=0.07,
+          disable_fast_ragged_dot=True,
+      ),
+      dict(
+          testcase_name='rhs_group_channelwise',
+          lhs_shape=(128, 256),
+          lhs_how=qarray.HowToQuantize(
+              qtype=jnp.int8,
+              calibration_method='absmax',
+          ),
+          rhs_shape=(4, 256, 64),
+          rhs_how=qarray.HowToQuantize(
+              qtype=jnp.int8,
+              channelwise_axes=(0,),
+              calibration_method='absmax',
+          ),
+          group_sizes=(128, 0, 0, 0),
+          expected_mae=0.03,
+          disable_fast_ragged_dot=True,
+      ),
+      dict(
+          testcase_name='rhs_contracting_tiled',
+          lhs_shape=(128, 256),
+          lhs_how=qarray.HowToQuantize(
+              qtype=jnp.int8,
+              calibration_method='absmax',
+          ),
+          rhs_shape=(4, 256, 64),
+          rhs_how=qarray.HowToQuantize(
+              qtype=jnp.int8,
+              tiled_axes={1: 128},
+              calibration_method='absmax',
+          ),
+          group_sizes=(10, 20, 30, 68),
+          expected_mae=0.03,
+          disable_fast_ragged_dot=True,
       ),
       dict(
           testcase_name='channelwise',
-          lhs_how=qarray.HowToQuantize(qtype=jnp.int8, channelwise_axes=[0]),
-          rhs_how=qarray.HowToQuantize(qtype=jnp.int8, channelwise_axes=[2]),
+          lhs_shape=(128, 256),
+          lhs_how=qarray.HowToQuantize(
+              qtype=jnp.float8_e5m2,
+              channelwise_axes=(0,),
+          ),
+          rhs_shape=(4, 256, 64),
+          rhs_how=qarray.HowToQuantize(
+              qtype=jnp.float8_e5m2,
+              channelwise_axes=(2,),
+          ),
+          group_sizes=(128, 100, 0, 28),
+          expected_mae=0.08,
       ),
       dict(
-          testcase_name='more_channelwise',
-          lhs_how=qarray.HowToQuantize(qtype=jnp.int8, channelwise_axes=[0]),
-          rhs_how=qarray.HowToQuantize(qtype=jnp.int8, channelwise_axes=[0, 2]),
+          testcase_name='rhs_group_and_out_channelwise',
+          lhs_shape=(128, 256),
+          lhs_how=qarray.HowToQuantize(
+              qtype=jnp.float8_e5m2,
+              channelwise_axes=(0,),
+          ),
+          rhs_shape=(4, 256, 64),
+          rhs_how=qarray.HowToQuantize(
+              qtype=jnp.float8_e5m2,
+              channelwise_axes=(0, 2),
+          ),
+          group_sizes=(128, 100, 0, 28),
+          expected_mae=0.08,
       ),
   )
   def test_ragged_dot(
       self,
-      lhs_how,
-      rhs_how,
-      disable_fast_path=False,
+      *,
+      lhs_shape: tuple[int, ...],
+      lhs_how: qarray.HowToQuantize | None,
+      rhs_shape: tuple[int, ...],
+      rhs_how: qarray.HowToQuantize | None,
+      group_sizes: tuple[int, ...],
+      expected_mae: float,
+      disable_fast_ragged_dot: bool = False,
   ):
-    lhs = jax.random.normal(jax.random.key(0), (256, 16), jnp.bfloat16)
-    rhs = jax.random.normal(jax.random.key(1), (10, 16, 64), jnp.bfloat16)
-    group_sizes = jnp.array([10, 20, 30, 40, 0, 115, 6, 7, 1, 27], jnp.int32)
+    lhs_asymmetric = (
+        lhs_how.calibration_method == 'minmax' if lhs_how else False
+    )
+    rhs_asymmetric = (
+        rhs_how.calibration_method == 'minmax' if rhs_how else False
+    )
+    lhs = self._make_array(lhs_shape, lhs_asymmetric)
+    rhs = self._make_array(rhs_shape, rhs_asymmetric)
+    group_sizes = jnp.array(group_sizes)
 
-    fp_res = jax.lax.ragged_dot(lhs, rhs, group_sizes)
+    q_lhs = qarray.quantize(lhs, lhs_how) if lhs_how else lhs
+    q_rhs = qarray.quantize(rhs, rhs_how) if rhs_how else rhs
 
-    qlhs = qarray.quantize(lhs, lhs_how)
-    qrhs = qarray.quantize(rhs, rhs_how)
+    @jax.jit
+    def _multi_ragged_dot(lhs, rhs, fp_res):
+      slow_res = ragged_dot._slow_ragged_dot(lhs, rhs, group_sizes)
+      if disable_fast_ragged_dot:
+        fast_res = slow_res
+      else:
+        fast_res = ragged_dot._fast_ragged_dot(lhs, rhs, group_sizes)
+      return (
+          rel_mae(slow_res, fp_res),
+          rel_mae(slow_res, fast_res),
+      )
 
-    slow_res = ragged_dot._slow_ragged_dot(qlhs, qrhs, group_sizes)
-    self.assertLess(mae(slow_res, fp_res), 0.02)
+    fp_res = jax.lax.ragged_dot(lhs, rhs, group_sizes)
+    fp_mae, fast_mae = _multi_ragged_dot(q_lhs, q_rhs, fp_res)
 
-    if not disable_fast_path:
-      fast_res = ragged_dot._fast_ragged_dot(qlhs, qrhs, group_sizes)
-      self.assertLess(mae(fast_res, slow_res), 0.005)
+    logging.info('fp_mae=%s fast_mae=%s', fp_mae, fast_mae)
+    self.assertLessEqual(fp_mae, expected_mae)
+    self.assertLessEqual(fast_mae, 0.003)
 
   @parameterized.named_parameters(
       dict(
diff --git a/tests/core/ragged_dot_tpu_test.py b/tests/core/ragged_dot_tpu_test.py