
Commit 3a53dff

KrzakalaPaul (Paul Krzakala), rflamary, and cedricvincentcuaz authored
Batch OT losses (Sinkhorn + Gromov) (#755)
* linear ot implemented
* improve stopping criterion and asymmetric case
* Add recompute_const and simplify the pipeline for the symmetric = False case
* add tests
* update the examples and rename to follow the "ot.solve" naming conventions
* update RELEASES.md
* idem
* move set_grad_enabled to backend
* set_grad_enabled for quadratic solver
* update doc
* remove useless import in doc
* Update references
* update example
* Remove classes in quadratic, move examples to backend, add potentials, remove context managers for grads. To do: improve doc and tests
* update tests
* Massive improvement of the documentation for ot.batch
* cover (almost) all ot.batch with tests
* fix bug in the tests
* update docstring
* highlight that ot.batch is solving the entropic version
* removing yet another error in the docstring
* Add missing parameter recompute_const
* Remove png, add all backends and gradient mode to tests
* add the missing pytest
* change .sum() into nx.sum
* add missing backend
* yet another missing nx
* remove useless squeeze and add test for non-log bregman
* remove last_step from quadratic tests
* add missing tests and improve documentation
* proper unsqueeze test
* add unsqueeze to tensorflow
* solve double backprop issue in test_gradients_torch

---------

Co-authored-by: PaulKrzakala <paul.krzakala@gmail.com>
Co-authored-by: Rémi Flamary <remi.flamary@gmail.com>
Co-authored-by: Cédric Vincent-Cuaz <cedvincentcuaz@gmail.com>
1 parent 803d2ab commit 3a53dff
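
The commit message notes that ot.batch solves the entropic (Sinkhorn) version of the problem in parallel. As a quick orientation before the file-by-file diff, here is a minimal usage sketch assembled only from calls that appear in the example file added by this commit (ot.batch.dist_batch and ot.batch.solve_batch); the argument values are illustrative, not library defaults.

import numpy as np
import ot

# A batch of 4 small OT problems, 8 samples each, in 2 dimensions.
x = np.random.randn(4, 8, 2)
y = x + 0.1 * np.random.randn(4, 8, 2)

# All 4 cost matrices at once: array of shape (4, 8, 8).
M = ot.batch.dist_batch(x, y)

# Entropic (Sinkhorn) solve of the whole batch in parallel.
res = ot.batch.solve_batch(M=M, reg=1.0, reg_type="entropy", max_iter=100, tol=1e-3)
print(res.value_linear)  # one transport cost per problem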

File tree: 17 files changed (+1940, -22 lines)


.gitignore

Lines changed: 2 additions & 0 deletions
@@ -123,3 +123,5 @@ debug
 
 # pytest cahche
 .pytest_cache
+
+docs/source/

README.md

Lines changed: 5 additions & 0 deletions
@@ -446,3 +446,8 @@ Artificial Intelligence.
 [79] Liu, X., Bai, Y., Martín, R. D., Shi, K., Shahbazi, A., Landman, B. A., Chang, C., & Kolouri, S. (2025). [Linear Spherical Sliced Optimal Transport: A Fast Metric for Comparing Spherical Data](https://openreview.net/forum?id=fgUFZAxywx). International Conference on Learning Representations.
 
 [80] Altschuler, J., Bach, F., Rudi, A., Niles-Weed, J., [Massively scalable Sinkhorn distances via the Nyström method](https://proceedings.neurips.cc/paper_files/paper/2019/file/f55cadb97eaff2ba1980e001b0bd9842-Paper.pdf), Advances in Neural Information Processing Systems, 2019.
+
+[81] Xu, H., Luo, D., & Carin, L. (2019). [Scalable Gromov-Wasserstein learning for graph partitioning and matching](https://proceedings.neurips.cc/paper/2019/hash/6e62a992c676f611616097dbea8ea030-Abstract.html). Neural Information Processing Systems (NeurIPS).
+
+
+```

RELEASES.md

Lines changed: 1 addition & 0 deletions
@@ -26,6 +26,7 @@
 - Fix reg_div function compatibility with numpy in `ot.unbalanced.lbfgsb_unbalanced` via new function `ot.utils.fun_to_numpy` (PR #731)
 - Added to each example in the examples gallery the information about the release version in which it was introduced (PR #743)
 - Removed release information from quickstart guide (PR #744)
+- Implement batch parallel solvers in ot.batch (PR #745)
 - Update REAMDE with new API and reorganize examples (PR #754)
 
 #### Closed issues

docs/source/all.rst

Lines changed: 1 addition & 0 deletions
@@ -15,6 +15,7 @@ API and modules
 
 
    backend
+   batch
    bregman
    coot
    da

docs/source/user_guide.rst

Lines changed: 3 additions & 0 deletions
@@ -1217,3 +1217,6 @@ References
     couplings <http://proceedings.mlr.press/v89/forrow19a/forrow19a.pdf>`_. In
     The 22nd International Conference on Artificial Intelligence and Statistics
     (pp. 2454-2465). PMLR.
+
+.. [41] Xu, H., Luo, D., & Carin, L. (2019). `Scalable Gromov-Wasserstein learning for graph partitioning and matching
+    <https://arxiv.org/abs/1906.03666>`_\ , Advances in neural information processing systems, 32.

examples/backends/plot_ot_batch.py

Lines changed: 227 additions & 0 deletions
@@ -0,0 +1,227 @@ (new file)

"""
=================================================
Solving Many Optimal Transport Problems in Parallel
=================================================

In some situations, one may want to solve many OT problems with the same
structure (same number of samples, same cost function, etc.) at the same time.

In that case using a for loop to solve the problems sequentially is inefficient.
This example shows how to use the batch solvers implemented in POT to solve
many problems in parallel on CPU or GPU (even more efficient on GPU).

"""

# Author: Paul Krzakala <paul.krzakala@gmail.com>
# License: MIT License

# sphinx_gallery_thumbnail_number = 1


#############################################################################
#
# Computing the Cost Matrices
# ---------------------------------------------
#
# We want to create a batch of optimal transport problems with
# :math:`n` samples in :math:`d` dimensions.
#
# To do this, we first need to compute the cost matrices for each problem.
#
# .. note::
#     A straightforward approach would be to use a Python loop and
#     :func:`ot.dist`.
#     However, this is inefficient when working with batches.
#
# Instead, you can directly use :func:`ot.batch.dist_batch`, which computes
# all cost matrices in parallel.

import ot
import numpy as np

n_problems = 4  # nb problems/batch size
n_samples = 8  # nb samples
dim = 2  # nb dimensions

np.random.seed(0)
samples_source = np.random.randn(n_problems, n_samples, dim)
samples_target = samples_source + 0.1 * np.random.randn(n_problems, n_samples, dim)

# Naive approach
M_list = []
for i in range(n_problems):
    M_list.append(
        ot.dist(samples_source[i], samples_target[i])
    )  # List of cost matrices n_samples x n_samples
# Batched approach
M_batch = ot.batch.dist_batch(
    samples_source, samples_target
)  # Array of cost matrices n_problems x n_samples x n_samples

for i in range(n_problems):
    assert np.allclose(M_list[i], M_batch[i])

#############################################################################
#
# Solving the Problems
# ---------------------------------------------
#
# Once the cost matrices are computed, we can solve the corresponding
# optimal transport problems.
#
# .. note::
#     One option is to solve them sequentially with a Python loop using
#     :func:`ot.solve`.
#     This is simple but inefficient for large batches.
#
# Instead, you can use :func:`ot.batch.solve_batch`, which solves all
# problems in parallel.

reg = 1.0
max_iter = 100
tol = 1e-3

# Naive approach
results_values_list = []
for i in range(n_problems):
    res = ot.solve(M_list[i], reg=reg, max_iter=max_iter, tol=tol, reg_type="entropy")
    results_values_list.append(res.value_linear)

# Batched approach
results_batch = ot.batch.solve_batch(
    M=M_batch, reg=reg, max_iter=max_iter, tol=tol, reg_type="entropy"
)
results_values_batch = results_batch.value_linear

assert np.allclose(np.array(results_values_list), results_values_batch, atol=tol * 10)

#############################################################################
#
# Comparing Computation Time
# ---------------------------------------------
#
# We now compare the runtime of the two approaches on larger problems.
#
# .. note::
#     The speedup obtained with :mod:`ot.batch` can be even more
#     significant when computations are performed on a GPU.


from time import perf_counter

n_problems = 128
n_samples = 8
dim = 2
reg = 10.0
max_iter = 1000
tol = 1e-3

samples_source = np.random.randn(n_problems, n_samples, dim)
samples_target = samples_source + 0.1 * np.random.randn(n_problems, n_samples, dim)


def benchmark_naive(samples_source, samples_target):
    start = perf_counter()
    for i in range(n_problems):
        M = ot.dist(samples_source[i], samples_target[i])
        res = ot.solve(M, reg=reg, max_iter=max_iter, tol=tol, reg_type="entropy")
    end = perf_counter()
    return end - start


def benchmark_batch(samples_source, samples_target):
    start = perf_counter()
    M_batch = ot.batch.dist_batch(samples_source, samples_target)
    res_batch = ot.batch.solve_batch(
        M=M_batch, reg=reg, max_iter=max_iter, tol=tol, reg_type="entropy"
    )
    end = perf_counter()
    return end - start


time_naive = benchmark_naive(samples_source, samples_target)
time_batch = benchmark_batch(samples_source, samples_target)

print(f"Naive approach time: {time_naive:.4f} seconds")
print(f"Batched approach time: {time_batch:.4f} seconds")

#############################################################################
#
# Gromov-Wasserstein
# ---------------------------------------------
#
# The :mod:`ot.batch` module also provides a batched Gromov-Wasserstein solver.
#
# .. note::
#     This solver is **not** equivalent to calling :func:`ot.solve_gromov`
#     repeatedly in a loop.
#
# Key differences:
#
# - :func:`ot.solve_gromov`
#   Uses the conditional gradient algorithm. Each inner iteration relies on
#   an exact EMD solver.
#
# - :func:`ot.batch.solve_gromov_batch`
#   Uses a proximal variant, where each inner iteration applies entropic
#   regularization.
#
# As a result:
#
# - :func:`ot.solve_gromov` is usually faster on CPU
# - :func:`ot.batch.solve_gromov_batch` is slower on CPU, but provides
#   better objective values.
#
# .. tip::
#     If your data is on a GPU, :func:`ot.batch.solve_gromov_batch`
#     is significantly faster AND provides better objective values.

from ot import solve_gromov
from ot.batch import solve_gromov_batch


def benchmark_naive_gw(samples_source, samples_target):
    start = perf_counter()
    avg_value = 0
    for i in range(n_problems):
        C1 = ot.dist(samples_source[i], samples_source[i])
        C2 = ot.dist(samples_target[i], samples_target[i])
        res = solve_gromov(C1, C2, max_iter=1000, tol=tol)
        avg_value += res.value
    avg_value /= n_problems
    end = perf_counter()
    return end - start, avg_value


def benchmark_batch_gw(samples_source, samples_target):
    start = perf_counter()
    C1_batch = ot.batch.dist_batch(samples_source, samples_source)
    C2_batch = ot.batch.dist_batch(samples_target, samples_target)
    res_batch = solve_gromov_batch(
        C1_batch, C2_batch, reg=1, max_iter=100, max_iter_inner=50, tol=tol
    )
    avg_value = np.mean(res_batch.value)
    end = perf_counter()
    return end - start, avg_value


time_naive_gw, avg_value_naive_gw = benchmark_naive_gw(samples_source, samples_target)
time_batch_gw, avg_value_batch_gw = benchmark_batch_gw(samples_source, samples_target)

print(f"{'Method':<20}{'Time (s)':<15}{'Avg Value':<15}")
print(f"{'Naive GW':<20}{time_naive_gw:<15.4f}{avg_value_naive_gw:<15.4f}")
print(f"{'Batched GW':<20}{time_batch_gw:<15.4f}{avg_value_batch_gw:<15.4f}")

#############################################################################
#
# In summary: no more for loops!
# ---------------------------------------------

import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(4, 4))
ax.text(0.5, 0.5, "For", fontsize=160, ha="center", va="center", zorder=0)
ax.axis("off")
ax.plot([0, 1], [0, 1], color="red", linewidth=10, zorder=1)
ax.plot([0, 1], [1, 0], color="red", linewidth=10, zorder=1)
plt.show()
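
The tip in the example above points at GPU execution. The batch solvers dispatch through POT's backend mechanism, so one way to try this is to pass torch tensors that already live on a CUDA device; outputs then stay in the same framework. A minimal sketch, assuming PyTorch is installed (the device handling below is an illustrative assumption, not part of the committed example):

import numpy as np
import torch
import ot

# Illustrative assumption: use a CUDA device when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"

rng = np.random.default_rng(0)
xs = torch.tensor(rng.standard_normal((128, 8, 2)), device=device)
xt = xs + 0.1 * torch.tensor(rng.standard_normal((128, 8, 2)), device=device)

# Cost matrices and the entropic solve run on the same device as the inputs.
M_batch = ot.batch.dist_batch(xs, xt)
res = ot.batch.solve_batch(M=M_batch, reg=1.0, reg_type="entropy", max_iter=1000, tol=1e-3)
print(res.value_linear.shape)  # one value per problem, returned in the torch backend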

examples/index.rst

Lines changed: 0 additions & 1 deletion
@@ -33,7 +33,6 @@ Differentiable OT with PyTorch
    ../../examples/gaussian_gmm/plot_GMM_flow.py
    ../../examples/gromov/plot_gnn_TFGW.py
 
-
 Gromov-Wasserstein (GW) and Fused GW
 ------------------------------------

ot/__init__.py

Lines changed: 4 additions & 1 deletion
@@ -37,7 +37,6 @@
 from . import lowrank
 from . import gmm
 
-
 # OT functions
 from .lp import (
     emd,
@@ -73,6 +72,8 @@
 from .solvers import solve, solve_gromov, solve_sample
 from .lowrank import lowrank_sinkhorn
 
+from .batch import solve_batch, solve_gromov_batch
+
 # utils functions
 from .utils import dist, unif, tic, toc, toq
 
@@ -136,4 +137,6 @@
     "sliced_wasserstein_sphere_unif",
     "lowrank_sinkhorn",
     "lowrank_gromov_wasserstein_samples",
+    "solve_batch",
+    "solve_gromov_batch",
 ]
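
Since solve_batch and solve_gromov_batch are re-exported here and listed in __all__, they are reachable directly from the top-level ot namespace as well as from ot.batch. A minimal sketch whose argument values simply mirror the ones used in the example above:

import numpy as np
import ot

x = np.random.randn(4, 8, 2)
y = x + 0.1 * np.random.randn(4, 8, 2)

# Entropic OT over a batch of cost matrices, via the new top-level alias.
M = ot.batch.dist_batch(x, y)
res_lin = ot.solve_batch(M=M, reg=1.0, reg_type="entropy", max_iter=100, tol=1e-3)

# Batched Gromov-Wasserstein on intra-domain cost matrices.
C1 = ot.batch.dist_batch(x, x)
C2 = ot.batch.dist_batch(y, y)
res_gw = ot.solve_gromov_batch(C1, C2, reg=1.0, max_iter=100, max_iter_inner=50, tol=1e-3)

print(res_lin.value_linear, res_gw.value)  # per-problem objective values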
