From ec717567c244959e7360e3422bb2b6355577d7ce Mon Sep 17 00:00:00 2001
From: Pedro Almeida <pedroalmeida.415@gmail.com>
Date: Thu, 12 Mar 2026 01:12:51 -0300
Subject: [PATCH 1/6] feat: Implement batch processing for the MDXC separator

---
 .../separator/architectures/mdxc_separator.py | 60 +++++++++++++------
 audio_separator/separator/separator.py        | 10 ++--
 audio_separator/utils/cli.py                  |  4 +-
 3 files changed, 49 insertions(+), 25 deletions(-)

diff --git a/audio_separator/separator/architectures/mdxc_separator.py b/audio_separator/separator/architectures/mdxc_separator.py
index 1ddb499..1b4631c 100644
--- a/audio_separator/separator/architectures/mdxc_separator.py
+++ b/audio_separator/separator/architectures/mdxc_separator.py
@@ -3,6 +3,7 @@
 
 import torch
 import numpy as np
+from torch.utils.data import DataLoader, Dataset
 from tqdm import tqdm
 from ml_collections import ConfigDict
 from scipy import signal
@@ -13,6 +14,31 @@
 # Roformer direct constructors removed; loading handled via RoformerLoader in CommonSeparator.
 
 
+class RoformerDataset(Dataset):
+    def __init__(self, mix, chunk_size, step):
+        self.mix = mix
+        self.chunk_size = chunk_size
+        self.step = step
+        self.indices = list(range(0, mix.shape[1], step))
+
+    def __len__(self):
+        return len(self.indices)
+
+    def __getitem__(self, idx):
+        start_idx = self.indices[idx]
+        part = self.mix[:, start_idx : start_idx + self.chunk_size]
+        length = part.shape[-1]
+
+        # We need to handle the last chunk where part is smaller than chunk_size
+        if start_idx + self.chunk_size > self.mix.shape[1]:
+            # Take the last chunk_size from the end
+            part = self.mix[:, -self.chunk_size :]
+            length = self.chunk_size
+            start_idx = self.mix.shape[1] - self.chunk_size
+
+        return part, start_idx, length
+
+
 class MDXCSeparator(CommonSeparator):
     """
     MDXCSeparator is responsible for separating audio sources using MDXC models.
@@ -41,6 +67,7 @@ def __init__(self, common_config, arch_config):
 
         self.overlap = arch_config.get("overlap", 8)
         self.batch_size = arch_config.get("batch_size", 1)
+        self.num_workers = arch_config.get("num_workers", 0)
 
         # Amount of pitch shift to apply during processing (this does NOT affect the pitch of the output audio):
         # • Whole numbers indicate semitones.
@@ -51,7 +78,7 @@ def __init__(self, common_config, arch_config):
 
         self.process_all_stems = arch_config.get("process_all_stems", True)
 
-        self.logger.debug(f"MDXC arch params: batch_size={self.batch_size}, segment_size={self.segment_size}, overlap={self.overlap}")
+        self.logger.debug(f"MDXC arch params: batch_size={self.batch_size}, segment_size={self.segment_size}, overlap={self.overlap}, num_workers={self.num_workers}")
         self.logger.debug(f"MDXC arch params: override_model_segment_size={self.override_model_segment_size}, pitch_shift={self.pitch_shift}")
         self.logger.debug(f"MDXC multi-stem params: process_all_stems={self.process_all_stems}")
 
@@ -317,28 +344,23 @@ def demix(self, mix: np.ndarray) -> dict:
                 result = torch.zeros(req_shape, dtype=torch.float32)
                 counter = torch.zeros(req_shape, dtype=torch.float32)
 
-                for i in tqdm(range(0, mix.shape[1], step)):
-                    part = mix[:, i : i + chunk_size]
-                    length = part.shape[-1]
-                    if i + chunk_size > mix.shape[1]:
-                        part = mix[:, -chunk_size:]
-                        length = chunk_size
-                    part = part.to(device)
-                    x = self.model_run(part.unsqueeze(0))[0]
-                    x = x.cpu()
-                    # Perform overlap_add on CPU
-                    if i + chunk_size > mix.shape[1]:
-                        # Fixed to correctly add to the end of the tensor
-                        start_idx = result.shape[-1] - chunk_size
+                dataset = RoformerDataset(mix, chunk_size, step)
+                dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers, pin_memory=(device.type == "cuda"))
+
+                for parts, start_idxs, lengths in tqdm(dataloader):
+                    parts = parts.to(device)
+                    xs = self.model_run(parts)
+
+                    for b in range(len(xs)):
+                        x = xs[b].cpu()
+                        start_idx = start_idxs[b].item()
+                        length = lengths[b].item()
+
+                        # Perform overlap_add on CPU
                         result = self.overlap_add(result, x, window, start_idx, length)
                         safe_len = min(length, x.shape[-1], window.shape[0])
                         if safe_len > 0:
                             counter[..., start_idx : start_idx + safe_len] += window[:safe_len]
-                    else:
-                        result = self.overlap_add(result, x, window, i, length)
-                        safe_len = min(length, x.shape[-1], window.shape[0])
-                        if safe_len > 0:
-                            counter[..., i : i + safe_len] += window[:safe_len]
 
             inferenced_outputs = result / counter.clamp(min=1e-10)
 
diff --git a/audio_separator/separator/separator.py b/audio_separator/separator/separator.py
index 9a8e42f..61f7d37 100644
--- a/audio_separator/separator/separator.py
+++ b/audio_separator/separator/separator.py
@@ -99,7 +99,7 @@ def __init__(
         mdx_params={"hop_length": 1024, "segment_size": 256, "overlap": 0.25, "batch_size": 1, "enable_denoise": False},
         vr_params={"batch_size": 1, "window_size": 512, "aggression": 5, "enable_tta": False, "enable_post_process": False, "post_process_threshold": 0.2, "high_end_process": False},
         demucs_params={"segment_size": "Default", "shifts": 2, "overlap": 0.25, "segments_enabled": True},
-        mdxc_params={"segment_size": 256, "override_model_segment_size": False, "batch_size": 1, "overlap": 8, "pitch_shift": 0},
+        mdxc_params={"segment_size": 256, "override_model_segment_size": False, "batch_size": 1, "overlap": 8, "pitch_shift": 0, "num_workers": 0},
         info_only=False,
     ):
         """Initialize the separator."""
@@ -169,7 +169,7 @@ def __init__(
 
         self.invert_using_spec = invert_using_spec
         if self.invert_using_spec:
-            self.logger.debug(f"Secondary step will be inverted using spectogram rather than waveform. This may improve quality but is slightly slower.")
+            self.logger.debug("Secondary step will be inverted using spectogram rather than waveform. This may improve quality but is slightly slower.")
 
         try:
             self.sample_rate = int(sample_rate)
@@ -496,14 +496,14 @@ def list_supported_model_files(self):
         self.download_file_if_not_exists("https://raw.githubusercontent.com/TRvlvr/application_data/main/filelists/download_checks.json", download_checks_path)
 
         model_downloads_list = json.load(open(download_checks_path, encoding="utf-8"))
-        self.logger.debug(f"UVR model download list loaded")
+        self.logger.debug("UVR model download list loaded")
 
         # Load the model scores with error handling
         model_scores = {}
         try:
             with resources.open_text("audio_separator", "models-scores.json") as f:
                 model_scores = json.load(f)
-            self.logger.debug(f"Model scores loaded")
+            self.logger.debug("Model scores loaded")
         except json.JSONDecodeError as e:
             self.logger.warning(f"Failed to load model scores: {str(e)}")
             self.logger.warning("Continuing without model scores")
@@ -529,7 +529,7 @@ def list_supported_model_files(self):
         # Load the JSON file using importlib.resources
         with resources.open_text("audio_separator", "models.json") as f:
             audio_separator_models_list = json.load(f)
-        self.logger.debug(f"Audio-Separator model list loaded")
+        self.logger.debug("Audio-Separator model list loaded")
 
         # Return object with list of model names
         model_files_grouped_by_type = {
diff --git a/audio_separator/utils/cli.py b/audio_separator/utils/cli.py
index e6a8492..19c856a 100755
--- a/audio_separator/utils/cli.py
+++ b/audio_separator/utils/cli.py
@@ -3,7 +3,6 @@
 import logging
 import json
 import sys
-import os
 from importlib import metadata
 
 
@@ -118,6 +117,7 @@ def main():
     mdxc_override_model_segment_size_help = "Override model default segment size instead of using the model default value. Example: --mdxc_override_model_segment_size"
     mdxc_overlap_help = "Amount of overlap between prediction windows, 2-50. Higher is better but slower (default: %(default)s). Example: --mdxc_overlap=8"
     mdxc_batch_size_help = "Larger consumes more RAM but may process slightly faster (default: %(default)s). Example: --mdxc_batch_size=4"
+    mdxc_num_workers_help = "Number of workers for DataLoader. Higher = faster preprocessing but more CPU/RAM (default: %(default)s). Example: --mdxc_num_workers=4"
     mdxc_pitch_shift_help = "Shift audio pitch by a number of semitones while processing. May improve output for deep/high vocals. (default: %(default)s). Example: --mdxc_pitch_shift=2"
 
     mdxc_params = parser.add_argument_group("MDXC Architecture Parameters")
@@ -125,6 +125,7 @@ def main():
     mdxc_params.add_argument("--mdxc_override_model_segment_size", action="store_true", help=mdxc_override_model_segment_size_help)
     mdxc_params.add_argument("--mdxc_overlap", type=int, default=8, help=mdxc_overlap_help)
     mdxc_params.add_argument("--mdxc_batch_size", type=int, default=1, help=mdxc_batch_size_help)
+    mdxc_params.add_argument("--mdxc_num_workers", type=int, default=0, help=mdxc_num_workers_help)
     mdxc_params.add_argument("--mdxc_pitch_shift", type=int, default=0, help=mdxc_pitch_shift_help)
 
     args = parser.parse_args()
@@ -228,6 +229,7 @@ def main():
         mdxc_params={
             "segment_size": args.mdxc_segment_size,
             "batch_size": args.mdxc_batch_size,
+            "num_workers": args.mdxc_num_workers,
             "overlap": args.mdxc_overlap,
             "override_model_segment_size": args.mdxc_override_model_segment_size,
             "pitch_shift": args.mdxc_pitch_shift,

From 72e293263165cc08f10193a5f786ce084be41d4a Mon Sep 17 00:00:00 2001
From: Pedro Almeida <pedroalmeida.415@gmail.com>
Date: Thu, 12 Mar 2026 02:35:59 -0300
Subject: [PATCH 2/6] fix: CLI tests missing new "num_workers" param

---
 tests/unit/test_cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py
index 4ed37d7..ef2cdea 100644
--- a/tests/unit/test_cli.py
+++ b/tests/unit/test_cli.py
@@ -28,7 +28,7 @@ def common_expected_args():
         "mdx_params": {"hop_length": 1024, "segment_size": 256, "overlap": 0.25, "batch_size": 1, "enable_denoise": False},
         "vr_params": {"batch_size": 1, "window_size": 512, "aggression": 5, "enable_tta": False, "enable_post_process": False, "post_process_threshold": 0.2, "high_end_process": False},
         "demucs_params": {"segment_size": "Default", "shifts": 2, "overlap": 0.25, "segments_enabled": True},
-        "mdxc_params": {"segment_size": 256, "batch_size": 1, "overlap": 8, "override_model_segment_size": False, "pitch_shift": 0},
+        "mdxc_params": {"segment_size": 256, "batch_size": 1, "overlap": 8, "override_model_segment_size": False, "pitch_shift": 0, "num_workers": 0},
     }
 
 

From 0bb3d1b0e4e1443bf6433ce54dd54f4e9d9f14dc Mon Sep 17 00:00:00 2001
From: Pedro Almeida <pedroalmeida.415@gmail.com>
Date: Thu, 12 Mar 2026 02:37:59 -0300
Subject: [PATCH 3/6] chore: add docstring documentation to new RoformerDataset
 class and its methods

---
 .../separator/architectures/mdxc_separator.py | 33 +++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/audio_separator/separator/architectures/mdxc_separator.py b/audio_separator/separator/architectures/mdxc_separator.py
index 1b4631c..82e8c79 100644
--- a/audio_separator/separator/architectures/mdxc_separator.py
+++ b/audio_separator/separator/architectures/mdxc_separator.py
@@ -15,26 +15,55 @@
 
 
 class RoformerDataset(Dataset):
+    """
+    Dataset for handling Roformer audio chunks.
+    It splits the audio mix into configurable chunks with a specified step size.
+    """
+
     def __init__(self, mix, chunk_size, step):
+        """
+        Initializes the RoformerDataset.
+
+        Args:
+            mix (torch.Tensor or np.ndarray): The 2-D audio mix to be processed; chunks are sliced along axis 1.
+            chunk_size (int): The size of each chunk.
+            step (int): The step size between chunks.
+        """
         self.mix = mix
         self.chunk_size = chunk_size
         self.step = step
         self.indices = list(range(0, mix.shape[1], step))
 
     def __len__(self):
+        """
+        Returns the number of chunks in the dataset.
+
+        Returns:
+            int: The number of chunks.
+        """
         return len(self.indices)
 
     def __getitem__(self, idx):
+        """
+        Gets a chunk from the dataset by index.
+
+        Args:
+            idx (int): The index of the chunk.
+
+        Returns:
+            tuple: A tuple containing the chunk (a slice of the mix, same array type), the start index (int), and the length (int).
+        """
         start_idx = self.indices[idx]
         part = self.mix[:, start_idx : start_idx + self.chunk_size]
         length = part.shape[-1]
 
         # We need to handle the last chunk where part is smaller than chunk_size
-        if start_idx + self.chunk_size > self.mix.shape[1]:
+        if length < self.chunk_size and self.mix.shape[1] >= self.chunk_size:
             # Take the last chunk_size from the end
             part = self.mix[:, -self.chunk_size :]
             length = self.chunk_size
             start_idx = self.mix.shape[1] - self.chunk_size
+        # If mix is shorter than chunk_size, keep original part and length
 
         return part, start_idx, length
 
@@ -351,7 +380,7 @@ def demix(self, mix: np.ndarray) -> dict:
                     parts = parts.to(device)
                     xs = self.model_run(parts)
 
-                    for b in range(len(xs)):
+                    for b in range(xs.shape[0]):
                         x = xs[b].cpu()
                         start_idx = start_idxs[b].item()
                         length = lengths[b].item()

From 75a7c5efce8ba64e8b7d1d6c134e96b3956542a9 Mon Sep 17 00:00:00 2001
From: Pedro Almeida <pedroalmeida.415@gmail.com>
Date: Thu, 12 Mar 2026 13:08:37 -0300
Subject: [PATCH 4/6] fix: optimize Roformer scheduling and batch D2H transfers

- Refactor RoformerDataset to calculate tail-window remapping during initialization.
- Fix an issue where the final audio chunk was duplicated when the tail start fell exactly on a step boundary.
- Optimize MDXCSeparator inference by moving device-to-host transfers (.cpu()) outside the per-sample loop.
- Batch the D2H copies to reduce synchronization overhead and improve processing speed.
- Simplify RoformerDataset.__getitem__ to remove redundant re-calculation logic.
---
 .../separator/architectures/mdxc_separator.py | 26 +++++++++++--------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/audio_separator/separator/architectures/mdxc_separator.py b/audio_separator/separator/architectures/mdxc_separator.py
index 82e8c79..819dd0a 100644
--- a/audio_separator/separator/architectures/mdxc_separator.py
+++ b/audio_separator/separator/architectures/mdxc_separator.py
@@ -32,7 +32,19 @@ def __init__(self, mix, chunk_size, step):
         self.mix = mix
         self.chunk_size = chunk_size
         self.step = step
-        self.indices = list(range(0, mix.shape[1], step))
+
+        indices = list(range(0, mix.shape[1], step))
+        last_start = mix.shape[1] - chunk_size
+
+        if last_start > 0:
+            # Remap any index that would result in a short chunk to the last_start
+            indices = [i if i <= last_start else last_start for i in indices]
+        elif last_start <= 0:
+            # If mix is shorter than or equal to chunk_size, only one chunk starting at 0 is needed
+            indices = [0]
+
+        # Use a dictionary to preserve insertion order while deduplicating
+        self.indices = list(dict.fromkeys(indices))
 
     def __len__(self):
         """
@@ -57,14 +69,6 @@ def __getitem__(self, idx):
         part = self.mix[:, start_idx : start_idx + self.chunk_size]
         length = part.shape[-1]
 
-        # We need to handle the last chunk where part is smaller than chunk_size
-        if length < self.chunk_size and self.mix.shape[1] >= self.chunk_size:
-            # Take the last chunk_size from the end
-            part = self.mix[:, -self.chunk_size :]
-            length = self.chunk_size
-            start_idx = self.mix.shape[1] - self.chunk_size
-        # If mix is shorter than chunk_size, keep original part and length
-
         return part, start_idx, length
 
 
@@ -378,10 +382,10 @@ def demix(self, mix: np.ndarray) -> dict:
 
                 for parts, start_idxs, lengths in tqdm(dataloader):
                     parts = parts.to(device)
-                    xs = self.model_run(parts)
+                    xs = self.model_run(parts).detach().cpu()
 
                     for b in range(xs.shape[0]):
-                        x = xs[b].cpu()
+                        x = xs[b]
                         start_idx = start_idxs[b].item()
                         length = lengths[b].item()
 

From d8b75385605335dd770f5a30375e2b562fe7c9c5 Mon Sep 17 00:00:00 2001
From: Pedro Almeida <pedroalmeida.415@gmail.com>
Date: Thu, 12 Mar 2026 13:11:23 -0300
Subject: [PATCH 5/6] tests: add RoformerDataset test coverage and verify CLI
 num_workers forwarding

---
 tests/unit/test_cli.py                    | 19 +++++++-
 tests/unit/test_mdxc_roformer_chunking.py | 59 +++++++++++++++++++++--
 2 files changed, 73 insertions(+), 5 deletions(-)

diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py
index ef2cdea..e24ac6d 100644
--- a/tests/unit/test_cli.py
+++ b/tests/unit/test_cli.py
@@ -2,7 +2,6 @@
 import pytest
 import logging
 from audio_separator.utils.cli import main
-import subprocess
 from unittest import mock
 from unittest.mock import patch, MagicMock, mock_open
 
@@ -275,3 +274,21 @@ def test_cli_demucs_output_names_argument(common_expected_args):
             # Assertions
             mock_separator.assert_called_once_with(**common_expected_args)
             mock_separator_instance.separate.assert_called_once_with(["test_audio.mp3"], custom_output_names=demucs_output_names)
+
+
+# Test using mdxc_num_workers argument
+def test_cli_mdxc_num_workers_argument(common_expected_args):
+    test_args = ["cli.py", "test_audio.mp3", "--mdxc_num_workers=2"]
+    with patch("sys.argv", test_args):
+        with patch("audio_separator.separator.Separator") as mock_separator:
+            mock_separator_instance = mock_separator.return_value
+            mock_separator_instance.separate.return_value = ["output_file.mp3"]
+            main()
+
+            # Update expected args for this specific test
+            expected_args = common_expected_args.copy()
+            expected_args["mdxc_params"] = expected_args["mdxc_params"].copy()
+            expected_args["mdxc_params"]["num_workers"] = 2
+
+            # Assertions
+            mock_separator.assert_called_once_with(**expected_args)
diff --git a/tests/unit/test_mdxc_roformer_chunking.py b/tests/unit/test_mdxc_roformer_chunking.py
index 9d3ddd4..52d70d5 100644
--- a/tests/unit/test_mdxc_roformer_chunking.py
+++ b/tests/unit/test_mdxc_roformer_chunking.py
@@ -6,8 +6,9 @@
 import pytest
 import numpy as np
 import torch
-from unittest.mock import Mock, MagicMock, patch
+from unittest.mock import Mock
 import logging
+from audio_separator.separator.architectures.mdxc_separator import RoformerDataset
 
 
 class TestMDXCRoformerChunking:
@@ -103,7 +104,6 @@ def test_counter_updates_safe_len(self):
         """T055: Counter increments match overlap_add safe span."""
         # Mock counter and overlap_add logic
         counter = torch.zeros(2, 20000)
-        chunk_size = 8192
         safe_len = 6000  # Shorter than chunk_size
         start_idx = 1000
         
@@ -288,7 +288,7 @@ def mock_setup_chunking_with_logging(model, audio):
             audio = Mock()
             audio.hop_length = 512
             
-            result = mock_setup_chunking_with_logging(model_with_stft, audio)
+            mock_setup_chunking_with_logging(model_with_stft, audio)
             
             # Verify logging occurred
             assert "stft_hop_length=1024" in caplog.text
@@ -342,10 +342,61 @@ def mock_calculate_iterations(audio_len, chunk_sz, step_sz):
             )
             
             # Verify minimum iterations
-            assert actual_iterations >= 1, f"Should always have at least 1 iteration"
+            assert actual_iterations >= 1, "Should always have at least 1 iteration"
             
             # Verify maximum reasonable iterations
             max_reasonable = (audio_length // step_size) + 2
             assert actual_iterations <= max_reasonable, (
                 f"Too many iterations {actual_iterations} for audio_len={audio_length}"
             )
+
+
+class TestRoformerDataset:
+    """Test cases for the RoformerDataset class."""
+
+    def test_roformer_dataset_no_duplicates(self):
+        """Verify that indices are correctly deduplicated when tail lands on step boundary."""
+        mix = np.zeros((2, 100))
+        chunk_size = 20
+        step = 10
+        dataset = RoformerDataset(mix, chunk_size, step)
+
+        # Expected indices: 0, 10, 20, 30, 40, 50, 60, 70, 80
+        # (90 was remapped to 80 and then deduplicated)
+        expected_indices = [0, 10, 20, 30, 40, 50, 60, 70, 80]
+        assert dataset.indices == expected_indices
+        assert len(dataset.indices) == len(set(dataset.indices))
+
+    def test_roformer_dataset_tail_remapped(self):
+        """Verify that audio tail is correctly remapped and included."""
+        mix = np.zeros((2, 105))
+        chunk_size = 20
+        step = 10
+        dataset = RoformerDataset(mix, chunk_size, step)
+
+        # Expected indices: 0, 10, 20, 30, 40, 50, 60, 70, 80, 85
+        expected_indices = [0, 10, 20, 30, 40, 50, 60, 70, 80, 85]
+        assert dataset.indices == expected_indices
+        assert len(dataset.indices) == len(set(dataset.indices))
+
+    def test_roformer_dataset_short_audio(self):
+        """Verify that audio shorter than chunk_size is handled correctly."""
+        mix = np.zeros((2, 10))
+        chunk_size = 20
+        step = 10
+        dataset = RoformerDataset(mix, chunk_size, step)
+
+        # Should result in just [0]
+        assert dataset.indices == [0]
+        part, start_idx, length = dataset[0]
+        assert part.shape == (2, 10)
+        assert start_idx == 0
+        assert length == 10
+
+    def test_roformer_dataset_exact_overlap(self):
+        """Verify index scheduling when step equals chunk_size and the mix length is an exact multiple of it."""
+        mix = np.zeros((2, 40))
+        chunk_size = 20
+        step = 20
+        dataset = RoformerDataset(mix, chunk_size, step)
+        assert dataset.indices == [0, 20]

From 8926f70cdb56a40daabf2108f2577131f6a27db6 Mon Sep 17 00:00:00 2001
From: Pedro Almeida <pedroalmeida.415@gmail.com>
Date: Thu, 12 Mar 2026 13:51:42 -0300
Subject: [PATCH 6/6] chore: update README and inline documentation with new
 num_workers param

---
 README.md                              | 5 +++--
 audio_separator/separator/separator.py | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index c6b8057..eaff0ce 100644
--- a/README.md
+++ b/README.md
@@ -328,7 +328,7 @@ usage: audio-separator [-h] [-v] [-d] [-e] [-l] [--log_level LOG_LEVEL] [--list_
                        [--vr_window_size VR_WINDOW_SIZE] [--vr_aggression VR_AGGRESSION] [--vr_enable_tta] [--vr_high_end_process] [--vr_enable_post_process]
                        [--vr_post_process_threshold VR_POST_PROCESS_THRESHOLD] [--demucs_segment_size DEMUCS_SEGMENT_SIZE] [--demucs_shifts DEMUCS_SHIFTS] [--demucs_overlap DEMUCS_OVERLAP]
                        [--demucs_segments_enabled DEMUCS_SEGMENTS_ENABLED] [--mdxc_segment_size MDXC_SEGMENT_SIZE] [--mdxc_override_model_segment_size] [--mdxc_overlap MDXC_OVERLAP]
-                       [--mdxc_batch_size MDXC_BATCH_SIZE] [--mdxc_pitch_shift MDXC_PITCH_SHIFT]
+                       [--mdxc_batch_size MDXC_BATCH_SIZE] [--mdxc_num_workers MDXC_NUM_WORKERS] [--mdxc_pitch_shift MDXC_PITCH_SHIFT]
                        [audio_files ...]
 
 Separate audio file into different stems.
@@ -394,6 +394,7 @@ MDXC Architecture Parameters:
   --mdxc_override_model_segment_size                     Override model default segment size instead of using the model default value. Example: --mdxc_override_model_segment_size
   --mdxc_overlap MDXC_OVERLAP                            Amount of overlap between prediction windows, 2-50. Higher is better but slower (default: 8). Example: --mdxc_overlap=8
   --mdxc_batch_size MDXC_BATCH_SIZE                      Larger consumes more RAM but may process slightly faster (default: 1). Example: --mdxc_batch_size=4
+  --mdxc_num_workers MDXC_NUM_WORKERS                    Number of workers for DataLoader. Higher = faster preprocessing but more CPU/RAM (default: 0). Example: --mdxc_num_workers=4
   --mdxc_pitch_shift MDXC_PITCH_SHIFT                    Shift audio pitch by a number of semitones while processing. May improve output for deep/high vocals. (default: 0). Example: --mdxc_pitch_shift=2
 ```
 
@@ -524,7 +525,7 @@ You can also rename specific stems:
 - **`mdx_params`:** (Optional) MDX Architecture Specific Attributes & Defaults. `Default: {"hop_length": 1024, "segment_size": 256, "overlap": 0.25, "batch_size": 1, "enable_denoise": False}`
 - **`vr_params`:** (Optional) VR Architecture Specific Attributes & Defaults. `Default: {"batch_size": 1, "window_size": 512, "aggression": 5, "enable_tta": False, "enable_post_process": False, "post_process_threshold": 0.2, "high_end_process": False}`
 - **`demucs_params`:** (Optional) Demucs Architecture Specific Attributes & Defaults. `Default: {"segment_size": "Default", "shifts": 2, "overlap": 0.25, "segments_enabled": True}`
-- **`mdxc_params`:** (Optional) MDXC Architecture Specific Attributes & Defaults. `Default: {"segment_size": 256, "override_model_segment_size": False, "batch_size": 1, "overlap": 8, "pitch_shift": 0}`
+- **`mdxc_params`:** (Optional) MDXC Architecture Specific Attributes & Defaults. `Default: {"segment_size": 256, "override_model_segment_size": False, "batch_size": 1, "overlap": 8, "pitch_shift": 0, "num_workers": 0}`
 
 ## Remote API Usage 🌐
 
diff --git a/audio_separator/separator/separator.py b/audio_separator/separator/separator.py
index 61f7d37..dccd439 100644
--- a/audio_separator/separator/separator.py
+++ b/audio_separator/separator/separator.py
@@ -77,6 +77,7 @@ class Separator:
         batch_size: 1
         overlap: 8
         pitch_shift: 0
+        num_workers: 0
     """
 
     def __init__(