From fda7ec51fbb8d8106e3577fa68b0429122aadb9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=81=AD=E3=81=8A=E3=82=93?= <240400715+neon-aiart@users.noreply.github.com> Date: Mon, 24 Nov 2025 10:09:04 +0900 Subject: [PATCH 1/5] feat: Update infer-web.py to include Base64 output component --- infer-web.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/infer-web.py b/infer-web.py index 47596d539..1522d95c1 100644 --- a/infer-web.py +++ b/infer-web.py @@ -835,6 +835,7 @@ def change_f0_method(f0method8): fn=clean, inputs=[], outputs=[sid0], api_name="infer_clean" ) with gr.TabItem(i18n("单次推理")): + vc_output3_base64 = gr.JSON(label="US_Base64_Data", visible=False) with gr.Group(): with gr.Row(): with gr.Column(): @@ -962,7 +963,7 @@ def change_f0_method(f0method8): rms_mix_rate0, protect0, ], - [vc_output1, vc_output2], + [vc_output1, vc_output2, vc_output3_base64], api_name="infer_convert", ) with gr.TabItem(i18n("批量推理")): From 47ae3ad033767ece16773e127e283dc5821def5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=81=AD=E3=81=8A=E3=82=93?= <240400715+neon-aiart@users.noreply.github.com> Date: Mon, 24 Nov 2025 10:41:49 +0900 Subject: [PATCH 2/5] fix: Implement Base64 audio encoding/decoding logic in vc_single Implement Base64 audio encoding/decoding logic in vc_single This commit focuses on the core logic within modules.py to fully enable Base64 audio data flow for API endpoints (e.g., infer_convert). 1. Fix Base64 Input Handling (f0_file object): - Corrects the bug where Base64 input failed because the function expected a file path. - If f0_file is a file-like object, the code now extracts the path via the '.name' attribute to ensure compatibility with subsequent file-based processing. 2. Implement Base64 Output: - The converted audio (audio_opt) is now safely written to a temporary WAV file using soundfile.write. - The WAV content is read and encoded into a Base64 Data URI (data:audio/wav;base64,...). - The function now returns this Base64 data as the 3rd element in the return tuple, allowing external tools to bypass the Gradio Audio component and receive raw data. 3. Fix vc_multi Compatibility: - The return value of vc_single was increased from 2 to 3 elements. - Updated the call site in vc_multi to accept the 3rd element as 'base64_opt' to prevent runtime errors and ensure code clarity in batch conversion mode. --- infer/modules/vc/modules.py | 45 ++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/infer/modules/vc/modules.py b/infer/modules/vc/modules.py index 6f695cc39..72767e65a 100644 --- a/infer/modules/vc/modules.py +++ b/infer/modules/vc/modules.py @@ -8,6 +8,9 @@ import torch from io import BytesIO +import base64 +import os + from infer.lib.audio import load_audio, wav2 from infer.lib.infer_pack.models import ( SynthesizerTrnMs256NSFsid, @@ -158,6 +161,15 @@ def vc_single( rms_mix_rate, protect, ): + + if (input_audio_path is None or input_audio_path == ""): + if isinstance(f0_file, str) and f0_file != "": + input_audio_path = f0_file + f0_file = None + elif hasattr(f0_file, 'name') and isinstance(getattr(f0_file, 'name'), str) and getattr(f0_file, 'name') != "": + input_audio_path = getattr(f0_file, 'name') + f0_file = None + if input_audio_path is None: return "You need to upload an audio", None f0_up_key = int(f0_up_key) @@ -214,15 +226,42 @@ def vc_single( if os.path.exists(file_index) else "Index not used." ) + + audio_return = (tgt_sr, audio_opt) + + cleaned_input_path = "" + if input_audio_path and isinstance(input_audio_path, str): + cleaned_input_path = input_audio_path.strip(" ").strip('"').strip("\n") + + temp_dir = os.path.dirname(cleaned_input_path) if cleaned_input_path and os.path.dirname(cleaned_input_path) else "./" + + temp_wav_path = os.path.join(temp_dir, f"temp_rvc_base64_{os.getpid()}.wav") + + sf.write(temp_wav_path, audio_opt, tgt_sr, format='WAV') + + with open(temp_wav_path, "rb") as f: + raw_base64 = base64.b64encode(f.read()).decode('utf-8') + + os.remove(temp_wav_path) + + base64_data_uri = f"data:audio/wav;base64,{raw_base64}" + return ( "Success.\n%s\nTime:\nnpy: %.2fs, f0: %.2fs, infer: %.2fs." % (index_info, *times), - (tgt_sr, audio_opt), + + audio_return, + + { + "name": "rvc_conversion.wav", + "data": base64_data_uri, + "is_us_base64": True + } ) except: info = traceback.format_exc() logger.warning(info) - return info, (None, None) + return info, (None, None), None def vc_multi( self, @@ -259,7 +298,7 @@ def vc_multi( paths = [path.name for path in paths] infos = [] for path in paths: - info, opt = self.vc_single( + info, opt, base64_opt = self.vc_single( sid, path, f0_up_key, From 486a4adf5be2c41c404432fbd6c1a4637992e0f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=81=AD=E3=81=8A=E3=82=93?= <240400715+neon-aiart@users.noreply.github.com> Date: Mon, 24 Nov 2025 10:47:18 +0900 Subject: [PATCH 3/5] fix: Implement wildcard file search to resolve audio path mismatch Implement wildcard file search to resolve audio path mismatch This commit addresses a file path resolution issue in audio.py, specifically when dealing with audio files generated with random suffixes in their filenames (e.g., temporary files). 1. Add glob import: Imports the standard 'glob' module for wildcard searching. 2. Wildcard Path Resolution: If os.path.exists(file) is False, a wildcard search is performed by inserting '*' before the file extension (e.g., 'file.wav' becomes 'file*.wav'). This correctly finds the temporary audio file even if it contains a random suffix, preventing the original 'file not found' error. --- infer/lib/audio.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/infer/lib/audio.py b/infer/lib/audio.py index 60ef07cda..5e6af1971 100644 --- a/infer/lib/audio.py +++ b/infer/lib/audio.py @@ -5,6 +5,7 @@ from io import BytesIO import traceback import re +import glob def wav2(i, o, format): @@ -36,6 +37,17 @@ def load_audio(file, sr): # This launches a subprocess to decode audio while down-mixing and resampling as necessary. # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed. file = clean_path(file) # 防止小白拷路径头尾带了空格和"和回车 + + if os.path.exists(file) == False: + base_name = os.path.basename(file) + + if "." in base_name and file != "": + search_path = file.rsplit(".", 1)[0] + "*" + "." + file.rsplit(".", 1)[1] + found_files = glob.glob(search_path) + + if len(found_files) > 0: + file = found_files[0] + if os.path.exists(file) == False: raise RuntimeError( "You input a wrong audio path that does not exists, please fix it!" @@ -58,3 +70,4 @@ def clean_path(path_str): path_str = path_str.replace("/", "\\") path_str = re.sub(r'[\u202a\u202b\u202c\u202d\u202e]', '', path_str) # 移除 Unicode 控制字符 return path_str.strip(" ").strip('"').strip("\n").strip('"').strip(" ") + From f10a1437a667f8882ca0fbd3db2e7e235f6cc54f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=81=AD=E3=81=8A=E3=82=93?= <240400715+neon-aiart@users.noreply.github.com> Date: Mon, 24 Nov 2025 11:21:46 +0900 Subject: [PATCH 4/5] fix: Implement wildcard file search to resolve audio path mismatch Implement wildcard file search to resolve audio path mismatch This commit addresses a file path resolution issue in audio.py, specifically when dealing with audio files generated with random suffixes in their filenames (e.g., temporary files). 1. Add glob import: Imports the standard 'glob' module for wildcard searching. 2. Wildcard Path Resolution: If os.path.exists(file) is False, a wildcard search is performed by inserting '*' before the file extension (e.g., 'file.wav' becomes 'file*.wav'). This correctly finds the temporary audio file even if it contains a random suffix, preventing the original 'file not found' error. --- infer/lib/audio.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/infer/lib/audio.py b/infer/lib/audio.py index 5e6af1971..efe47abed 100644 --- a/infer/lib/audio.py +++ b/infer/lib/audio.py @@ -40,11 +40,13 @@ def load_audio(file, sr): if os.path.exists(file) == False: base_name = os.path.basename(file) - - if "." in base_name and file != "": - search_path = file.rsplit(".", 1)[0] + "*" + "." + file.rsplit(".", 1)[1] + + if "." in base_name and file != "": + search_path = ( + file.rsplit(".", 1)[0] + "*" + "." + file.rsplit(".", 1)[1] + ) found_files = glob.glob(search_path) - + if len(found_files) > 0: file = found_files[0] @@ -64,10 +66,10 @@ def load_audio(file, sr): return np.frombuffer(out, np.float32).flatten() - def clean_path(path_str): if platform.system() == "Windows": path_str = path_str.replace("/", "\\") - path_str = re.sub(r'[\u202a\u202b\u202c\u202d\u202e]', '', path_str) # 移除 Unicode 控制字符 + path_str = re.sub( + r"[\u202a\u202b\u202c\u202d\u202e]", "", path_str + ) # 移除 Unicode 控制字符 return path_str.strip(" ").strip('"').strip("\n").strip('"').strip(" ") - From d35f4fcd1c03cd5191c0938aa7212057d60a7423 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=81=AD=E3=81=8A=E3=82=93?= <240400715+neon-aiart@users.noreply.github.com> Date: Mon, 24 Nov 2025 11:27:21 +0900 Subject: [PATCH 5/5] fix: Implement Base64 audio encoding/decoding logic in vc_single Implement Base64 audio encoding/decoding logic in vc_single This commit focuses on the core logic within modules.py to fully enable Base64 audio data flow for API endpoints (e.g., infer_convert). 1. Fix Base64 Input Handling (f0_file object): - Corrects the bug where Base64 input failed because the function expected a file path. - If f0_file is a file-like object, the code now extracts the path via the '.name' attribute to ensure compatibility with subsequent file-based processing. 2. Implement Base64 Output: - The converted audio (audio_opt) is now safely written to a temporary WAV file using soundfile.write. - The WAV content is read and encoded into a Base64 Data URI (data:audio/wav;base64,...). - The function now returns this Base64 data as the 3rd element in the return tuple, allowing external tools to bypass the Gradio Audio component and receive raw data. 3. Fix vc_multi Compatibility: - The return value of vc_single was increased from 2 to 3 elements. - Updated the call site in vc_multi to accept the 3rd element as 'base64_opt' to prevent runtime errors and ensure code clarity in batch conversion mode. --- infer/modules/vc/modules.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/infer/modules/vc/modules.py b/infer/modules/vc/modules.py index 72767e65a..5d3fb0c31 100644 --- a/infer/modules/vc/modules.py +++ b/infer/modules/vc/modules.py @@ -162,12 +162,16 @@ def vc_single( protect, ): - if (input_audio_path is None or input_audio_path == ""): + if input_audio_path is None or input_audio_path == "": if isinstance(f0_file, str) and f0_file != "": input_audio_path = f0_file f0_file = None - elif hasattr(f0_file, 'name') and isinstance(getattr(f0_file, 'name'), str) and getattr(f0_file, 'name') != "": - input_audio_path = getattr(f0_file, 'name') + elif ( + hasattr(f0_file, "name") + and isinstance(getattr(f0_file, "name"), str) + and getattr(f0_file, "name") != "" + ): + input_audio_path = getattr(f0_file, "name") f0_file = None if input_audio_path is None: @@ -227,20 +231,24 @@ def vc_single( else "Index not used." ) - audio_return = (tgt_sr, audio_opt) + audio_return = (tgt_sr, audio_opt) cleaned_input_path = "" if input_audio_path and isinstance(input_audio_path, str): cleaned_input_path = input_audio_path.strip(" ").strip('"').strip("\n") - temp_dir = os.path.dirname(cleaned_input_path) if cleaned_input_path and os.path.dirname(cleaned_input_path) else "./" + temp_dir = ( + os.path.dirname(cleaned_input_path) + if cleaned_input_path and os.path.dirname(cleaned_input_path) + else "./" + ) - temp_wav_path = os.path.join(temp_dir, f"temp_rvc_base64_{os.getpid()}.wav") + temp_wav_path = os.path.join(temp_dir, f"temp_rvc_base64_{os.getpid()}.wav") - sf.write(temp_wav_path, audio_opt, tgt_sr, format='WAV') + sf.write(temp_wav_path, audio_opt, tgt_sr, format="WAV") with open(temp_wav_path, "rb") as f: - raw_base64 = base64.b64encode(f.read()).decode('utf-8') + raw_base64 = base64.b64encode(f.read()).decode("utf-8") os.remove(temp_wav_path) @@ -249,14 +257,12 @@ def vc_single( return ( "Success.\n%s\nTime:\nnpy: %.2fs, f0: %.2fs, infer: %.2fs." % (index_info, *times), - audio_return, - { "name": "rvc_conversion.wav", "data": base64_data_uri, - "is_us_base64": True - } + "is_us_base64": True, + }, ) except: info = traceback.format_exc()