From ee84d216a8fa7781d4c065157a96cefef46ad43b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 19 Nov 2025 09:32:30 +0000 Subject: [PATCH 1/2] Initial plan From bbc0e3183987aee56a17d76df66b2a3b054b146d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 19 Nov 2025 10:10:39 +0000 Subject: [PATCH 2/2] Fix download error with improved fallback strategy Co-authored-by: Shunian-Chen <40827070+Shunian-Chen@users.noreply.github.com> --- .../0_video_download/download_clips.py | 43 +++++++++++++++---- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/data_pipeline/0_video_download/download_clips.py b/data_pipeline/0_video_download/download_clips.py index 516bf26..4fcc1af 100644 --- a/data_pipeline/0_video_download/download_clips.py +++ b/data_pipeline/0_video_download/download_clips.py @@ -229,8 +229,8 @@ def run_yt_dlp_multi_sections( "--clean-info-json", # 清理信息文件 # --- 输出模板 --- "-o", output_template, - # 尽量拿到 H.264+AAC,可无损 remux;退化到 best 也能跑 - "-f", "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best", + # 更宽松的格式选择:优先 mp4,但允许其他格式 + "-f", "bestvideo[ext=mp4]+bestaudio[ext=m4a]/bestvideo+bestaudio/best", "--merge-output-format", "mp4", ] if strict_cuts: @@ -247,24 +247,29 @@ def run_yt_dlp_multi_sections( proc = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8') if proc.returncode == 0: return 0, proc.stdout.strip() - # 简单回退:遇到“格式不可用”就退到 best + # 多层回退策略:遇到"格式不可用"时使用更简单的格式选择 err_msg = (proc.stderr.strip() or proc.stdout.strip()) - if "Requested format is not available" in err_msg: + if "Requested format is not available" in err_msg or "format" in err_msg.lower(): + # 第一次回退:尝试更简单的格式选择(无编解码器限制) fallback_cmd = [ *base_cmd, "-4", "--ignore-config", "--no-playlist", "--retries", "10", "--fragment-retries", "10", "--concurrent-fragments", "8", "-N", "4", "--no-warnings", "--restrict-filenames", - "-c", "--no-overwrites", + "--no-continue", "--no-overwrites", # 保持与主命令一致 # --- 新增功能 (回退) --- "--print", "after_move:filepath", "--write-subs", "--write-auto-subs", "--write-description", - "--extract-audio", "--audio-format", "m4a", "--keep-video", + "--extract-audio", "--audio-format", "m4a", "--audio-quality", "0", + "--keep-video", + "--no-keep-fragments", + "--clean-info-json", # --- 输出模板 (回退) --- "-o", output_template, - "-f", "bestvideo[ext=mp4][vcodec!=none]+bestaudio[ext=m4a]/best[ext=mp4][vcodec!=none]", - "--remux-video", "mp4", + # 第一次回退:只要求最佳质量,不限制格式 + "-f", "best", + "--merge-output-format", "mp4", ] if strict_cuts: fallback_cmd.append("--force-keyframes-at-cuts") @@ -276,7 +281,27 @@ def run_yt_dlp_multi_sections( proc2 = subprocess.run(fallback_cmd, capture_output=True, text=True, encoding='utf-8') if proc2.returncode == 0: return 0, proc2.stdout.strip() - return proc2.returncode, (proc2.stderr.strip() or proc2.stdout.strip()) + + # 第二次回退:移除所有可选功能,仅下载视频 + minimal_cmd = [ + *base_cmd, + "-4", "--ignore-config", "--no-playlist", + "--retries", "10", "--fragment-retries", "10", + "--no-warnings", "--restrict-filenames", + "--no-continue", "--no-overwrites", + "--print", "after_move:filepath", + "-o", output_template, + "-f", "best", + ] + if extractor_args: + minimal_cmd.extend(["--extractor-args", extractor_args]) + minimal_cmd.extend(section_args) + minimal_cmd.append(url) + + proc3 = subprocess.run(minimal_cmd, capture_output=True, text=True, encoding='utf-8') + if proc3.returncode == 0: + return 0, proc3.stdout.strip() + return proc3.returncode, (proc3.stderr.strip() or proc3.stdout.strip()) return proc.returncode, err_msg except Exception as exc: # noqa: BLE001 return 1, f"yt-dlp failed: {exc}"