threads-dlp/uploader.py at master · LayorX/threads-dlp · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
import os
import json
import subprocess
import time
from datetime import datetime, timedelta, timezone
import google.generativeai as genai
import logging
import sys
import argparse
from dotenv import load_dotenv

from modules.database import init_db, get_all_videos_to_upload, update_upload_status, get_all_uploaded_videos, delete_video_record

class YouTubeQuotaExceededError(Exception):
    """自訂異常，用於表示 YouTube API 上傳配額已用盡。"""
    pass

def load_language_strings(language='zh-TW') -> dict:
    """從 languages.json 載入指定語言的字串。"""
    try:
        # 調整路徑以適應模組導入
        base_dir = os.path.dirname(os.path.abspath(__file__))
        lang_file_path = os.path.join(base_dir, 'languages.json')
        with open(lang_file_path, 'r', encoding='utf-8') as f:
            all_strings = json.load(f)
            # 返回 uploader 模組專用的字串，如果不存在則返回空字典
            return all_strings.get(language, {}).get('uploader', {})
    except (FileNotFoundError, json.JSONDecodeError):
        logging.error("語言檔案 languages.json 遺失或格式錯誤。")
        return {}

def setup_logging():
    """設定一個冪等的日誌記錄器，避免在匯入時重複設定。"""
    log_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    root_logger = logging.getLogger()
    if not root_logger.handlers:
        root_logger.setLevel(logging.INFO)
        file_handler = logging.FileHandler("upload_log.txt", encoding='utf-8')
        file_handler.setFormatter(log_formatter)
        root_logger.addHandler(file_handler)
        console_handler = logging.StreamHandler(sys.stdout)
        try:
            # 嘗試設定主控台編碼為 UTF-8，以處理 Windows 環境下的特殊字元
            console_handler.stream.reconfigure(encoding='utf-8')
        except TypeError:
            # 在非 Windows 或不支援的環境中，這個操作可能會失敗
            pass
        console_handler.setFormatter(log_formatter)
        root_logger.addHandler(console_handler)

def get_folder_size(path='.'):
    """計算指定路徑下所有檔案的大小，並返回 GB 為單位的值。"""
    total_size = 0
    for dirpath, dirnames, filenames in os.walk(path):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            # 確保不是符號連結，以避免重複計算
            if not os.path.islink(fp):
                total_size += os.path.getsize(fp)
    return total_size / (1024**3) # 從 Bytes 轉換為 Gigabytes

def cleanup_uploaded_files(downloads_path="downloads", language='zh-TW'):
    """從資料庫獲取已上傳的影片列表，並刪除其本地檔案及對應的 .json 檔案。"""
    lang_strings = load_language_strings(language)
    logging.info(lang_strings.get('cleanup_start', "開始清理已上傳的檔案..."))
    uploaded_video_paths = get_all_uploaded_videos()
    deleted_count = 0
    if not uploaded_video_paths:
        logging.info(lang_strings.get('cleanup_no_files', "資料庫中沒有已上傳的影片可供清理。"))
        return

    for video_path in uploaded_video_paths:
        if video_path and os.path.exists(video_path):
            try:
                # 刪除影片檔案
                os.remove(video_path)
                logging.info(lang_strings.get('cleanup_deleted_video', "已刪除已上傳的影片: {path}").format(path=video_path))
                deleted_count += 1

                # 嘗試刪除對應的 metadata.json 檔案
                meta_path = os.path.splitext(video_path)[0] + '.json'
                if os.path.exists(meta_path):
                    os.remove(meta_path)
                    logging.info(lang_strings.get('cleanup_deleted_meta', "已刪除對應的元數據檔案: {path}").format(path=meta_path))

            except OSError as e:
                logging.error(lang_strings.get('cleanup_delete_error', "刪除檔案 {path} 時發生錯誤: {error}").format(path=video_path, error=e))
    logging.info(lang_strings.get('cleanup_done', "清理完畢。共刪除了 {count} 個影片檔案。").format(count=deleted_count))


def load_config() -> dict:
    """從 .env 和 config.json 載入設定，並整合環境變數作為最高優先級。"""
    # 為了本地開發方便，從 .env 檔案載入環境變數
    load_dotenv()

    # 先從 config.json 讀取基礎設定 (如果存在)
    try:
        with open("config.json", 'r', encoding='utf-8') as f:
            config = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        config = {}

    # 從環境變數讀取設定，這會覆寫 config.json 中的同名設定
    # API 金鑰
    config['api_key'] = os.getenv('GEMINI_API_KEY', config.get('api_key'))

    # 上傳器路徑 (根據作業系統決定預設值)
    default_uploader_path = './youtubeuploader.exe' if os.name == 'nt' else './youtubeuploader'
    config['youtube_uploader_path'] = os.getenv('YOUTUBE_UPLOADER_PATH', config.get('youtube_uploader_path', default_uploader_path))

    # 排程邏輯
    config['is_publish_now'] = os.getenv('PUBLISH_NOW', str(config.get('is_publish_now', False))).lower() in ['true', '1', 't']
    config['publish_start_from'] = int(os.getenv('PUBLISH_START_FROM_HOURS', config.get('publish_start_from', 0)))
    config['time_increment_hours'] = int(os.getenv('PUBLISH_INTERVAL_HOURS', config.get('time_increment_hours', 2)))

    # 檢查關鍵設定是否存在
    if not config.get('api_key') or "GEMINI API" in config.get('api_key', ''):
        logging.warning("警告: Gemini API 金鑰未設定。請設定 GEMINI_API_KEY 環境變數或在 config.json 中填寫。")

    return config

def generate_metadata(full_caption: str, video_filename: str, publish_time_iso: str, config: dict):
    """使用 Gemini API 為影片生成標題、描述和標籤。"""
    # 如果影片本身沒有任何文字描述，則嘗試使用影片檔名作為備用描述
    if not full_caption or not full_caption.strip():
        full_caption = os.path.splitext(video_filename)[0].replace('_', ' ').replace('-', ' ').strip()
        logging.info(f"原始描述為空，改用檔名 '{full_caption}' 作為生成內容的基礎。")

    # 雙重保險：如果處理後描述依然為空，則使用一個通用的預設值
    if not full_caption or not full_caption.strip():
        full_caption = "一部有趣的影片"  # 最終的硬編碼備用方案
        logging.warning(f"生成內容的基礎描述依然為空，強制使用通用描述: '{full_caption}'")

    api_key = config.get("api_key")
    if not api_key:
        raise ValueError("Gemini API 金鑰未設定。")
    genai.configure(api_key=api_key)

    # 使用較新的模型名稱
    model = genai.GenerativeModel('gemini-flash-lite-latest')

    # 設定安全設定，以避免因內容審查而被 API 阻擋
    safety_settings = [
        {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
        {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
        {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
        {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
    ]

    prompt = f"""
    你是一位專業的 YouTube 內容策略師。
    請根據以下影片資訊，生成一份用於 YouTube 上傳的中繼資料 JSON。

    **嚴格要求:**
    0.  **影片內容核心摘要** : `{full_caption}`
    1.  **純JSON輸出**: 你的回應 **只能** 包含 JSON 內容，禁止包含任何額外的文字或 markdown 標記 (例如 ```json ... ```)。
    2.  **發布時間**: `publishAt` 欄位的值 **必須** 是 `{publish_time_iso}`。
    3.  **多語言在地化**:
        - 主要語言 (`language`) 設定為 `en` (英文)。
        - `title` 和 `description` 必須是英文。
        - 在 `localizations` 物件中，提供 `zh-TW` (繁體中文), `ja` (日文), `ko` (韓文), `fr` (法文), `zh-CN` (簡體中文) 的標題和描述。
    4.  **內容優化**:
        - 標題應吸引人，與影片主題相關，適合 YouTube Shorts，並可適度包含一個 Emoji。
        - 描述內容應詳細，並在結尾附上 3-5 個相關的 hashtags。
    5.  **JSON 結構**: 嚴格遵守下方提供的 meta.json 結構。

    ```json
    {{
      "title": "Engaging English Title",
      "description": "Detailed English description of the video, ending with #hashtags.",
      "tags": ["tag1", "tag2", "tag3"],
      "privacyStatus": "private",
      "madeForKids": false,
      "embeddable": true,
      "license": "youtube",
      "publicStatsViewable": true,
      "publishAt": "{publish_time_iso}",
      "language": "en",
      "localizations": {{
        "zh-TW": {{"title": "吸引人的繁體中文標題", "description": "詳細的繁體中文影片描述，以 #hashtags 結尾。"}},
        "ja": {{"title": "魅力的な日本語のタイトル", "description": "詳細な日本語の動画説明、最後に #ハッシュタグ。"}},
        "ko": {{"title": "매력적인 한국어 제목", "description": "자세한 한국어 동영상 설명, #해시태그 로 끝납니다."}},
        "fr": {{"title": "Titre français engageant", "description": "Description détaillée de la vidéo en français, se terminant par des #hashtags."}},
        "zh-CN": {{"title": "吸引人的简体中文标题", "description": "详细的简体中文视频描述，以 #hashtags 结尾。"}}
      }}
    }}
    ```
    """

    try:
        logging.info("正在請求 Gemini API 生成影片元數據...")
        response = model.generate_content(prompt, safety_settings=safety_settings)

        # 移除潛在的 Markdown 格式
        cleaned_text = response.text.strip().replace('```json', '').replace('```', '').strip()

        if not cleaned_text:
            logging.error("Gemini API 返回了空的內容。")
            return None

        return json.loads(cleaned_text)
    except json.JSONDecodeError as e:
        logging.error(f"JSON 解碼失敗: {e}. 從 API 收到的原始文本是: '{response.text}'")
        return None
    except Exception as e:
        logging.error(f"生成元數據時發生未預期的錯誤: {e}")
        return None

def upload_video(video_path: str, meta_path: str, config: dict):
    """呼叫外部的 youtubeuploader 執行檔來上傳影片。"""
    uploader_path = config.get("youtube_uploader_path")
    if not uploader_path or not os.path.exists(uploader_path):
        logging.error(f"上傳器執行檔在 '{uploader_path}' 未找到。請設定 YOUTUBE_UPLOADER_PATH 環境變數或在 config.json 中配置。")
        return False

    # 確保授權檔案存在
    try:
        _ensure_file_from_env("client_secrets.json", "YT_CLIENT_SECRETS")
        _ensure_file_from_env("request.token", "YT_REQUEST")
    except FileNotFoundError as e:
        logging.error(f"無法繼續上傳，因為缺少授權檔案: {e}")
        return False

    command = [uploader_path, "-filename", video_path, "-metaJSON", meta_path]
    try:
        logging.info(f"正在上傳 '{video_path}'...")
        process = subprocess.run(command, check=True, capture_output=True, text=True, encoding='utf-8')
        logging.info(f"成功上傳: {video_path}")
        logging.debug(f"Uploader output: {process.stdout}")
        return True
    except subprocess.CalledProcessError as e:
        error_output = e.stderr + e.stdout # 檢查標準錯誤和標準輸出
        quota_exceeded_message = "Error 400: The user has exceeded the number of videos they may upload."

        if quota_exceeded_message in error_output:
            logging.critical(f"YouTube API 配額已用盡，無法上傳更多影片。錯誤訊息: {error_output}")
            raise YouTubeQuotaExceededError(quota_exceeded_message)

        logging.error(f"上傳 '{video_path}' 失敗。Uploader 執行出錯。")
        logging.error(f"Return code: {e.returncode}")
        logging.error(f"STDOUT: {e.stdout}")
        logging.error(f"STDERR: {e.stderr}")
        return False
    except Exception as e:
        logging.error(f"上傳期間發生未預期的錯誤: {e}")
        return False

def _ensure_file_from_env(file_path: str, env_var: str):
    """
    輔助函式，確保一個檔案存在。如果檔案不存在，則嘗試從環境變數創建它。
    如果檔案和環境變數都缺失，則拋出 FileNotFoundError。
    """
    if not os.path.exists(file_path):
        logging.info(f"'{file_path}' 不存在。正在嘗試從環境變數 '{env_var}' 創建。")
        file_data = os.getenv(env_var)

        if file_data:
            try:
                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write(file_data)
                logging.info(f"成功從環境變數創建了 '{file_path}'。")
            except IOError as e:
                logging.error(f"寫入 '{file_path}' 失敗: {e}")
                raise
        else:
            message = f"致命錯誤: '{file_path}' 不存在且 '{env_var}' 環境變數也未設定。無法繼續。"
            logging.critical(message)
            raise FileNotFoundError(message)

def run_upload_task(cleanup_threshold_gb=0.8, num_videos=None, language='zh-TW'):
    """上傳影片的核心任務，包含清理邏輯。"""
    lang_strings = load_language_strings(language)

    # --- 磁碟空間清理檢查 ---
    downloads_folder = "downloads"
    folder_size_gb = get_folder_size(downloads_folder)
    logging.info(lang_strings.get('folder_size_check', "'{folder}' 資料夾目前大小: {size:.2f} GB。清理閾值為: {threshold} GB.").format(folder=downloads_folder, size=folder_size_gb, threshold=cleanup_threshold_gb))

    if folder_size_gb > cleanup_threshold_gb:
        logging.warning(lang_strings.get('cleanup_triggered', "資料夾大小已超過閾值。觸發已上傳檔案的清理程序..."))
        cleanup_uploaded_files(downloads_folder, language=language)
    else:
        logging.info(lang_strings.get('cleanup_not_needed', "資料夾大小在限制範圍內，無需清理。"))

    # --- 執行上傳 ---
    config = load_config()
    videos_to_upload = get_all_videos_to_upload()
    if not videos_to_upload:
        logging.info(lang_strings.get('no_videos_to_upload', "資料庫中沒有新的影片需要上傳。"))
        return

    # --- 根據 num_videos 參數限制上傳數量 ---
    if num_videos is not None and num_videos > 0:
        logging.info(lang_strings.get('upload_limit_info', "根據 --num_videos 參數，本次最多上傳 {count} 部影片。").format(count=num_videos))
        videos_to_upload = videos_to_upload[:num_videos]

    logging.info(lang_strings.get('videos_found', "發現 {count} 部影片待上傳。").format(count=len(videos_to_upload)))

    is_publish_now = config.get("is_publish_now")
    publish_start_from = config.get("publish_start_from")
    time_increment_hours = config.get("time_increment_hours")

    first_publish_time = datetime.now(timezone.utc) + (timedelta(minutes=5) if is_publish_now else timedelta(hours=publish_start_from))

    for i, video_data in enumerate(videos_to_upload):
        video_id = video_data['video_id']
        video_path = video_data['local_path']

        # 檢查檔案是否存在，如果不存在則刪除紀錄並跳過
        if not os.path.exists(video_path):
            logging.warning(lang_strings.get('video_file_not_found_delete', "影片檔案不存在於: {path}。將從資料庫中刪除此紀錄。").format(path=video_path))
            delete_video_record(video_id)
            continue

        logging.info(lang_strings.get('processing_video', "--- 正在處理第 {current}/{total} 部影片: {filename} ---").format(current=i+1, total=len(videos_to_upload), filename=os.path.basename(video_path)))

        publish_time = first_publish_time + timedelta(hours=i * time_increment_hours)
        publish_time_iso = publish_time.isoformat()

        video_filename = os.path.basename(video_path)
        meta_filename = f"{os.path.splitext(video_filename)[0]}.json"
        meta_path = os.path.join(os.path.dirname(video_path), meta_filename)

        metadata = generate_metadata(video_data['caption'], video_filename, publish_time_iso, config)
        if not metadata:
            logging.warning(lang_strings.get('metadata_gen_failed', "因元數據生成失敗，跳過影片 '{video_id}'。").format(video_id=video_id))
            continue

        with open(meta_path, 'w', encoding='utf-8') as f:
            json.dump(metadata, f, ensure_ascii=False, indent=4)

        try:
            if upload_video(video_path, meta_path, config):
                try:
                    # 從元數據中獲取標題以存入資料庫
                    youtube_title = metadata.get('title', '')
                    update_upload_status(video_id, status=True, title=youtube_title)
                except Exception as e:
                    logging.critical(lang_strings.get('db_update_failed', "致命錯誤: 更新影片 {video_id} 的資料庫狀態失敗: {error}").format(video_id=video_id, error=e))
                    break # 資料庫更新失敗是嚴重問題，應中止
            else:
                # 上傳失敗，記錄錯誤並繼續處理下一部影片
                logging.error(lang_strings.get('upload_failed_continue', "影片 '{video_id}' 上傳失敗。將繼續處理下一部影片。").format(video_id=video_id))
                continue
        except YouTubeQuotaExceededError:
            # 捕捉到配額超限錯誤，中止整個上傳任務
            logging.critical(lang_strings.get('youtube_quota_exceeded', "YouTube API 上傳配額已用盡。中止本次所有上傳任務。"))
            break
        time.sleep(5)
    logging.info(lang_strings.get('upload_task_complete', "所有上傳任務已完成。"))

def main():
    """CLI 進入點，解析參數並執行上傳任務。"""
    setup_logging()

    parser = argparse.ArgumentParser(description="將 'downloads' 資料夾中的影片上傳到 YouTube。")
    parser.add_argument(
        '-du', '--deleteupload',
        type=float,
        default=0.8,
        help='設定清理閾值 (GB)。當 downloads 資料夾大小超過此值時，將刪除已上傳的影片。預設值為 0.8 GB。'
    )
    parser.add_argument(
        '-n', '--num_videos',
        type=int,
        default=None,
        help='指定本次上傳影片的數量上限。預設為無限制。'
    )
    parser.add_argument(
        '-l', '--language',
        type=str,
        default='zh-TW',
        choices=['zh-TW', 'en'],
        help='設定日誌輸出的語言。'
    )
    args = parser.parse_args()

    init_db()
    run_upload_task(
        cleanup_threshold_gb=args.deleteupload,
        num_videos=args.num_videos,
        language=args.language
    )

if __name__ == "__main__":
    main()