From 176689d33c3105e740842730aacea9bb1ebab176 Mon Sep 17 00:00:00 2001
From: Jurrer <72750942+Jurrer@users.noreply.github.com>
Date: Tue, 12 May 2026 18:32:36 +0200
Subject: [PATCH] channels cleanup

---
 delete-channels-zero-videos.py | 159 +++++++++++++++++++++++++++++++++
 1 file changed, 159 insertions(+)
 create mode 100644 delete-channels-zero-videos.py

diff --git a/delete-channels-zero-videos.py b/delete-channels-zero-videos.py
new file mode 100644
index 0000000..7e5d5fd
--- /dev/null
+++ b/delete-channels-zero-videos.py
@@ -0,0 +1,159 @@
+"""
+This script deletes channels that have 0 videos from TubeArchivist.
+It will also remove any associated playlists
+"""
+
+import os
+import sys
+import requests
+import click
+from elasticsearch import Elasticsearch
+from dotenv import load_dotenv
+
+
+load_dotenv()
+
+es = Elasticsearch(
+    [os.getenv("ES_HOST")], basic_auth=(os.getenv("ES_USER"), os.getenv("ES_PASSWORD"))
+)
+
+session = requests.Session()
+
+# Strip a trailing slash so the joined API URLs never contain "//".
+TA_URL = (os.getenv("TA_URL") or "").rstrip("/")
+API_URL = f"{TA_URL}/api"
+
+
+def get_channels_with_zero_videos(include_subscribed=False):
+    """Return (channel_id, channel_name) tuples for channels with 0 videos.
+
+    Channels are read from the ``ta_channel`` index and each one is looked up
+    in the ``ta_video`` index. Unless ``include_subscribed`` is true, only
+    channels whose ``channel_subscribed`` field is False are considered.
+    """
+    if include_subscribed:
+        query = {"match_all": {}}
+    else:
+        query = {"bool": {"must": [{"term": {"channel_subscribed": False}}]}}
+
+    channel_query = {
+        # NOTE: one search returns at most 10000 channels; anything beyond
+        # that is not inspected.
+        "size": 10000,
+        "query": query,
+        "_source": ["channel_id", "channel_name"],
+    }
+
+    channel_results = es.search(index="ta_channel", body=channel_query)
+
+    channels = {
+        hit["_source"]["channel_id"]: hit["_source"].get("channel_name", "Unknown")
+        for hit in channel_results["hits"]["hits"]
+    }
+    print(f"Total channels: {len(channels)}")
+
+    channels_without_videos = []
+
+    for channel_id, channel_name in channels.items():
+        # Only the hit count is needed, so request no documents.
+        video_query = {
+            "size": 0,
+            "query": {"term": {"channel.channel_id": channel_id}},
+        }
+        video_results = es.search(index="ta_video", body=video_query)
+
+        if video_results["hits"]["total"]["value"] == 0:
+            channels_without_videos.append((channel_id, channel_name))
+
+    print(f"Channels without videos: {len(channels_without_videos)}")
+    return channels_without_videos
+
+
+def delete_channel(channel_id):
+    """Send a DELETE request for one channel to the TubeArchivist API.
+
+    Returns a ``(status_code, response_text)`` tuple.
+    """
+    url = f"{API_URL}/channel/{channel_id}/"
+    r = session.delete(url)
+    return r.status_code, r.text
+
+
+@click.command()
+@click.option("--dry-run", is_flag=True, help="Show channels that would be deleted without actually deleting.")
+@click.option("--channel-id", type=str, help="Delete a specific channel by ID.")
+@click.option("--yes", "-y", is_flag=True, help="Skip confirmation prompt.")
+@click.option("--include-subscribed", is_flag=True, help="Also delete subscribed channels (default: only unsubscribed).")
+def cli(dry_run, channel_id, yes, include_subscribed):
+    """Delete channels with 0 videos from TubeArchivist."""
+
+    # Check the configuration before it is used to build any request.
+    if not all(
+        [
+            os.getenv("TA_URL"),
+            os.getenv("TA_API_TOKEN"),
+        ]
+    ):
+        print("Please set all required environment variables. See .env.sample")
+        sys.exit(1)
+
+    session.headers.update(
+        {
+            "Authorization": f"Token {os.getenv('TA_API_TOKEN')}",
+        }
+    )
+
+    if channel_id:
+        channel_query = {"query": {"term": {"channel_id": channel_id}}, "_source": ["channel_id", "channel_name"]}
+        result = es.search(index="ta_channel", body=channel_query)
+        hits = result["hits"]["hits"]
+        if not hits:
+            print(f"Channel {channel_id} not found.")
+            return
+        channel_name = hits[0]["_source"].get("channel_name", "Unknown")
+        channels_to_delete = [(channel_id, channel_name)]
+    else:
+        channels_to_delete = get_channels_with_zero_videos(include_subscribed)
+
+    if not channels_to_delete:
+        print("No channels to delete.")
+        return
+
+    print(f"\nChannels to delete: {len(channels_to_delete)}")
+    # Distinct loop names keep the --channel-id argument from being rebound.
+    for target_id, target_name in channels_to_delete:
+        print(f" - {target_id} ({target_name})")
+
+    if dry_run:
+        print("\nDry run mode - no channels were deleted.")
+        return
+
+    if not yes:
+        confirmation = input("\nAre you sure you want to delete these channels? (y/n): ")
+        if confirmation.strip().lower() != "y":
+            print("Aborted.")
+            return
+
+    print("\nDeleting channels...")
+    failed = 0
+    for target_id, target_name in channels_to_delete:
+        try:
+            status_code, response = delete_channel(target_id)
+        except requests.RequestException as err:
+            # Report and continue so one failed request does not abort the batch.
+            print(f" Failed to delete {target_id} ({target_name}): {err}")
+            failed += 1
+            continue
+        if status_code in (200, 204):
+            print(f" Deleted: {target_id} ({target_name})")
+        else:
+            print(f" Failed to delete {target_id} ({target_name}): {status_code} - {response}")
+            failed += 1
+
+    if failed:
+        # Signal partial failure to the calling shell.
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    cli()