Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 131 additions & 0 deletions delete-channels-zero-videos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
"""
This script deletes channels that have 0 videos from TubeArchivist.
By default only unsubscribed channels are considered.
It will also remove any associated playlists.
"""

import os
import sys
import requests
import click
from elasticsearch import Elasticsearch
from dotenv import load_dotenv


# Load settings from a local .env file (see .env.sample) into the process
# environment before any of the os.getenv() calls below run.
load_dotenv()

# Shared Elasticsearch client used for every index query in this script.
es = Elasticsearch(
    [os.getenv("ES_HOST")],
    basic_auth=(os.getenv("ES_USER"), os.getenv("ES_PASSWORD")),
)

# Shared HTTP session for TubeArchivist API calls; cli() adds the
# Authorization header to it before any request is sent.
session = requests.Session()

TA_URL = os.getenv("TA_URL")
# Strip a trailing slash so a TA_URL such as "http://host:8000/" does not
# produce "http://host:8000//api". TA_URL may be None here; cli() rejects
# that case before API_URL is ever used.
API_URL = f"{(TA_URL or '').rstrip('/')}/api"


def get_channels_with_zero_videos(include_subscribed=False):
    """Return the channels that have no documents in the video index.

    Channels are read from the ``ta_channel`` index and each one is checked
    against the ``ta_video`` index; a channel is reported when no video
    references its ID.

    Args:
        include_subscribed: When false (the default), only channels whose
            ``channel_subscribed`` field is false are considered. When true,
            every channel is considered.

    Returns:
        A list of ``(channel_id, channel_name)`` tuples. The name falls back
        to ``"Unknown"`` when the channel document has no ``channel_name``.
    """
    # Upper bound on the number of channel documents fetched in one request.
    max_channels = 10000

    if include_subscribed:
        channel_filter = {"match_all": {}}
    else:
        channel_filter = {
            "bool": {"must": [{"term": {"channel_subscribed": False}}]}
        }

    channel_query = {
        "size": max_channels,
        "query": channel_filter,
        "_source": ["channel_id", "channel_name"],
    }
    channel_hits = es.search(index="ta_channel", body=channel_query)["hits"]["hits"]

    if len(channel_hits) >= max_channels:
        # The request is capped at max_channels, so a full page means some
        # channels may not have been returned and will not be checked.
        print(
            f"Warning: channel listing reached the {max_channels} result limit; "
            "some channels may not have been checked."
        )

    channels = {
        hit["_source"]["channel_id"]: hit["_source"].get("channel_name", "Unknown")
        for hit in channel_hits
    }
    print(f"Total channels: {len(channels)}")

    channels_without_videos = []
    for channel_id, channel_name in channels.items():
        # Only the total hit count is needed, so ask for zero documents
        # instead of fetching a page of full video records per channel.
        video_query = {
            "size": 0,
            "query": {"term": {"channel.channel_id": channel_id}},
        }
        video_results = es.search(index="ta_video", body=video_query)

        if video_results["hits"]["total"]["value"] == 0:
            channels_without_videos.append((channel_id, channel_name))

    print(f"Channels without videos: {len(channels_without_videos)}")
    return channels_without_videos


def delete_channel(channel_id):
    """Issue a DELETE for one channel against the TubeArchivist API.

    Args:
        channel_id: ID of the channel, placed in the request path.

    Returns:
        A ``(status_code, body_text)`` tuple taken from the HTTP response.
    """
    response = session.delete(f"{API_URL}/channel/{channel_id}/")
    return response.status_code, response.text


@click.command()
@click.option("--dry-run", is_flag=True, help="Show channels that would be deleted without actually deleting.")
@click.option("--channel-id", type=str, help="Delete a specific channel by ID.")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation prompt.")
@click.option("--include-subscribed", is_flag=True, help="Also delete subscribed channels (default: only unsubscribed).")
def cli(dry_run, channel_id, yes, include_subscribed):
    """Delete channels with 0 videos from TubeArchivist."""
    # Flow: authenticate the session, validate the environment, build the
    # list of (channel_id, channel_name) targets, show it, optionally
    # confirm, then delete each target through the API.
    api_token = os.getenv("TA_API_TOKEN")
    session.headers.update({"Authorization": f"Token {api_token}"})

    # Both the base URL and the API token are required to talk to the API.
    if not (os.getenv("TA_URL") and api_token):
        print("Please set all required environment variables. See .env.sample")
        sys.exit(1)

    if channel_id:
        # Explicit target: look the channel up only to confirm it exists and
        # to obtain a display name.
        lookup = {
            "query": {"term": {"channel_id": channel_id}},
            "_source": ["channel_id", "channel_name"],
        }
        matches = es.search(index="ta_channel", body=lookup)["hits"]["hits"]
        if not matches:
            print(f"Channel {channel_id} not found.")
            return
        found_name = matches[0]["_source"].get("channel_name", "Unknown")
        channels_to_delete = [(channel_id, found_name)]
    else:
        channels_to_delete = get_channels_with_zero_videos(include_subscribed)

    if not channels_to_delete:
        print("No channels to delete.")
        return

    print(f"\nChannels to delete: {len(channels_to_delete)}")
    for target_id, target_name in channels_to_delete:
        print(f" - {target_id} ({target_name})")

    if dry_run:
        print("\nDry run mode - no channels were deleted.")
        return

    if not yes:
        answer = input("\nAre you sure you want to delete these channels? (y/n): ")
        # Anything other than a single "y" (any case) aborts.
        if answer.lower() != "y":
            print("Aborted.")
            return

    print("\nDeleting channels...")
    for target_id, target_name in channels_to_delete:
        status_code, response = delete_channel(target_id)
        if status_code not in (200, 204):
            print(f" Failed to delete {target_id} ({target_name}): {status_code} - {response}")
            continue
        print(f" Deleted: {target_id} ({target_name})")


if __name__ == "__main__":
    cli()