From 5cf74e7053abf5ae9f753e68d699ad7f43ac91de Mon Sep 17 00:00:00 2001 From: Peter Nerlich Date: Mon, 24 Nov 2025 09:37:28 +0100 Subject: [PATCH 1/2] truncate long data urls in log --- linkcheck/listeners.py | 3 +++ linkcheck/utils.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/linkcheck/listeners.py b/linkcheck/listeners.py index 0642aeb..b8e99d7 100644 --- a/linkcheck/listeners.py +++ b/linkcheck/listeners.py @@ -96,6 +96,9 @@ def do_check_instance_links(sender, instance, wait=False): if len(url) > MAX_URL_LENGTH: # We cannot handle url longer than MAX_URL_LENGTH at the moment + if url.startswith("data:"): + # If the URL is a data URL, it might occupy a LOT of space in the logs without being useful – truncate it + url = url[:64] logger.warning('URL exceeding max length will be skipped: %s', url) continue diff --git a/linkcheck/utils.py b/linkcheck/utils.py index b40dbb5..dc92c2a 100644 --- a/linkcheck/utils.py +++ b/linkcheck/utils.py @@ -134,6 +134,9 @@ def update_urls(urls, content_type, object_id): if len(url) > MAX_URL_LENGTH: # We cannot handle url longer than MAX_URL_LENGTH at the moment + if url.startswith("data:"): + # If the URL is a data URL, it might occupy a LOT of space in the logs without being useful – truncate it + url = url[:64] logger.warning("URL exceeding max length will be skipped: %s", url) continue From dc8f4d5af7d95708fd9d0fdcb77e7a2de071db1d Mon Sep 17 00:00:00 2001 From: Peter Nerlich Date: Mon, 24 Nov 2025 09:43:42 +0100 Subject: [PATCH 2/2] log where a too long URL came from --- linkcheck/listeners.py | 2 +- linkcheck/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/linkcheck/listeners.py b/linkcheck/listeners.py index b8e99d7..170b51a 100644 --- a/linkcheck/listeners.py +++ b/linkcheck/listeners.py @@ -99,7 +99,7 @@ def do_check_instance_links(sender, instance, wait=False): if url.startswith("data:"): # If the URL is a data URL, it might occupy a LOT of space in the logs without being useful – 
truncate it url = url[:64] - logger.warning('URL exceeding max length will be skipped: %s', url) + logger.warning('URL exceeding max length will be skipped: %s (in %r)', url, instance) continue u, created = Url.objects.get_or_create(url=url) diff --git a/linkcheck/utils.py b/linkcheck/utils.py index dc92c2a..c189fec 100644 --- a/linkcheck/utils.py +++ b/linkcheck/utils.py @@ -137,7 +137,7 @@ def update_urls(urls, content_type, object_id): if url.startswith("data:"): # If the URL is a data URL, it might occupy a LOT of space in the logs without being useful – truncate it url = url[:64] - logger.warning("URL exceeding max length will be skipped: %s", url) + logger.warning("URL exceeding max length will be skipped: %s (in %s, id %r)", url, content_type, object_id) continue url, url_created = Url.objects.get_or_create(url=url)