Skip to content

Commit 93d942d

Browse files
timobrembeck authored and claudep committed
Fix encoding of utf-8 domain names
1 parent 8461710 commit 93d942d

File tree

3 files changed

+41
-8
lines changed

3 files changed

+41
-8
lines changed

CHANGELOG

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
Unreleased
22

3+
* Fix encoding of utf-8 domain names (Timo Brembeck, #190)
34
* Move coverage view to management command (Timo Brembeck, #187)
45
* Add new management command `linkcheck_suggest_config`
56
* Delete coverage view

linkcheck/models.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
import re
44
from datetime import timedelta
55
from http import HTTPStatus
6-
from urllib.parse import unquote
6+
from urllib.parse import unquote, urlparse
77

88
import requests
99
from django.conf import settings
@@ -230,13 +230,14 @@ def external_url(self):
230230
if self.internal:
231231
return None
232232

233-
# Remove URL fragment identifiers
234-
external_url = self.url.rsplit('#')[0]
235-
# Check that non-ascii chars are properly encoded
236-
try:
237-
external_url.encode('ascii')
238-
except UnicodeEncodeError:
239-
external_url = iri_to_uri(external_url)
233+
# Encode path and query and remove anchor fragment
234+
parsed = urlparse(self.url)
235+
external_url = parsed._replace(
236+
path=iri_to_uri(parsed.path),
237+
query=iri_to_uri(parsed.query),
238+
fragment=""
239+
).geturl()
240+
240241
logger.debug('External URL: %s', external_url)
241242
return external_url
242243

linkcheck/tests/test_linkcheck.py

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -484,6 +484,37 @@ def test_external_check_200_utf8_not_encoded(self):
484484
self.assertEqual(uv.redirect_to, '')
485485
self.assertEqual(uv.type, 'external')
486486

487+
def test_external_check_200_utf8_domain(self):
488+
uv = Url(url='https://bafög.de/')
489+
uv.check_url()
490+
self.assertEqual(uv.status, True)
491+
self.assertEqual(uv.message, '302 Found')
492+
self.assertEqual(uv.get_message, 'Working temporary redirect')
493+
self.assertEqual(uv.error_message, '')
494+
self.assertEqual(uv.anchor_message, '')
495+
self.assertEqual(uv.ssl_status, True)
496+
self.assertEqual(uv.ssl_message, 'Valid SSL certificate')
497+
self.assertEqual(uv.get_status_code_display(), '302 Found')
498+
self.assertEqual(uv.get_redirect_status_code_display(), '200 OK')
499+
self.assertEqual(uv.type, 'external')
500+
# The actual redirect URL might be subject to change
501+
self.assertNotEqual(uv.redirect_to, '')
502+
503+
def test_external_check_200_punycode_domain(self):
504+
uv = Url(url='https://www.xn--jobbrse-stellenangebote-blc.de/')
505+
uv.check_url()
506+
self.assertEqual(uv.status, True)
507+
self.assertEqual(uv.message, '200 OK')
508+
self.assertEqual(uv.get_message, 'Working external link')
509+
self.assertEqual(uv.error_message, '')
510+
self.assertEqual(uv.anchor_message, '')
511+
self.assertEqual(uv.ssl_status, True)
512+
self.assertEqual(uv.ssl_message, 'Valid SSL certificate')
513+
self.assertEqual(uv.get_status_code_display(), '200 OK')
514+
self.assertEqual(uv.get_redirect_status_code_display(), None)
515+
self.assertEqual(uv.redirect_to, '')
516+
self.assertEqual(uv.type, 'external')
517+
487518
def test_external_check_301(self):
488519
uv = Url(url=f"{self.live_server_url}/http/301/")
489520
uv.check_url()

0 commit comments

Comments
 (0)