From 57ef2199503387617b8af3d719c74089fb70dbd4 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 8 May 2026 23:24:34 +0300 Subject: [PATCH 1/2] gh-79638: Test other HTTP error codes besides 403 in test_robotparser (#149569) Also, use urllib.request.urlcleanup() in NetworkTestCase. --- Lib/test/test_robotparser.py | 83 +++++++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 19 deletions(-) diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py index 3ea0ec66fbfbe9..cd1477037e94b7 100644 --- a/Lib/test/test_robotparser.py +++ b/Lib/test/test_robotparser.py @@ -646,26 +646,23 @@ def test_group_without_user_agent(self): ) class BaseLocalNetworkTestCase: - def setUp(self): + @classmethod + def setUpClass(cls): # clear _opener global variable - self.addCleanup(urllib.request.urlcleanup) + cls.addClassCleanup(urllib.request.urlcleanup) - self.server = HTTPServer((socket_helper.HOST, 0), self.RobotHandler) + cls.server = HTTPServer((socket_helper.HOST, 0), cls.RobotHandler) + cls.addClassCleanup(cls.server.server_close) - self.t = threading.Thread( + t = threading.Thread( name='HTTPServer serving', - target=self.server.serve_forever, + target=cls.server.serve_forever, # Short poll interval to make the test finish quickly. # Time between requests is short enough that we won't wake # up spuriously too many times. kwargs={'poll_interval':0.01}) - self.t.daemon = True # In case this function raises. - self.t.start() - - def tearDown(self): - self.server.shutdown() - self.t.join() - self.server.server_close() + cls.enterClassContext(threading_helper.start_threads([t])) + cls.addClassCleanup(cls.server.shutdown) SAMPLE_ROBOTS_TXT = b'''\ @@ -687,7 +684,6 @@ def do_GET(self): def log_message(self, format, *args): pass - @threading_helper.reap_threads def testRead(self): # Test that reading a weird robots.txt doesn't fail. addr = self.server.server_address @@ -702,31 +698,79 @@ def testRead(self): self.assertTrue(parser.can_fetch(agent, url + '/utf8/')) self.assertFalse(parser.can_fetch(agent, url + '/utf8/\U0001f40d')) self.assertFalse(parser.can_fetch(agent, url + '/utf8/%F0%9F%90%8D')) - self.assertFalse(parser.can_fetch(agent, url + '/utf8/\U0001f40d')) self.assertTrue(parser.can_fetch(agent, url + '/non-utf8/')) self.assertFalse(parser.can_fetch(agent, url + '/non-utf8/%F0')) self.assertFalse(parser.can_fetch(agent, url + '/non-utf8/\U0001f40d')) self.assertFalse(parser.can_fetch(agent, url + '/%2F[spam]/path')) -class PasswordProtectedSiteTestCase(BaseLocalNetworkTestCase, unittest.TestCase): +class HttpErrorsTestCase(BaseLocalNetworkTestCase, unittest.TestCase): class RobotHandler(BaseHTTPRequestHandler): def do_GET(self): - self.send_error(403, "Forbidden access") + self.send_error(self.server.return_code) def log_message(self, format, *args): pass - @threading_helper.reap_threads - def testPasswordProtectedSite(self): + def setUp(self): + # Make sure that a valid code is set in the test. + self.server.return_code = None + + def testUnauthorized(self): + self.server.return_code = 401 + addr = self.server.server_address + url = f'http://{socket_helper.HOST}:{addr[1]}' + robots_url = url + "/robots.txt" + parser = urllib.robotparser.RobotFileParser() + parser.set_url(url) + parser.read() + self.assertFalse(parser.can_fetch("*", robots_url)) + self.assertFalse(parser.can_fetch("*", url + '/some/file.html')) + + def testForbidden(self): + self.server.return_code = 403 + addr = self.server.server_address + url = f'http://{socket_helper.HOST}:{addr[1]}' + robots_url = url + "/robots.txt" + parser = urllib.robotparser.RobotFileParser() + parser.set_url(url) + parser.read() + self.assertFalse(parser.can_fetch("*", robots_url)) + self.assertFalse(parser.can_fetch("*", url + '/some/file.html')) + + def testNotFound(self): + self.server.return_code = 404 addr = self.server.server_address - url = 'http://' + socket_helper.HOST + ':' + str(addr[1]) + url = f'http://{socket_helper.HOST}:{addr[1]}' + robots_url = url + "/robots.txt" + parser = urllib.robotparser.RobotFileParser() + parser.set_url(url) + parser.read() + self.assertTrue(parser.can_fetch("*", robots_url)) + self.assertTrue(parser.can_fetch("*", url + '/path/file.html')) + + def testTeapot(self): + self.server.return_code = 418 + addr = self.server.server_address + url = f'http://{socket_helper.HOST}:{addr[1]}' + robots_url = url + "/robots.txt" + parser = urllib.robotparser.RobotFileParser() + parser.set_url(url) + parser.read() + self.assertTrue(parser.can_fetch("*", robots_url)) + self.assertTrue(parser.can_fetch("*", url + '/pot-1?milk-type=Cream')) + + def testServiceUnavailable(self): + self.server.return_code = 503 + addr = self.server.server_address + url = f'http://{socket_helper.HOST}:{addr[1]}' robots_url = url + "/robots.txt" parser = urllib.robotparser.RobotFileParser() parser.set_url(url) parser.read() self.assertFalse(parser.can_fetch("*", robots_url)) + self.assertFalse(parser.can_fetch("*", url + '/path/file.html')) @support.requires_working_socket() @@ -738,6 +782,7 @@ class NetworkTestCase(unittest.TestCase): @classmethod def setUpClass(cls): support.requires('network') + cls.addClassCleanup(urllib.request.urlcleanup) with socket_helper.transient_internet(cls.base_url): cls.parser = urllib.robotparser.RobotFileParser(cls.robots_txt) cls.parser.read() From 45c47d26c230086163ac1ef0aa9f955f794fb69c Mon Sep 17 00:00:00 2001 From: Jeff Lyon <146767590+secengjeff@users.noreply.github.com> Date: Fri, 8 May 2026 13:33:05 -0700 Subject: [PATCH 2/2] gh-149496: Fix MacOSTest.test_default regression when BROWSER env var is set (GH-149579) gh-149496: Fix MacOSTest.test_default failing when BROWSER env var is set MacOSTest.test_default calls webbrowser.get() and asserts it returns a MacOS instance. When BROWSER is set in the environment (e.g. BROWSER=open, a common macOS workaround for the old osascript-based implementation), register_standard_browsers() registers a GenericBrowser as the preferred browser instead, causing the assertion to fail. This is a regression introduced in gh-137586, which added MacOSTest and moved test_default into it from MacOSXOSAScriptTest. MacOSXOSAScriptTest had an identical setUp() guard added in gh-131254 specifically to fix this same failure. The guard was not carried over to MacOSTest. Add setUp() to MacOSTest to unset BROWSER for the duration of each test, restoring the isolation that was already established as the correct pattern for macOS webbrowser tests. --- Lib/test/test_webbrowser.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_webbrowser.py b/Lib/test/test_webbrowser.py index 51d627d24c5a8a..82f14ca968f266 100644 --- a/Lib/test/test_webbrowser.py +++ b/Lib/test/test_webbrowser.py @@ -340,6 +340,10 @@ def close(self): @requires_subprocess() class MacOSTest(unittest.TestCase): + def setUp(self): + env = self.enterContext(os_helper.EnvironmentVarGuard()) + env.unset("BROWSER") + def test_default(self): browser = webbrowser.get() self.assertIsInstance(browser, webbrowser.MacOS)