|
4 | 4 | import unittest |
5 | 5 | from w3lib.url import (is_url, safe_url_string, safe_download_url, |
6 | 6 | url_query_parameter, add_or_replace_parameter, url_query_cleaner, |
7 | | - file_uri_to_path, path_to_file_uri, any_to_uri, urljoin_rfc, |
8 | | - canonicalize_url, parse_url) |
| 7 | + file_uri_to_path, parse_data_uri, path_to_file_uri, any_to_uri, |
| 8 | + urljoin_rfc, canonicalize_url, parse_url) |
9 | 9 | from six.moves.urllib.parse import urlparse |
10 | 10 |
|
11 | 11 |
|
@@ -574,6 +574,94 @@ def test_canonicalize_url_idna_exceptions(self): |
574 | 574 | label=u"example"*11)) |
575 | 575 |
|
576 | 576 |
|
class DataURITests(unittest.TestCase):
    """Checks for ``parse_data_uri``: media type, parameters and payload."""

    def test_default_mediatype_charset(self):
        # Nothing before the comma: the default media type and charset
        # are expected in the result.
        parsed = parse_data_uri("data:,A%20brief%20note")
        self.assertEqual(parsed.media_type, "text/plain")
        self.assertEqual(parsed.media_type_parameters, {"charset": "US-ASCII"})
        self.assertEqual(parsed.data, b"A brief note")

    def test_text_uri(self):
        # The URI may be passed as a text (unicode) string.
        uri = u"data:,A%20brief%20note"
        self.assertEqual(parse_data_uri(uri).data, b"A brief note")

    def test_bytes_uri(self):
        # The URI may be passed as a byte string.
        uri = b"data:,A%20brief%20note"
        self.assertEqual(parse_data_uri(uri).data, b"A brief note")

    def test_unicode_uri(self):
        # A raw non-ASCII character in the payload comes back UTF-8 encoded.
        expected_payload = u"é".encode('utf-8')
        parsed = parse_data_uri(u"data:,é")
        self.assertEqual(parsed.data, expected_payload)

    def test_default_mediatype(self):
        # Only a charset parameter is supplied; the media type is defaulted.
        expected_parameters = {"charset": "iso-8859-7"}
        parsed = parse_data_uri("data:;charset=iso-8859-7,%be%d3%be")
        self.assertEqual(parsed.media_type, "text/plain")
        self.assertEqual(parsed.media_type_parameters, expected_parameters)
        self.assertEqual(parsed.data, b"\xbe\xd3\xbe")

    def test_text_charset(self):
        # Media type and charset are both given explicitly.
        expected_parameters = {"charset": "iso-8859-7"}
        parsed = parse_data_uri("data:text/plain;charset=iso-8859-7,%be%d3%be")
        self.assertEqual(parsed.media_type, "text/plain")
        self.assertEqual(parsed.media_type_parameters, expected_parameters)
        self.assertEqual(parsed.data, b"\xbe\xd3\xbe")

    def test_mediatype_parameters(self):
        # Quoted parameter values containing ';', ',' and escaped quotes
        # must be returned unquoted, with the payload still found after them.
        uri = ('data:text/plain;'
               'foo=%22foo;bar%5C%22%22;'
               'charset=utf-8;'
               'bar=%22foo;%5C%22foo%20;/%20,%22,'
               '%CE%8E%CE%A3%CE%8E')
        expected_parameters = {
            "bar": 'foo;"foo ;/ ,',
            "charset": "utf-8",
            "foo": 'foo;bar"',
        }

        parsed = parse_data_uri(uri)

        self.assertEqual(parsed.media_type, "text/plain")
        self.assertEqual(parsed.media_type_parameters, expected_parameters)
        self.assertEqual(parsed.data, b"\xce\x8e\xce\xa3\xce\x8e")

    def test_base64(self):
        # A ";base64" marker means the payload is base64-decoded.
        uri = ("data:text/plain;base64,"
               "SGVsbG8sIHdvcmxkLg%3D%3D")
        parsed = parse_data_uri(uri)
        self.assertEqual(parsed.media_type, "text/plain")
        self.assertEqual(parsed.data, b"Hello, world.")

    def test_base64_spaces(self):
        # Whitespace inside the base64 payload is tolerated, both when it is
        # percent-encoded (first URI) and when it is literal (second URI).
        uris = (
            "data:text/plain;base64,SGVsb%20G8sIH%0A%20%20"
            "dvcm%20%20%20xk%20Lg%3D%0A%3D",
            "data:text/plain;base64,SGVsb G8sIH\n "
            "dvcm xk Lg%3D\n%3D",
        )
        for uri in uris:
            parsed = parse_data_uri(uri)
            self.assertEqual(parsed.media_type, "text/plain")
            self.assertEqual(parsed.data, b"Hello, world.")

    def test_wrong_base64_param(self):
        # A misspelled "base64" marker ("baes64") is rejected.
        self.assertRaises(
            ValueError,
            parse_data_uri,
            "data:text/plain;baes64,SGVsbG8sIHdvcmxkLg%3D%3D",
        )

    def test_missing_comma(self):
        # Without the comma separating header and payload the URI is invalid.
        self.assertRaises(ValueError, parse_data_uri, "data:A%20brief%20note")

    def test_missing_scheme(self):
        # The "data:" scheme prefix is mandatory.
        self.assertRaises(
            ValueError, parse_data_uri, "text/plain,A%20brief%20note")

    def test_wrong_scheme(self):
        # URIs with any other scheme are rejected.
        self.assertRaises(ValueError, parse_data_uri, "http://example.com/")

    def test_scheme_case_insensitive(self):
        # The scheme is matched regardless of letter case.
        for uri in ("DATA:,A%20brief%20note", "DaTa:,A%20brief%20note"):
            parsed = parse_data_uri(uri)
            self.assertEqual(parsed.data, b"A brief note")
| 664 | + |
if __name__ == "__main__":
    # Executed directly as a script: hand control to the unittest runner.
    unittest.main()
579 | 667 |
|
0 commit comments