Skip to content

Commit 0bb3ee2

Browse files
Add YAML frontmatter syntax validation for license data
Signed-off-by: Aliasghar Jawadwala <sharksurfauto@gmail.com>
1 parent 022ddc8 commit 0bb3ee2

3 files changed

Lines changed: 35 additions & 3 deletions

File tree

src/licensedcode/frontmatter.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -135,7 +135,7 @@ def load_frontmatter(fd, encoding="utf-8", **defaults):
135135
text = fd.read()
136136

137137
else:
138-
with codecs.open(fd, "r", encoding) as f:
138+
with open(fd, "r", encoding=encoding) as f:
139139
text = f.read()
140140

141141
text = return_unicode(text, encoding)

src/licensedcode/models.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -498,7 +498,7 @@ def dump(self, licenses_data_dir):
498498
content = get_yaml_safe_text(content)
499499
output = dumps_frontmatter(content=content, metadata=metadata)
500500
license_file = self.license_file(licenses_data_dir=licenses_data_dir)
501-
with open(license_file, 'w') as of:
501+
with open(license_file, 'w', encoding='utf-8') as of:
502502
of.write(output)
503503

504504
def load(self, license_file, check_consistency=True):
@@ -2418,7 +2418,7 @@ def dump(self, rules_data_dir, **kwargs):
24182418
metadata.update(kwargs)
24192419
content = self.text
24202420
output = dumps_frontmatter(content=content, metadata=metadata)
2421-
with open(rule_file, 'w') as of:
2421+
with open(rule_file, 'w', encoding='utf-8') as of:
24222422
of.write(output)
24232423

24242424
def load(self, rule_file, with_checks=True):

tests/licensedcode/test_license_models.py

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -660,3 +660,35 @@ def test_get_key_phrases_ignores_nested_key_phrase_markup(self):
660660
raise Exception('Exception should be raised')
661661
except InvalidRuleRequiredPhrase:
662662
pass
663+
664+
665+
class TestLicenseYamlFrontmatterSyntax(FileBasedTesting):
    """
    Validate that all license data files have valid YAML syntax.
    See: https://github.com/aboutcode-org/scancode-toolkit/issues/3947
    """
    test_data_dir = TEST_DATA_DIR

    def test_license_yaml_frontmatter_integrity(self):
        """
        Ensure all .LICENSE files in licenses_data_dir have valid YAML syntax
        in their frontmatter section.

        Every ``*.LICENSE`` file directly under ``licenses_data_dir`` is passed
        to ``load_frontmatter``; any exception it raises is collected so that
        a single run reports all offending files rather than only the first.

        Raises AssertionError if no license file is found at all, or if
        loading at least one file raised an exception.
        """
        from pathlib import Path
        from licensedcode.frontmatter import load_frontmatter
        from licensedcode.models import licenses_data_dir

        # Only this many individual failures are listed in the message.
        max_shown = 20

        license_files = sorted(Path(licenses_data_dir).glob('*.LICENSE'))

        # Without this guard the test would pass vacuously when the glob
        # matches nothing (e.g. a wrong or empty data directory).
        if not license_files:
            raise AssertionError(
                f'No .LICENSE files found in {licenses_data_dir}'
            )

        errors = []
        for license_file in license_files:
            try:
                load_frontmatter(str(license_file))
            except Exception as e:
                # Deliberately broad: the goal is to record every file that
                # fails to load, whatever the underlying error type is.
                errors.append(f'{license_file.name}: {e}')

        if errors:
            error_msg = '\n'.join(errors[:max_shown])  # Show first 20 errors
            if len(errors) > max_shown:
                error_msg += f'\n... and {len(errors) - max_shown} more errors'
            # Raise explicitly instead of ``assert False``: assert statements
            # are stripped when Python runs with -O, which would hide failures.
            raise AssertionError(
                f'Invalid YAML in {len(errors)} license files:\n{error_msg}'
            )

0 commit comments

Comments
 (0)