Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions src/spdx_tools/spdx/parser/json/json_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,33 @@
# SPDX-License-Identifier: Apache-2.0
import json

from beartype.typing import Dict
from beartype.typing import Any, Dict

from spdx_tools.spdx.model import Document
from spdx_tools.spdx.parser.jsonlikedict.json_like_dict_parser import JsonLikeDictParser

# Characters we don't want to see in SBOMs. This is a ``str.translate`` table:
# each key is the code point of a character to drop, and mapping it to None
# deletes that character from the translated string.
CONTROL_CHARS_MAP = dict.fromkeys(
    (
        0x08,  # ASCII/UTF-8: backspace (\b)
        0x0C,  # ASCII/UTF-8: formfeed (\f)
    )
)


def remove_control_chars_from_value(value: Any) -> Any:
    """Strip the characters listed in ``CONTROL_CHARS_MAP`` from *value*.

    Strings are returned with those characters deleted. Lists are cleaned
    element by element (recursively, so nested lists are covered) and in
    place: the very same list object is returned. Every other value, e.g.
    numbers, booleans, ``None`` or dicts, is returned unchanged.
    """
    if isinstance(value, str):
        return value.translate(CONTROL_CHARS_MAP)
    if isinstance(value, list):
        # Slice assignment rewrites the contents of the caller's list rather
        # than rebinding to a new one, so the cleanup stays in place.
        value[:] = [remove_control_chars_from_value(item) for item in value]
    return value


def remove_json_control_chars_hook(pairs: list) -> dict:
    """Build a dict from ``(key, value)`` pairs, cleaning every value.

    Each value is passed through ``remove_control_chars_from_value``; keys
    are stored as given. The signature fits the ``object_pairs_hook``
    argument of ``json.load``.
    """
    cleaned = {}
    for key, raw_value in pairs:
        cleaned[key] = remove_control_chars_from_value(raw_value)
    return cleaned


def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
    """Parse the SPDX JSON file at *file_name* into a ``Document``.

    The file is opened with the given *encoding* and decoded with
    ``remove_json_control_chars_hook`` as ``object_pairs_hook``, so every
    decoded JSON object is built by that hook before the resulting dict is
    handed to ``JsonLikeDictParser``.
    """
    with open(file_name, encoding=encoding) as file:
        # Decode exactly once: a second json.load on the same handle would
        # start at end-of-file and fail.
        input_doc_as_dict: Dict = json.load(file, object_pairs_hook=remove_json_control_chars_hook)

    return JsonLikeDictParser().parse(input_doc_as_dict)
46 changes: 46 additions & 0 deletions tests/spdx/data/ControlCharacters.spdx.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"spdxVersion": "SPDX-2.2",
"dataLicense": "CC0-1.0",
"SPDXID": "SPDXRef-DOCUMENT",
"creationInfo": {
"created": "2020-11-24T01:12:27Z",
"creators": ["Person: Nisha \b\f K (nishak@vmware.com)"]
},
"name": "golang-dist",
"documentNamespace": "https://swinslow.net/spdx-examples/example7/golang-dist-492dfde4-318b-49f7-b48c-934bfafbde48",
"documentDescribes": ["SPDXRef-golang-dist"],
"packages": [
{
"name": "go1.16.4.linux-amd64",
"SPDXID": "SPDXRef-golang-dist",
"downloadLocation": "https://golang.org/dl/go1.16.4.linux-amd64.tar.gz",
"versionInfo": "1.16.4",
"filesAnalyzed": false,
"checksums": [
{
"algorithm": "SHA256",
"checksumValue": "7154e88f5a8047aad4b80ebace58a059e36e7e2e4eb3b383127a28c711b4ff59"
}
],
"licenseConcluded": "NOASSERTION",
"licenseDeclared": "LicenseRef-Golang-BSD-plus-Patents",
"copyrightText": "Copyright (c) 2009 The Go Authors. \b All rights reserved."
},
{
"name": "go",
"SPDXID": "SPDXRef-go-compiler",
"downloadLocation": "https://golang.org/dl/go1.16.4.linux-amd64.tar.gz",
"versionInfo": "1.16.4",
"filesAnalyzed": false,
"licenseConcluded": "NOASSERTION",
"licenseDeclared": "NOASSERTION",
"copyrightText": "NOASSERTION"
}
],
"hasExtractedLicensingInfos": [
{
"licenseId": "LicenseRef-Golang-BSD-plus-Patents",
"extractedText": "Golang BSD plus Patents \"\\\/\b\f\n\r\t"
}
]
}
11 changes: 11 additions & 0 deletions tests/spdx/parser/jsonlikedict/test_json_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import os

from spdx_tools.spdx.parser.json import json_parser


def test_parse_control_characters():
    """Parse the control-character fixture and check the cleaned-up fields.

    The expected creator name and extracted license text contain no
    backspace or form-feed characters; newline, carriage return and tab
    are expected to survive.
    """
    fixture_path = os.path.join(os.path.dirname(__file__), "../../data/ControlCharacters.spdx.json")
    doc = json_parser.parse_from_file(fixture_path)

    first_creator = doc.creation_info.creators[0]
    assert first_creator.name == "Nisha K"

    license_info = doc.extracted_licensing_info[0]
    assert license_info.extracted_text == 'Golang BSD plus Patents "\\/\n\r\t'
Loading