Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion inc/scanoss.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
#define WFP_LN 4
#define WFP_REC_LN 18

#define SCANOSS_VERSION "5.4.23"
#define SCANOSS_VERSION "5.4.24"

/* Log files */
#define SCAN_LOG "/tmp/scanoss_scan.log"
Expand All @@ -59,6 +59,7 @@
#define DISABLE_SERVER_INFO 4096
#define DISABLE_HEALTH 8192
#define ENABLE_HIGH_ACCURACY 16384
#define ENABLE_LICENSE_FULL_REPORT 32768

#define MAX_SBOM_ITEMS 2000
#define SHORTEST_PATHS_QTY 4000 // number of shortest path to evaluate
Expand Down
1 change: 1 addition & 0 deletions src/help.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ These settings can also be specified in %s\n\
| 4096 | Disable extended server stats (default: enabled) |\n\
| 8192 | Disable health layer (default: enabled) |\n\
| 16384 | Enable high accuracy, slower scan (default: disabled) |\n\
| 32768 | Enable full license report (default: disabled) |\n\
+-------+-------------------------------------------------------+\n\
Examples:\n\
scanoss -F 12 DIRECTORY Scan DIRECTORY without license and dependency data\n\
Expand Down
73 changes: 63 additions & 10 deletions src/license.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,19 +133,28 @@ void license_free_list(struct license_list * ptr)
ptr->count = 0;
}

/**
 * Map a license source ID to its rank in a fixed ordering table.
 *
 * @param id  license source ID to look up.
 * @return    zero-based position of id in the table, or the table length
 *            when id is not listed (so unknown IDs rank after every known one).
 */
static int license_priority(int id)
{
	/* The position of an ID in this table is the rank that gets returned. */
	static const int ranked_ids[] = {0, 31, 32, 33, 35, 3, 1, 2, 5};
	enum { RANKED_COUNT = sizeof(ranked_ids) / sizeof(ranked_ids[0]) };

	int rank = 0;

	/* Walk forward until the ID is found or the table is exhausted. */
	while (rank < RANKED_COUNT && ranked_ids[rank] != id)
		rank++;

	/* rank == RANKED_COUNT here means "not found": unknown IDs go last. */
	return rank;
}

/*
 * qsort()-style comparator for two struct license_type elements.
 *
 * a, b: pointers to the elements being compared (read only through ->id).
 * Returns a negative value when a sorts before b, a positive value when a
 * sorts after b, and 0 when both ids are equal.
 *
 * Ordering as written below:
 *   1. an element whose id is 5 or 6 sorts after one whose id is neither;
 *   2. otherwise the element with the lower license_priority() rank wins;
 *   3. equal ranks fall back to ascending id.
 *
 * NOTE(review): this span looks like a diff view that shows both the older
 * "5 and 6 go last" rule and the newer priority-table rule together --
 * confirm which of these statements are actually present in the file.
 */
static int license_compare_by_id(const void *a, const void *b)
{
const struct license_type *la = a;
const struct license_type *lb = b;

/* IDs 5 and 6 should go to the end */
bool a_is_last = (la->id == 5 || la->id == 6);
bool b_is_last = (lb->id == 5 || lb->id == 6);
/* Rank of each id in the license_priority() table */
int pa = license_priority(la->id);
int pb = license_priority(lb->id);

/* Exactly one side is a "last" id: that side sorts after the other */
if (a_is_last && !b_is_last)
return 1;
if (!a_is_last && b_is_last)
return -1;
/* Different ranks: the lower rank sorts first */
if (pa != pb)
return pa - pb;

/* Same rank: tie-break on ascending id */
return la->id - lb->id;
}
Expand Down Expand Up @@ -291,7 +300,7 @@ static char *json_from_license(uint32_t *crclist, char *buffer, char *license, i
if (!license_source_id)
return buffer;
//skip scancode licenses starting with "LicenseRef"
if (!strncmp(license_source_id, "scancode", 8) && !strncmp(license, "LicenseRef", 10))
if (!strncmp(license_source_id, "scancode", 8) && strstr(license, "LicenseRef"))
return buffer;
Comment on lines +303 to 304
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Don't drop mixed SPDX expressions just because they contain LicenseRef.

This now skips the whole Scancode record for values like MIT AND LicenseRef-scancode-foo, so the valid SPDX part is lost too. Restrict the skip to pure LicenseRef-* entries, or ignore only the LicenseRef-* token when expanding compound expressions.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/license.c` around lines 294 - 295, The current check in src/license.c
using license_source_id and strstr(license, "LicenseRef") drops entire Scancode
records when any LicenseRef token appears; change it so only pure LicenseRef
entries are skipped and compound expressions keep their SPDX parts: in the block
that uses license_source_id, replace the broad strstr test with a stricter test
that returns buffer only when the whole license string is a single LicenseRef-*
token (e.g., matches start with "LicenseRef" and contains no other SPDX
operators or tokens), and for compound expressions (strings containing SPDX
operators like AND/OR/() or multiple tokens) strip/ignore only the LicenseRef-*
tokens when expanding/normalizing the expression instead of returning early;
update the logic around the current strncmp/strstr usage and any normalization
code that consumes license so it removes LicenseRef-* tokens before further
parsing.

/* Calculate CRC to avoid duplicates */
uint32_t CRC = string_crc32c(license);
Expand All @@ -313,8 +322,49 @@ static char *json_from_license(uint32_t *crclist, char *buffer, char *license, i
len += sprintf(buffer + len, "\"name\": \"%s\",", license);
len += osadl_print_license(buffer + len, license, true);
len += sprintf(buffer + len, "\"source\": \"%s\"", license_source_id);
if (!strstr(license, "LicenseRef"))

/* Check if license contains AND/OR/WITH operators */
if (strstr(license, " AND ") || strstr(license, " OR ") || strstr(license, " WITH "))
{
/* Build "urls" object with each individual license mapped to its URL */
len += sprintf(buffer + len, ",\"urls\": {");
char lic_copy[MAX_FIELD_LN];
strncpy(lic_copy, license, MAX_FIELD_LN - 1);
lic_copy[MAX_FIELD_LN - 1] = '\0';

char first_license[MAX_FIELD_LN] = "\0";
bool first_entry = true;
char *saveptr = NULL;
char *token = strtok_r(lic_copy, " ()", &saveptr);

while (token)
{
/* Skip AND/OR/WITH operators */
if (strcmp(token, "AND") == 0 || strcmp(token, "OR") == 0 || strcmp(token, "WITH") == 0)
{
token = strtok_r(NULL, " ()", &saveptr);
continue;
}
if (!first_entry)
len += sprintf(buffer + len, ",");
else
{
strncpy(first_license, token, MAX_FIELD_LN - 1);
first_entry = false;
}
len += sprintf(buffer + len, "\"%s\": \"https://spdx.org/licenses/%s.html\"", token, token);
Comment on lines +342 to +355
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

WITH exceptions need SPDX /exceptions/ URLs.

In expressions like GPL-2.0-only WITH Classpath-exception-2.0, the token after WITH is an SPDX exception, not a license. This code currently emits spdx.org/licenses/... for it, which produces an invalid URL.

💡 Suggested fix
-		char *token = strtok_r(lic_copy, " ()", &saveptr);
+		char *token = strtok_r(lic_copy, " ()", &saveptr);
+		const char *spdx_kind = "licenses";
 
 		while (token)
 		{
 			/* Skip AND/OR/WITH operators */
-			if (strcmp(token, "AND") == 0 || strcmp(token, "OR") == 0 || strcmp(token, "WITH") == 0)
+			if (strcmp(token, "AND") == 0 || strcmp(token, "OR") == 0 || strcmp(token, "WITH") == 0)
 			{
+				spdx_kind = (strcmp(token, "WITH") == 0) ? "exceptions" : "licenses";
 				token = strtok_r(NULL, " ()", &saveptr);
 				continue;
 			}
 			if (!first_entry)
 				len += sprintf(buffer + len, ",");
@@
-			len += sprintf(buffer + len, "\"%s\": \"https://spdx.org/licenses/%s.html\"", token, token);
+			len += sprintf(buffer + len, "\"%s\": \"https://spdx.org/%s/%s.html\"", token, spdx_kind, token);
+			spdx_kind = "licenses";
 			token = strtok_r(NULL, " ()", &saveptr);
 		}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/license.c` around lines 333 - 346, The code currently treats the token
after a "WITH" operator as a license and generates a licenses URL; change the
logic so that when you encounter "WITH" (in the block referencing token,
saveptr) you set a flag (e.g., next_is_exception) and advance to the next token,
then when emitting the JSON entry (the code using token, buffer, len,
first_entry, first_license) use that flag to choose
"https://spdx.org/exceptions/%s.html" instead of the licenses path and reset the
flag; ensure first_license is only set for actual licenses (not exceptions) and
clear the flag after use.

token = strtok_r(NULL, " ()", &saveptr);
}
len += sprintf(buffer + len, "}");

/* "url" points to the first license only */
len += sprintf(buffer + len, ",\"url\": \"https://spdx.org/licenses/%s.html\"", first_license);
}
else
{
len += sprintf(buffer + len, ",\"url\": \"https://spdx.org/licenses/%s.html\"", license);
}

len += sprintf(buffer + len, "}");
return (buffer + len);
}
Expand Down Expand Up @@ -507,14 +557,17 @@ void print_licenses(component_data_t *comp)
len += sprintf(result + len, "\"licenses\": [");
buffer = result + len;
bool first = true;

int last_id = -1;
/* Sort licenses using the license_compare_by_id ordering (not plain ascending id) */
if (licenses_by_type.count > 1)
qsort(licenses_by_type.licenses, licenses_by_type.count, sizeof(struct license_type), license_compare_by_id);

for (int i = 0; i < licenses_by_type.count; i++)
{
buffer = license_to_json(crclist, buffer, licenses_by_type.licenses[i].text, licenses_by_type.licenses[i].id, &first);
if (last_id >= 0 && last_id != licenses_by_type.licenses[i].id && !first && !full_license_report)
break;
last_id = licenses_by_type.licenses[i].id;
}

len = buffer - result;
Expand Down
2 changes: 2 additions & 0 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,8 @@ int main(int argc, char **argv)
case 'F':
engine_flags_cmd_line = atol(optarg);
engine_flags |= engine_flags_cmd_line;
if (engine_flags & ENABLE_LICENSE_FULL_REPORT)
full_license_report = true;
break;

case 'l':
Expand Down
6 changes: 4 additions & 2 deletions src/match.c
Original file line number Diff line number Diff line change
Expand Up @@ -965,8 +965,10 @@ void match_select_best(scan_data_t *scan)
}

//If the best match is not good or is not identified, we prefer the candidate.
if ((!best_match_component->identified && match_component->identified) ||
(path_is_third_party(best_match_component) < path_is_third_party(match_component)))
if (best_match_component->identified > match_component->identified)
continue;

if (path_is_third_party(best_match_component) < path_is_third_party(match_component))
{
scanlog("Replacing best match for a prefered component\n");
scan->matches_list_array[i]->best_match = item->match;
Expand Down
Loading