Skip to content

Commit 7f619c2

Browse files
Gepeto Escalante authored and agustingroh committed
feat(settings):SP-384 Implement scan configuration parameters
1 parent 5be762a commit 7f619c2

File tree

10 files changed

+606
-11
lines changed

10 files changed

+606
-11
lines changed

CHANGELOG.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
## [0.13.0] - 2026-02-03
1011
### Added
12+
- Added `file_snippet` scan configuration support in `scanoss.json` for engine tuning parameters (`min_snippet_hits`, `min_snippet_lines`, `honour_file_exts`, `ranking_enabled`, `ranking_threshold`, `skip_headers`, `skip_headers_limit`)
13+
- Added CLI scan configuration options with resolution priority (file_snippet > CLI)
14+
- Added `FileSnippet` class for scan configuration management and resolution
1115

12-
- Upcoming changes...
1316
## [0.12.1] - 2026-01-08
1417
### Changed
1518
- Updated slf4j from 2.0.16 to 2.0.17
@@ -141,4 +144,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
141144
[0.10.1]: https://github.com/scanoss/scanoss.java/compare/v0.10.0...v0.10.1
142145
[0.11.0]: https://github.com/scanoss/scanoss.java/compare/v0.10.1...v0.11.0
143146
[0.12.0]: https://github.com/scanoss/scanoss.java/compare/v0.11.0...v0.12.0
144-
[0.12.1]: https://github.com/scanoss/scanoss.java/compare/v0.12.0...v0.12.1
147+
[0.12.1]: https://github.com/scanoss/scanoss.java/compare/v0.12.0...v0.12.1
148+
[0.13.0]: https://github.com/scanoss/scanoss.java/compare/v0.12.1...v0.13.0

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
<groupId>com.scanoss</groupId>
88
<artifactId>scanoss</artifactId>
9-
<version>0.12.1</version>
9+
<version>0.13.0</version>
1010
<packaging>jar</packaging>
1111
<name>scanoss.java</name>
1212
<url>https://github.com/scanoss/scanoss.java</url>

src/main/java/com/scanoss/Scanner.java

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import com.scanoss.processor.*;
3232
import com.scanoss.rest.ScanApi;
3333
import com.scanoss.settings.Bom;
34+
import com.scanoss.settings.FileSnippet;
3435
import com.scanoss.settings.ScanossSettings;
3536
import com.scanoss.utils.JsonUtils;
3637
import lombok.*;
@@ -104,6 +105,8 @@ public class Scanner {
104105
private final ScanFileProcessor scanFileProcessor;
105106
private final WfpFileProcessor wfpFileProcessor;
106107
private final ScanossSettings settings;
108+
private final FileSnippet cliFileSnippet; // CLI-provided scan config (lowest priority)
109+
private final FileSnippet fileSnippet; // Resolved scan config (after priority merge)
107110
private final ScannerPostProcessor postProcessor;
108111
private final FilterConfig filterConfig;
109112
private Predicate<Path> fileFilter;
@@ -116,7 +119,8 @@ private Scanner(Boolean skipSnippets, Boolean allExtensions, Boolean obfuscate,
116119
Integer snippetLimit, String customCert, Proxy proxy,
117120
Winnowing winnowing, ScanApi scanApi,
118121
ScanFileProcessor scanFileProcessor, WfpFileProcessor wfpFileProcessor,
119-
ScanossSettings settings, ScannerPostProcessor postProcessor, FilterConfig filterConfig,
122+
ScanossSettings settings, FileSnippet cliFileSnippet, FileSnippet fileSnippet,
123+
ScannerPostProcessor postProcessor, FilterConfig filterConfig,
120124
Predicate<Path> fileFilter,
121125
Predicate<Path> folderFilter
122126
) {
@@ -137,20 +141,27 @@ private Scanner(Boolean skipSnippets, Boolean allExtensions, Boolean obfuscate,
137141
this.snippetLimit = snippetLimit;
138142
this.customCert = customCert;
139143
this.proxy = proxy;
144+
this.settings = Objects.requireNonNullElseGet(settings, () -> ScanossSettings.builder().build());
145+
this.cliFileSnippet = cliFileSnippet;
146+
// Resolve scan config: file_snippet (highest) > CLI (lowest)
147+
this.fileSnippet = this.settings.getResolvedScanConfig(
148+
Objects.requireNonNullElseGet(cliFileSnippet, () -> FileSnippet.builder().build()));
140149
this.winnowing = Objects.requireNonNullElseGet(winnowing, () ->
141150
Winnowing.builder().skipSnippets(skipSnippets).allExtensions(allExtensions).obfuscate(obfuscate)
142151
.hpsm(hpsm).snippetLimit(snippetLimit)
152+
.skipHeaders(this.fileSnippet.getSkipHeaders() != null && this.fileSnippet.getSkipHeaders())
153+
.skipHeadersLimit(this.fileSnippet.getSkipHeadersLimit() != null ? this.fileSnippet.getSkipHeadersLimit() : 0)
143154
.build());
144155
this.scanApi = Objects.requireNonNullElseGet(scanApi, () ->
145156
ScanApi.builder().url(url).apiKey(apiKey).timeout(timeout).retryLimit(retryLimit).flags(scanFlags)
146-
.sbomType(sbomType).sbom(sbom).customCert(customCert).proxy(proxy).settings(settings)
157+
.sbomType(sbomType).sbom(sbom).customCert(customCert).proxy(proxy).settings(this.settings)
158+
.fileSnippet(this.fileSnippet)
147159
.build());
148160
this.scanFileProcessor = Objects.requireNonNullElseGet(scanFileProcessor, () ->
149161
ScanFileProcessor.builder().winnowing(this.winnowing).scanApi(this.scanApi).build());
150162
this.wfpFileProcessor = Objects.requireNonNullElseGet(wfpFileProcessor, () -> WfpFileProcessor.builder()
151163
.winnowing(this.winnowing)
152164
.build());
153-
this.settings = Objects.requireNonNullElseGet(settings, () -> ScanossSettings.builder().build());
154165
this.postProcessor = Objects.requireNonNullElseGet(postProcessor, () ->
155166
ScannerPostProcessor.builder().build());
156167

src/main/java/com/scanoss/Winnowing.java

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ public class Winnowing {
8080
@Builder.Default
8181
private int snippetLimit = MAX_LONG_LINE_CHARS; // Enable limiting of size of a single line of snippet generation
8282
@Builder.Default
83+
private boolean skipHeaders = false; // Skip license headers, comments and imports at the beginning of files
84+
@Builder.Default
85+
private int skipHeadersLimit = 0; // Maximum number of header lines to skip (0 = auto-detect)
86+
@Builder.Default
8387
private Map<String, String> obfuscationMap = new ConcurrentHashMap<>();
8488

8589
/**
@@ -168,6 +172,12 @@ public String wfpForContents(@NonNull String filename, Boolean binFile, byte[] c
168172
wfpBuilder.append(String.format("hpsm=%s\n", Hpsm.calcHpsm(contents)));
169173
}
170174

175+
int skipLines = 0;
176+
if (this.skipHeaders) {
177+
skipLines = detectHeaderLines(fileContents, this.skipHeadersLimit);
178+
log.trace("Skipping {} header lines for snippet generation: {}", skipLines, filename);
179+
}
180+
171181
String gram = "";
172182
List<Long> window = new ArrayList<>();
173183
char normalized;
@@ -183,7 +193,7 @@ public String wfpForContents(@NonNull String filename, Boolean binFile, byte[] c
183193
} else {
184194
normalized = WinnowingUtils.normalize(c);
185195
}
186-
if (normalized > 0) {
196+
if (normalized > 0 && line > skipLines) {
187197
gram += normalized;
188198
if (gram.length() >= ScanossConstants.GRAM) {
189199
Long gramCRC32 = crc32c(gram);
@@ -312,6 +322,69 @@ private Boolean skipSnippets(@NonNull String filename, char[] contents) {
312322
return false;
313323
}
314324

325+
/**
326+
* Detect the number of header lines at the beginning of a file.
327+
* Header lines include license comment blocks, single-line comments,
328+
* blank lines, and import/package statements.
329+
*
330+
* @param contents file contents as char array
331+
* @param maxLines maximum number of header lines to detect (0 = no limit)
332+
* @return number of header lines detected
333+
*/
334+
int detectHeaderLines(char[] contents, int maxLines) {
335+
int headerLines = 0;
336+
boolean inBlockComment = false;
337+
int lineStart = 0;
338+
339+
for (int i = 0; i <= contents.length; i++) {
340+
if (i == contents.length || contents[i] == '\n') {
341+
String line = new String(contents, lineStart, i - lineStart).trim();
342+
343+
if (inBlockComment) {
344+
headerLines++;
345+
if (line.contains("*/")) {
346+
inBlockComment = false;
347+
}
348+
} else if (line.isEmpty()) {
349+
headerLines++;
350+
} else if (line.startsWith("//") || line.startsWith("#!") || line.startsWith("# ")) {
351+
headerLines++;
352+
} else if (line.startsWith("/*")) {
353+
headerLines++;
354+
if (!line.contains("*/")) {
355+
inBlockComment = true;
356+
}
357+
} else if (line.startsWith("*") || line.startsWith("* ")) {
358+
headerLines++;
359+
} else if (isImportOrPackageLine(line)) {
360+
headerLines++;
361+
} else {
362+
break; // Non-header line found
363+
}
364+
365+
if (maxLines > 0 && headerLines >= maxLines) {
366+
break;
367+
}
368+
369+
lineStart = i + 1;
370+
}
371+
}
372+
373+
return headerLines;
374+
}
375+
376+
/**
377+
* Check if a line is an import or package declaration.
378+
*
379+
* @param line trimmed source line
380+
* @return true if the line is an import/package/include statement
381+
*/
382+
private boolean isImportOrPackageLine(String line) {
383+
return line.startsWith("import ") || line.startsWith("package ") ||
384+
line.startsWith("from ") || line.startsWith("#include ") ||
385+
line.startsWith("using ") || line.startsWith("require ");
386+
}
387+
315388
/**
316389
* Try to detect if this is a text file or not
317390
*

src/main/java/com/scanoss/cli/ScanCommandLine.java

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import com.scanoss.Scanner;
2626
import com.scanoss.exceptions.ScannerException;
2727
import com.scanoss.exceptions.WinnowingException;
28+
import com.scanoss.settings.FileSnippet;
2829
import com.scanoss.settings.ScanossSettings;
2930
import com.scanoss.utils.JsonUtils;
3031
import com.scanoss.utils.ProxyUtils;
@@ -105,6 +106,27 @@ class ScanCommandLine implements Runnable {
105106
@picocli.CommandLine.Option(names = {"-H", "--hpsm"}, description = "Use High Precision Snippet Matching algorithm")
106107
private boolean enableHpsm = false;
107108

109+
@picocli.CommandLine.Option(names = {"--min-snippet-hits"}, description = "Minimum snippet hits required (0 = unset, uses server config)")
110+
private int minSnippetHits = 0;
111+
112+
@picocli.CommandLine.Option(names = {"--min-snippet-lines"}, description = "Minimum snippet lines required (0 = unset, uses server config)")
113+
private int minSnippetLines = 0;
114+
115+
@picocli.CommandLine.Option(names = {"--honour-file-exts"}, description = "Honour file extensions (true|false|unset)", arity = "1")
116+
private String honourFileExts = null;
117+
118+
@picocli.CommandLine.Option(names = {"--ranking"}, description = "Enable/disable ranking (true|false|unset)", arity = "1")
119+
private String ranking = null;
120+
121+
@picocli.CommandLine.Option(names = {"--ranking-threshold"}, description = "Ranking threshold value (-1 = unset, uses server config)")
122+
private int rankingThreshold = -1;
123+
124+
@picocli.CommandLine.Option(names = {"--skip-headers"}, description = "Skip license headers, comments and imports at the beginning of files (applies locally)")
125+
private boolean skipHeaders = false;
126+
127+
@picocli.CommandLine.Option(names = {"--skip-headers-limit"}, description = "Skip limit for license headers (0 = unset, applies locally)")
128+
private int skipHeadersLimit = 0;
129+
108130
@picocli.CommandLine.Parameters(arity = "1", description = "file/folder to scan")
109131
private String fileFolder;
110132

@@ -160,13 +182,13 @@ public void run() {
160182
printMsg(err, String.format("Using flags %s", scanFlags));
161183
}
162184
}
185+
FileSnippet cliFileSnippet = buildCliScanConfig();
163186
scanner = Scanner.builder().skipSnippets(skipSnippets).allFolders(allFolders).allExtensions(allExtensions)
164187
.hiddenFilesFolders(allHidden).numThreads(numThreads).url(apiUrl).apiKey(apiKey)
165188
.retryLimit(retryLimit).timeout(Duration.ofSeconds(timeoutLimit)).scanFlags(scanFlags)
166189
.snippetLimit(snippetLimit).customCert(caCertPem).proxy(proxy).hpsm(enableHpsm)
167-
.settings(settings).obfuscate(obfuscate)
190+
.settings(settings).obfuscate(obfuscate).cliFileSnippet(cliFileSnippet)
168191
.build();
169-
170192
File f = new File(fileFolder);
171193
if (!f.exists()) {
172194
throw new RuntimeException(String.format("Error: File or folder does not exist: %s\n", fileFolder));
@@ -198,6 +220,38 @@ private String loadFileToString(@NonNull String filename) {
198220
}
199221
}
200222

223+
/**
224+
* Build a ScanConfig from CLI arguments.
225+
*
226+
* @return ScanConfig populated with CLI-provided values
227+
*/
228+
private FileSnippet buildCliScanConfig() {
229+
FileSnippet.FileSnippetBuilder builder = FileSnippet.builder()
230+
.minSnippetHits(minSnippetHits)
231+
.minSnippetLines(minSnippetLines)
232+
.rankingThreshold(rankingThreshold)
233+
.skipHeaders(skipHeaders)
234+
.skipHeadersLimit(skipHeadersLimit);
235+
236+
builder.honourFileExts(parseTriStateBoolean(honourFileExts));
237+
builder.rankingEnabled(parseTriStateBoolean(ranking));
238+
239+
return builder.build();
240+
}
241+
242+
/**
243+
* Parse a tri-state boolean string value.
244+
*
245+
* @param value the string value ("true", "false", "unset", or null)
246+
* @return Boolean.TRUE, Boolean.FALSE, or null for unset
247+
*/
248+
private static Boolean parseTriStateBoolean(String value) {
249+
if (value == null || value.equalsIgnoreCase("unset")) {
250+
return null;
251+
}
252+
return Boolean.parseBoolean(value);
253+
}
254+
201255
/**
202256
* Scan the specified file and output the results
203257
*
@@ -245,7 +299,7 @@ private void scanFolder(String folder) {
245299
if (CommandLine.debug) {
246300
e.printStackTrace(err);
247301
}
248-
throw new RuntimeException(String.format("Something went wrong while scanning %s.", folder));
302+
throw new RuntimeException(String.format("Something went wrong while scanning %s.", folder), e);
249303
}
250304
}
251305
}

src/main/java/com/scanoss/rest/ScanApi.java

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import com.scanoss.dto.SbomLegacy;
2626
import com.scanoss.exceptions.ScanApiException;
2727
import com.scanoss.settings.Rule;
28+
import com.scanoss.settings.FileSnippet;
2829
import com.scanoss.settings.ScanossSettings;
2930
import com.scanoss.utils.JsonUtils;
3031
import com.scanoss.utils.PackageDetails;
@@ -36,6 +37,7 @@
3637
import okhttp3.tls.HandshakeCertificates;
3738

3839
import java.io.IOException;
40+
import java.nio.charset.StandardCharsets;
3941
import java.io.InterruptedIOException;
4042
import java.net.Proxy;
4143
import java.time.Duration;
@@ -72,12 +74,14 @@ public class ScanApi {
7274
private Proxy proxy; // Proxy configuration
7375
private String baseUrl; // SCANOSS base API URI (to used instead of url)
7476
private ScanossSettings settings;
77+
private FileSnippet fileSnippet; // Resolved scan configuration parameters
7578
@SuppressWarnings("unused")
7679
private ScanApi(String scanType, Duration timeout, Integer retryLimit, String url, String apiKey, String flags,
7780
String sbomType, String sbom,
7881
OkHttpClient okHttpClient, Map<String, String> headers, String customCert,
79-
Proxy proxy, String baseUrl, ScanossSettings settings) {
82+
Proxy proxy, String baseUrl, ScanossSettings settings, FileSnippet fileSnippet) {
8083
this.settings = settings;
84+
this.fileSnippet = fileSnippet;
8185
this.scanType = scanType;
8286
this.timeout = timeout;
8387
this.retryLimit = retryLimit;
@@ -178,6 +182,30 @@ public String scan(String wfp, String context, int scanID) throws ScanApiExcepti
178182
data.put("type", "identify");
179183
}
180184

185+
// Add scan configuration parameters as base64-encoded JSON in scanoss-settings key
186+
if (fileSnippet != null) {
187+
Map<String, Object> settingsMap = new LinkedHashMap<>();
188+
if (fileSnippet.isMinSnippetHitsSet()) {
189+
settingsMap.put("min_snippet_hits", fileSnippet.getMinSnippetHits());
190+
}
191+
if (fileSnippet.isMinSnippetLinesSet()) {
192+
settingsMap.put("min_snippet_lines", fileSnippet.getMinSnippetLines());
193+
}
194+
if (fileSnippet.isHonourFileExtsSet()) {
195+
settingsMap.put("honour_file_exts", fileSnippet.getHonourFileExts());
196+
}
197+
if (fileSnippet.isRankingEnabledSet()) {
198+
settingsMap.put("ranking_enabled", fileSnippet.getRankingEnabled());
199+
}
200+
if (fileSnippet.isRankingThresholdSet()) {
201+
settingsMap.put("ranking_threshold", fileSnippet.getRankingThreshold());
202+
}
203+
if (!settingsMap.isEmpty()) {
204+
String json = JsonUtils.toJson(settingsMap);
205+
String encoded = Base64.getEncoder().encodeToString(json.getBytes(StandardCharsets.UTF_8));
206+
headers.put("scanoss-settings", encoded);
207+
}
208+
}
181209

182210
Request request; // Create multipart request
183211
try {

0 commit comments

Comments
 (0)