From f88762978ecb631d576c7b9964adaa9a0bd96394 Mon Sep 17 00:00:00 2001 From: mscasso-scanoss Date: Tue, 31 Mar 2026 19:17:27 +0200 Subject: [PATCH 1/7] add FileContentsLimit feature to file contents service --- config/app-config-dev.json | 3 +- config/app-config-prod.json | 3 +- pkg/config/server_config.go | 4 ++ pkg/service/filecontents_service.go | 13 +++++ pkg/service/filecontents_service_test.go | 65 ++++++++++++++++++++++-- test-support/scanoss.sh | 5 ++ 6 files changed, 86 insertions(+), 7 deletions(-) diff --git a/config/app-config-dev.json b/config/app-config-dev.json index 47a5b27..b9b6d2c 100644 --- a/config/app-config-dev.json +++ b/config/app-config-dev.json @@ -17,6 +17,7 @@ "RankingThreshold": 0, "MinSnippetHits": 0, "MinSnippetLines": 0, - "HonourFileExts": true + "HonourFileExts": true, + "FileContentsLimit": 50 } } diff --git a/config/app-config-prod.json b/config/app-config-prod.json index d990fae..75611b8 100644 --- a/config/app-config-prod.json +++ b/config/app-config-prod.json @@ -40,7 +40,8 @@ "RankingThreshold": 0, "MinSnippetHits": 0, "MinSnippetLines": 0, - "HonourFileExts": true + "HonourFileExts": true, + "FileContentsLimit": 50 }, "TLS": { "CertFile": "", diff --git a/pkg/config/server_config.go b/pkg/config/server_config.go index 56e0118..6f3274f 100644 --- a/pkg/config/server_config.go +++ b/pkg/config/server_config.go @@ -80,6 +80,8 @@ type ServerConfig struct { MinSnippetHits int `env:"SCANOSS_MIN_SNIPPET_HITS"` // Minimum snippet hits to consider a snippet match MinSnippetLines int `env:"SCANOSS_MIN_SNIPPET_LINES"` // Minimum snippet lines to consider a snippet match HonourFileExts bool `env:"SCANOSS_HONOUR_FILE_EXTS"` // Honour file extensions to filter snippet matches + // file contents + FileContentsLimit int64 `env:"SCANOSS_FILE_CONTENTS_LIMIT"` // Maximum file contents size in MB (default 50) } TLS struct { CertFile string `env:"SCAN_TLS_CERT"` // TLS Certificate @@ -140,6 +142,8 @@ func setServerConfigDefaults(cfg *ServerConfig) { cfg.Scanning.MinSnippetLines = 0 // Lets the engine decide on minimum snippet hits on the file total lines cfg.Scanning.HonourFileExts = true cfg.Scanning.AllowFlagsOverride = false // Disallow clients overriding the default flags if it's set server-side + // file contents + cfg.Scanning.FileContentsLimit = 50 // Default 50 MB } // LoadFile loads the specified file and returns its contents in a string array. diff --git a/pkg/service/filecontents_service.go b/pkg/service/filecontents_service.go index 6039d8a..ad3c85c 100644 --- a/pkg/service/filecontents_service.go +++ b/pkg/service/filecontents_service.go @@ -19,6 +19,7 @@ package service import ( "bytes" "context" + "encoding/json" "fmt" "net/http" "os/exec" @@ -81,6 +82,18 @@ func (s APIService) FileContents(w http.ResponseWriter, r *http.Request) { http.Error(w, "ERROR recovering file contents", http.StatusInternalServerError) return } + limitBytes := s.config.Scanning.FileContentsLimit * 1024 * 1024 + if limitBytes > 0 && int64(len(output)) > limitBytes { + zs.Warnf("File contents size %d bytes exceeds limit %d MB for md5 %s", len(output), s.config.Scanning.FileContentsLimit, md5) + w.Header().Set(ContentTypeKey, ApplicationJSON) + w.WriteHeader(http.StatusRequestEntityTooLarge) + resp := map[string]string{ + "error": fmt.Sprintf("file contents size (%d bytes) exceeds the maximum allowed limit (%d MB)", + len(output), s.config.Scanning.FileContentsLimit), + } + _ = json.NewEncoder(w).Encode(resp) + return + } charset := detectCharset(output) if s.config.App.Trace { zs.Debugf("Sending back contents: %v - '%s'", len(output), output) diff --git a/pkg/service/filecontents_service_test.go b/pkg/service/filecontents_service_test.go index 68f4018..adad3fd 100644 --- a/pkg/service/filecontents_service_test.go +++ b/pkg/service/filecontents_service_test.go @@ -17,6 +17,7 @@ package service import ( "bytes" + "encoding/json" "fmt" "io" "net/http" @@ -39,11 +40,12 @@ func TestFileContents(t *testing.T) { apiService := NewAPIService(myConfig) tests := []struct { - name string - input map[string]string - binary string - telemetry bool - want int + name string + input map[string]string + binary string + telemetry bool + fileContentsLimit int64 + want int }{ { name: "Test Contents - empty", @@ -79,6 +81,30 @@ func TestFileContents(t *testing.T) { input: map[string]string{"md5": "37f7cd1e657aa3c30ece35995b4c59e5"}, want: http.StatusOK, }, + { + name: "Test Contents - within file size limit", + binary: "../../test-support/scanoss.sh", + telemetry: false, + input: map[string]string{"md5": "37f7cd1e657aa3c30ece35995b4c59e5"}, + fileContentsLimit: 50, // 50 MB + want: http.StatusOK, + }, + { + name: "Test Contents - limit disabled (zero)", + binary: "../../test-support/scanoss.sh", + telemetry: false, + input: map[string]string{"md5": "37f7cd1e657aa3c30ece35995b4c59e5"}, + fileContentsLimit: 0, // disabled + want: http.StatusOK, + }, + { + name: "Test Contents - exceeds file size limit", + binary: "../../test-support/scanoss.sh", + telemetry: false, + input: map[string]string{"md5": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}, + fileContentsLimit: 1, // 1 MB - large output (~1.1 MB) will exceed this + want: http.StatusRequestEntityTooLarge, + }, } for _, test := range tests { @@ -92,6 +118,7 @@ func TestFileContents(t *testing.T) { } myConfig.Scanning.ScanBinary = test.binary myConfig.Telemetry.Enabled = test.telemetry + myConfig.Scanning.FileContentsLimit = test.fileContentsLimit req := newReq("GET", "http://localhost/file_contents/{md5}", "", test.input) w := httptest.NewRecorder() apiService.FileContents(w, req) @@ -108,6 +135,34 @@ func TestFileContents(t *testing.T) { } } +func TestFileContentsLimitExceeded(t *testing.T) { + err := zlog.NewSugaredDevLogger() + if err != nil { + t.Fatalf("an error '%s' was not expected when opening a sugared logger", err) + } + defer zlog.SyncZap() + myConfig := setupConfig(t) + myConfig.Scanning.ScanBinary = "../../test-support/scanoss.sh" + myConfig.Scanning.FileContentsLimit = 1 // 1 MB + apiService := NewAPIService(myConfig) + + // Use special md5 that triggers large output (~1.1 MB) exceeding the 1 MB limit + req := newReq("GET", "http://localhost/file_contents/{md5}", "", map[string]string{"md5": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}) + w := httptest.NewRecorder() + apiService.FileContents(w, req) + resp := w.Result() + body, err := io.ReadAll(resp.Body) + if err != nil { + t.Fatalf("an error was not expected when reading from request: %v", err) + } + assert.Equal(t, http.StatusRequestEntityTooLarge, resp.StatusCode) + assert.Contains(t, resp.Header.Get("Content-Type"), "application/json") + var result map[string]string + err = json.Unmarshal(body, &result) + assert.NoError(t, err, "response body should be valid JSON") + assert.Contains(t, result["error"], "exceeds the maximum allowed limit") +} + func TestDetectCharset(t *testing.T) { tests := []struct { name string diff --git a/test-support/scanoss.sh b/test-support/scanoss.sh index 0f5dcc0..9ff128e 100755 --- a/test-support/scanoss.sh +++ b/test-support/scanoss.sh @@ -32,6 +32,11 @@ if [ "$1" == "-k" ] || [ "$2" == "-k" ] || [ "$3" == "-k" ] ; then echo "Error: Invalid MD5 hash format: $md5" >&2 exit 1 fi + # Simulate large file contents (>1MB) for a specific md5 + if [ "$md5" == "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" ]; then + head -c 1100000 /dev/zero | tr '\0' 'A' + exit 0 + fi echo "file contents: $md5" echo "line 2" echo "line 3" From 950c69b2cabd1c29582ee145235e34542d695c21 Mon Sep 17 00:00:00 2001 From: mscasso-scanoss Date: Tue, 31 Mar 2026 19:23:15 +0200 Subject: [PATCH 2/7] fix linter issues --- pkg/service/utils_service_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/service/utils_service_test.go b/pkg/service/utils_service_test.go index db3a863..a7aa173 100644 --- a/pkg/service/utils_service_test.go +++ b/pkg/service/utils_service_test.go @@ -37,6 +37,8 @@ import ( ) // newReq sets up a request with specified URL variables. +// +//nolint:unparam // method is always GET for now but kept for flexibility func newReq(method, path, body string, vars map[string]string) *http.Request { r := httptest.NewRequest(method, path, strings.NewReader(body)) return mux.SetURLVars(r, vars) From cb16812e0eacf51d32f4c4ab019bc6b1e5bf1ddd Mon Sep 17 00:00:00 2001 From: mscasso-scanoss Date: Tue, 31 Mar 2026 20:23:45 +0200 Subject: [PATCH 3/7] update CHANGELOG --- CHANGELOG.md | 4 ++++ pkg/service/filecontents_service.go | 1 + 2 files changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e9b1f0..459ff09 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Added +- Added configurable file contents size limit (`SCANOSS_FILE_CONTENTS_LIMIT`). + - Limits the maximum file size returned by the `file_contents` endpoint (default: 50 MB). + - Returns HTTP 400 when the file exceeds the configured limit. ## [1.6.5] - 2026-03-26 ### Fixed diff --git a/pkg/service/filecontents_service.go b/pkg/service/filecontents_service.go index ad3c85c..7b09f18 100644 --- a/pkg/service/filecontents_service.go +++ b/pkg/service/filecontents_service.go @@ -83,6 +83,7 @@ func (s APIService) FileContents(w http.ResponseWriter, r *http.Request) { return } limitBytes := s.config.Scanning.FileContentsLimit * 1024 * 1024 + //unlimited for FileContentsLimit <= 0 if limitBytes > 0 && int64(len(output)) > limitBytes { zs.Warnf("File contents size %d bytes exceeds limit %d MB for md5 %s", len(output), s.config.Scanning.FileContentsLimit, md5) w.Header().Set(ContentTypeKey, ApplicationJSON) From 896f09d7af3ba0c9763ed30c113877c3d8638f4e Mon Sep 17 00:00:00 2001 From: mscasso-scanoss Date: Wed, 1 Apr 2026 12:24:09 +0200 Subject: [PATCH 4/7] fix linter issue --- pkg/service/filecontents_service.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/service/filecontents_service.go b/pkg/service/filecontents_service.go index 7b09f18..b676cf3 100644 --- a/pkg/service/filecontents_service.go +++ b/pkg/service/filecontents_service.go @@ -83,7 +83,7 @@ func (s APIService) FileContents(w http.ResponseWriter, r *http.Request) { return } limitBytes := s.config.Scanning.FileContentsLimit * 1024 * 1024 - //unlimited for FileContentsLimit <= 0 + // unlimited for FileContentsLimit <= 0 if limitBytes > 0 && int64(len(output)) > limitBytes { zs.Warnf("File contents size %d bytes exceeds limit %d MB for md5 %s", len(output), s.config.Scanning.FileContentsLimit, md5) w.Header().Set(ContentTypeKey, ApplicationJSON) From 95a74c565f21a3c2e2a3ef3bdbf94e1e3512fcd6 Mon Sep 17 00:00:00 2001 From: mscasso-scanoss Date: Tue, 7 Apr 2026 12:37:02 +0200 Subject: [PATCH 5/7] small code changes --- CHANGELOG.md | 4 ++-- pkg/service/filecontents_service.go | 12 +++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 459ff09..55c8568 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,11 +5,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [1.6.6] - 2026-04-07 ### Added - Added configurable file contents size limit (`SCANOSS_FILE_CONTENTS_LIMIT`). - Limits the maximum file size returned by the `file_contents` endpoint (default: 50 MB). - - Returns HTTP 400 when the file exceeds the configured limit. + - Returns HTTP 400 with the error `file contents size exceeds` when the file exceeds the configured limit. ## [1.6.5] - 2026-03-26 ### Fixed diff --git a/pkg/service/filecontents_service.go b/pkg/service/filecontents_service.go index b676cf3..92223b9 100644 --- a/pkg/service/filecontents_service.go +++ b/pkg/service/filecontents_service.go @@ -82,10 +82,12 @@ func (s APIService) FileContents(w http.ResponseWriter, r *http.Request) { http.Error(w, "ERROR recovering file contents", http.StatusInternalServerError) return } + // convert the configured limit from MB to bytes. limitBytes := s.config.Scanning.FileContentsLimit * 1024 * 1024 + outputLen := int64(len(output)) // unlimited for FileContentsLimit <= 0 - if limitBytes > 0 && int64(len(output)) > limitBytes { - zs.Warnf("File contents size %d bytes exceeds limit %d MB for md5 %s", len(output), s.config.Scanning.FileContentsLimit, md5) + if limitBytes > 0 && outputLen > limitBytes { + zs.Warnf("File contents size %d bytes exceeds limit %d MB for md5 %s", outputLen, s.config.Scanning.FileContentsLimit, md5) w.Header().Set(ContentTypeKey, ApplicationJSON) w.WriteHeader(http.StatusRequestEntityTooLarge) resp := map[string]string{ @@ -97,13 +99,13 @@ func (s APIService) FileContents(w http.ResponseWriter, r *http.Request) { } charset := detectCharset(output) if s.config.App.Trace { - zs.Debugf("Sending back contents: %v - '%s'", len(output), output) + zs.Debugf("Sending back contents: %v - '%s'", outputLen, output) } else { - zs.Debugf("Sending back contents: %v", len(output)) + zs.Debugf("Sending back contents: %v", outputLen) } w.Header().Set(ContentTypeKey, fmt.Sprintf("text/plain; charset=%s", charset)) w.Header().Set(CharsetDetectedKey, charset) - w.Header().Set(ContentLengthKey, fmt.Sprintf("%d", len(output))) + w.Header().Set(ContentLengthKey, fmt.Sprintf("%d", outputLen)) printResponse(w, string(output), zs, false) } From face7733121f7e82c793d5dfcaf14cc387178596 Mon Sep 17 00:00:00 2001 From: mscasso-scanoss Date: Tue, 7 Apr 2026 19:04:09 +0200 Subject: [PATCH 6/7] change error message format, update tests --- pkg/service/filecontents_service.go | 14 +++----------- pkg/service/filecontents_service_test.go | 13 ++++--------- pkg/service/utils_service.go | 5 +++-- 3 files changed, 10 insertions(+), 22 deletions(-) diff --git a/pkg/service/filecontents_service.go b/pkg/service/filecontents_service.go index 92223b9..7a9ac07 100644 --- a/pkg/service/filecontents_service.go +++ b/pkg/service/filecontents_service.go @@ -19,7 +19,6 @@ package service import ( "bytes" "context" - "encoding/json" "fmt" "net/http" "os/exec" @@ -82,19 +81,12 @@ func (s APIService) FileContents(w http.ResponseWriter, r *http.Request) { http.Error(w, "ERROR recovering file contents", http.StatusInternalServerError) return } - // convert the configured limit from MB to bytes. - limitBytes := s.config.Scanning.FileContentsLimit * 1024 * 1024 outputLen := int64(len(output)) // unlimited for FileContentsLimit <= 0 - if limitBytes > 0 && outputLen > limitBytes { + if s.fileContentslimitBytes > 0 && outputLen > s.fileContentslimitBytes { zs.Warnf("File contents size %d bytes exceeds limit %d MB for md5 %s", outputLen, s.config.Scanning.FileContentsLimit, md5) - w.Header().Set(ContentTypeKey, ApplicationJSON) - w.WriteHeader(http.StatusRequestEntityTooLarge) - resp := map[string]string{ - "error": fmt.Sprintf("file contents size (%d bytes) exceeds the maximum allowed limit (%d MB)", - len(output), s.config.Scanning.FileContentsLimit), - } - _ = json.NewEncoder(w).Encode(resp) + http.Error(w, fmt.Sprintf("file contents size (%d bytes) exceeds the maximum allowed limit (%d MB)", + outputLen, s.config.Scanning.FileContentsLimit), http.StatusRequestEntityTooLarge) return } charset := detectCharset(output) diff --git a/pkg/service/filecontents_service_test.go b/pkg/service/filecontents_service_test.go index adad3fd..480ea4b 100644 --- a/pkg/service/filecontents_service_test.go +++ b/pkg/service/filecontents_service_test.go @@ -17,7 +17,6 @@ package service import ( "bytes" - "encoding/json" "fmt" "io" "net/http" @@ -37,8 +36,6 @@ func TestFileContents(t *testing.T) { myConfig := setupConfig(t) myConfig.App.Trace = true myConfig.Scanning.ScanDebug = true - apiService := NewAPIService(myConfig) - tests := []struct { name string input map[string]string @@ -119,9 +116,10 @@ func TestFileContents(t *testing.T) { myConfig.Scanning.ScanBinary = test.binary myConfig.Telemetry.Enabled = test.telemetry myConfig.Scanning.FileContentsLimit = test.fileContentsLimit + svc := NewAPIService(myConfig) req := newReq("GET", "http://localhost/file_contents/{md5}", "", test.input) w := httptest.NewRecorder() - apiService.FileContents(w, req) + svc.FileContents(w, req) resp := w.Result() body, err := io.ReadAll(resp.Body) if err != nil { @@ -156,11 +154,8 @@ func TestFileContentsLimitExceeded(t *testing.T) { t.Fatalf("an error was not expected when reading from request: %v", err) } assert.Equal(t, http.StatusRequestEntityTooLarge, resp.StatusCode) - assert.Contains(t, resp.Header.Get("Content-Type"), "application/json") - var result map[string]string - err = json.Unmarshal(body, &result) - assert.NoError(t, err, "response body should be valid JSON") - assert.Contains(t, result["error"], "exceeds the maximum allowed limit") + assert.Contains(t, resp.Header.Get("Content-Type"), "text/plain") + assert.Contains(t, string(body), "exceeds the maximum allowed limit") } func TestDetectCharset(t *testing.T) { diff --git a/pkg/service/utils_service.go b/pkg/service/utils_service.go index 08c97e3..322e8a9 100644 --- a/pkg/service/utils_service.go +++ b/pkg/service/utils_service.go @@ -65,13 +65,14 @@ type TraceContextKey struct{} // APIService details. type APIService struct { - config *myconfig.ServerConfig + config *myconfig.ServerConfig + fileContentslimitBytes int64 } // NewAPIService instantiates an API Service instance for servicing the API requests. func NewAPIService(config *myconfig.ServerConfig) *APIService { setupMetrics() - return &APIService{config: config} + return &APIService{config: config, fileContentslimitBytes: config.Scanning.FileContentsLimit * 1024 * 1024} } // Structure for counting the total number of requests processed. From d4cbd27df857c9a4e7f5cb39e94e1c619b01e983 Mon Sep 17 00:00:00 2001 From: mscasso-scanoss Date: Wed, 8 Apr 2026 14:43:41 +0200 Subject: [PATCH 7/7] fix changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 55c8568..bbb6204 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added configurable file contents size limit (`SCANOSS_FILE_CONTENTS_LIMIT`). - Limits the maximum file size returned by the `file_contents` endpoint (default: 50 MB). - - Returns HTTP 400 with the error `file contents size exceeds` when the file exceeds the configured limit. + - Returns HTTP 413 with the error `file contents size exceeds` when the file exceeds the configured limit. ## [1.6.5] - 2026-03-26 ### Fixed