diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e9b1f0..bbb6204 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [1.6.6] - 2026-04-07 +### Added +- Added configurable file contents size limit (`SCANOSS_FILE_CONTENTS_LIMIT`). + - Limits the maximum file size returned by the `file_contents` endpoint (default: 50 MB; a value of 0 or less disables the limit). + - Returns HTTP 413 with the error `file contents size (N bytes) exceeds the maximum allowed limit (M MB)` when the file exceeds the configured limit. ## [1.6.5] - 2026-03-26 ### Fixed diff --git a/config/app-config-dev.json b/config/app-config-dev.json index 47a5b27..b9b6d2c 100644 --- a/config/app-config-dev.json +++ b/config/app-config-dev.json @@ -17,6 +17,7 @@ "RankingThreshold": 0, "MinSnippetHits": 0, "MinSnippetLines": 0, - "HonourFileExts": true + "HonourFileExts": true, + "FileContentsLimit": 50 } } diff --git a/config/app-config-prod.json b/config/app-config-prod.json index d990fae..75611b8 100644 --- a/config/app-config-prod.json +++ b/config/app-config-prod.json @@ -40,7 +40,8 @@ "RankingThreshold": 0, "MinSnippetHits": 0, "MinSnippetLines": 0, - "HonourFileExts": true + "HonourFileExts": true, + "FileContentsLimit": 50 }, "TLS": { "CertFile": "", diff --git a/pkg/config/server_config.go b/pkg/config/server_config.go index 56e0118..6f3274f 100644 --- a/pkg/config/server_config.go +++ b/pkg/config/server_config.go @@ -80,6 +80,8 @@ type ServerConfig struct { MinSnippetHits int `env:"SCANOSS_MIN_SNIPPET_HITS"` // Minimum snippet hits to consider a snippet match MinSnippetLines int `env:"SCANOSS_MIN_SNIPPET_LINES"` // Minimum snippet lines to consider a snippet match HonourFileExts bool `env:"SCANOSS_HONOUR_FILE_EXTS"` // Honour file extensions to filter snippet matches + // file contents + FileContentsLimit int64 
`env:"SCANOSS_FILE_CONTENTS_LIMIT"` // Maximum file contents size in MB (default 50, <= 0 disables the limit) } TLS struct { CertFile string `env:"SCAN_TLS_CERT"` // TLS Certificate @@ -140,6 +142,8 @@ func setServerConfigDefaults(cfg *ServerConfig) { cfg.Scanning.MinSnippetLines = 0 // Lets the engine decide on minimum snippet hits on the file total lines cfg.Scanning.HonourFileExts = true cfg.Scanning.AllowFlagsOverride = false // Disallow clients overriding the default flags if it's set server-side + // file contents + cfg.Scanning.FileContentsLimit = 50 // Default 50 MB } // LoadFile loads the specified file and returns its contents in a string array. diff --git a/pkg/service/filecontents_service.go b/pkg/service/filecontents_service.go index 6039d8a..7a9ac07 100644 --- a/pkg/service/filecontents_service.go +++ b/pkg/service/filecontents_service.go @@ -81,15 +81,23 @@ func (s APIService) FileContents(w http.ResponseWriter, r *http.Request) { http.Error(w, "ERROR recovering file contents", http.StatusInternalServerError) return } + outputLen := int64(len(output)) + // unlimited for FileContentsLimit <= 0 + if s.fileContentsLimitBytes > 0 && outputLen > s.fileContentsLimitBytes { + zs.Warnf("File contents size %d bytes exceeds limit %d MB for md5 %s", outputLen, s.config.Scanning.FileContentsLimit, md5) + http.Error(w, fmt.Sprintf("file contents size (%d bytes) exceeds the maximum allowed limit (%d MB)", + outputLen, s.config.Scanning.FileContentsLimit), http.StatusRequestEntityTooLarge) + return + } charset := detectCharset(output) if s.config.App.Trace { - zs.Debugf("Sending back contents: %v - '%s'", len(output), output) + zs.Debugf("Sending back contents: %v - '%s'", outputLen, output) } else { - zs.Debugf("Sending back contents: %v", len(output)) + zs.Debugf("Sending back contents: %v", outputLen) } w.Header().Set(ContentTypeKey, fmt.Sprintf("text/plain; charset=%s", charset)) w.Header().Set(CharsetDetectedKey, charset) - w.Header().Set(ContentLengthKey, fmt.Sprintf("%d", 
len(output))) + w.Header().Set(ContentLengthKey, fmt.Sprintf("%d", outputLen)) printResponse(w, string(output), zs, false) } diff --git a/pkg/service/filecontents_service_test.go b/pkg/service/filecontents_service_test.go index 68f4018..480ea4b 100644 --- a/pkg/service/filecontents_service_test.go +++ b/pkg/service/filecontents_service_test.go @@ -36,14 +36,13 @@ func TestFileContents(t *testing.T) { myConfig := setupConfig(t) myConfig.App.Trace = true myConfig.Scanning.ScanDebug = true - apiService := NewAPIService(myConfig) - tests := []struct { - name string - input map[string]string - binary string - telemetry bool - want int + name string + input map[string]string + binary string + telemetry bool + fileContentsLimit int64 + want int }{ { name: "Test Contents - empty", @@ -79,6 +78,30 @@ func TestFileContents(t *testing.T) { input: map[string]string{"md5": "37f7cd1e657aa3c30ece35995b4c59e5"}, want: http.StatusOK, }, + { + name: "Test Contents - within file size limit", + binary: "../../test-support/scanoss.sh", + telemetry: false, + input: map[string]string{"md5": "37f7cd1e657aa3c30ece35995b4c59e5"}, + fileContentsLimit: 50, // 50 MB + want: http.StatusOK, + }, + { + name: "Test Contents - limit disabled (zero)", + binary: "../../test-support/scanoss.sh", + telemetry: false, + input: map[string]string{"md5": "37f7cd1e657aa3c30ece35995b4c59e5"}, + fileContentsLimit: 0, // disabled + want: http.StatusOK, + }, + { + name: "Test Contents - exceeds file size limit", + binary: "../../test-support/scanoss.sh", + telemetry: false, + input: map[string]string{"md5": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}, + fileContentsLimit: 1, // 1 MB - large output (~1.1 MB) will exceed this + want: http.StatusRequestEntityTooLarge, + }, } for _, test := range tests { @@ -92,9 +115,11 @@ func TestFileContents(t *testing.T) { } myConfig.Scanning.ScanBinary = test.binary myConfig.Telemetry.Enabled = test.telemetry + myConfig.Scanning.FileContentsLimit = test.fileContentsLimit + svc := 
NewAPIService(myConfig) req := newReq("GET", "http://localhost/file_contents/{md5}", "", test.input) w := httptest.NewRecorder() - apiService.FileContents(w, req) + svc.FileContents(w, req) resp := w.Result() body, err := io.ReadAll(resp.Body) if err != nil { @@ -108,6 +133,31 @@ func TestFileContents(t *testing.T) { } } +func TestFileContentsLimitExceeded(t *testing.T) { + err := zlog.NewSugaredDevLogger() + if err != nil { + t.Fatalf("an error '%s' was not expected when opening a sugared logger", err) + } + defer zlog.SyncZap() + myConfig := setupConfig(t) + myConfig.Scanning.ScanBinary = "../../test-support/scanoss.sh" + myConfig.Scanning.FileContentsLimit = 1 // 1 MB + apiService := NewAPIService(myConfig) + + // Use special md5 that triggers large output (~1.1 MB) exceeding the 1 MB limit + req := newReq("GET", "http://localhost/file_contents/{md5}", "", map[string]string{"md5": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}) + w := httptest.NewRecorder() + apiService.FileContents(w, req) + resp := w.Result() + body, err := io.ReadAll(resp.Body) + if err != nil { + t.Fatalf("an error was not expected when reading from request: %v", err) + } + assert.Equal(t, http.StatusRequestEntityTooLarge, resp.StatusCode) + assert.Contains(t, resp.Header.Get("Content-Type"), "text/plain") + assert.Contains(t, string(body), "exceeds the maximum allowed limit") +} + func TestDetectCharset(t *testing.T) { tests := []struct { name string diff --git a/pkg/service/utils_service.go b/pkg/service/utils_service.go index 08c97e3..322e8a9 100644 --- a/pkg/service/utils_service.go +++ b/pkg/service/utils_service.go @@ -65,13 +65,14 @@ type TraceContextKey struct{} // APIService details. type APIService struct { - config *myconfig.ServerConfig + config *myconfig.ServerConfig + fileContentsLimitBytes int64 } // NewAPIService instantiates an API Service instance for servicing the API requests. 
func NewAPIService(config *myconfig.ServerConfig) *APIService { setupMetrics() - return &APIService{config: config} + return &APIService{config: config, fileContentsLimitBytes: config.Scanning.FileContentsLimit * 1024 * 1024} } // Structure for counting the total number of requests processed. diff --git a/pkg/service/utils_service_test.go b/pkg/service/utils_service_test.go index db3a863..a7aa173 100644 --- a/pkg/service/utils_service_test.go +++ b/pkg/service/utils_service_test.go @@ -37,6 +37,8 @@ import ( ) // newReq sets up a request with specified URL variables. +// +//nolint:unparam // method is always GET for now but kept for flexibility func newReq(method, path, body string, vars map[string]string) *http.Request { r := httptest.NewRequest(method, path, strings.NewReader(body)) return mux.SetURLVars(r, vars) diff --git a/test-support/scanoss.sh b/test-support/scanoss.sh index 0f5dcc0..9ff128e 100755 --- a/test-support/scanoss.sh +++ b/test-support/scanoss.sh @@ -32,6 +32,11 @@ if [ "$1" == "-k" ] || [ "$2" == "-k" ] || [ "$3" == "-k" ] ; then echo "Error: Invalid MD5 hash format: $md5" >&2 exit 1 fi + # Simulate large file contents (>1MB) for a specific md5 + if [ "$md5" == "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" ]; then + head -c 1100000 /dev/zero | tr '\0' 'A' + exit 0 + fi echo "file contents: $md5" echo "line 2" echo "line 3"