diff --git a/internal/server/http.go b/internal/server/http.go index dd46ad9..b8fad6b 100644 --- a/internal/server/http.go +++ b/internal/server/http.go @@ -30,6 +30,10 @@ import ( "github.com/pilot-protocol/cosift/internal/store" ) +// maxRequestBodySize caps the JSON request body to prevent OOM on +// maliciously large payloads. 1 MiB is generous even for batch endpoints. +const maxRequestBodySize = 1 << 20 + // FetchFn fetches and parses a single URL. Used by /contents on store-miss. // Empty title/text + nil error is acceptable; callers can decide what to do. // Implementations are responsible for politeness — server doesn't rate-limit. @@ -497,7 +501,7 @@ func (s *Server) handleFeedback(w http.ResponseWriter, r *http.Request) { return } var req FeedbackRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, maxRequestBodySize)).Decode(&req); err != nil { writeProblem(w, http.StatusBadRequest, "invalid JSON body") return } @@ -683,7 +687,7 @@ type AdminRecrawlResponse struct { // next pass. This split keeps the API endpoint stateless and quick. func (s *Server) handleAdminRecrawl(w http.ResponseWriter, r *http.Request) { var req AdminRecrawlRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, maxRequestBodySize)).Decode(&req); err != nil { writeProblem(w, http.StatusBadRequest, "invalid JSON body") return } @@ -744,7 +748,7 @@ type AdminRecrawlByDomainResponse struct { // batch-/contents cap pattern). Larger sweeps should be split. func (s *Server) handleAdminRecrawlByDomain(w http.ResponseWriter, r *http.Request) { var req AdminRecrawlByDomainRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, maxRequestBodySize)).Decode(&req); err != nil { writeProblem(w, http.StatusBadRequest, "invalid JSON body") return } @@ -831,7 +835,7 @@ func (s *Server) handleAdminReembed(w http.ResponseWriter, r *http.Request) { } var req AdminReembedRequest if r.ContentLength > 0 { - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, maxRequestBodySize)).Decode(&req); err != nil { writeProblem(w, http.StatusBadRequest, "invalid JSON body") return } @@ -2189,7 +2193,7 @@ type ContentsBatchItem struct { func (s *Server) handleContentsBatch(w http.ResponseWriter, r *http.Request) { start := time.Now() var req ContentsBatchRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, maxRequestBodySize)).Decode(&req); err != nil { writeProblem(w, http.StatusBadRequest, fmt.Sprintf("invalid json body: %v", err)) return } diff --git a/internal/server/http_test.go b/internal/server/http_test.go index 2496c58..c4bdf1a 100644 --- a/internal/server/http_test.go +++ b/internal/server/http_test.go @@ -1,6 +1,7 @@ package server import ( + "bytes" "context" "encoding/json" "fmt" @@ -2728,6 +2729,37 @@ func TestAdminRecrawlBadInput(t *testing.T) { } } +func TestAdminRecrawlRejectsOversizedBody(t *testing.T) { + s, _ := store.OpenMemory() + t.Cleanup(func() { s.Close() }) + srv := New(s).WithAdminToken("k") + httpSrv := httptest.NewServer(srv.Handler()) + defer httpSrv.Close() + + // Send a body larger than the 1 MiB limit. + large := make([]byte, 2<<20) + for i := range large { + large[i] = ' ' + } + // Make it valid JSON so the decoder (not the parser) trips the limit. + large[0] = '{' + large[len(large)-1] = '}' + + req, _ := http.NewRequest("POST", httpSrv.URL+"/admin/recrawl", bytes.NewReader(large)) + req.Header.Set("Authorization", "Bearer k") + req.Header.Set("Content-Type", "application/json") + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("unexpected error sending request: %v", err) + } + resp.Body.Close() + // MaxBytesReader returns 413 (StatusRequestEntityTooLarge) or the decoder + // error surfaces as 400. Either is acceptable. + if resp.StatusCode != 400 && resp.StatusCode != 413 { + t.Errorf("oversized body: got %d, want 400 or 413", resp.StatusCode) + } +} + // paraphraseChat returns a fixed paraphrase JSON array — lets the expand path // be exercised without real LLM calls. type paraphraseChat struct {