From b00661c2d4791079c942422eb3d89ae42ed0ef07 Mon Sep 17 00:00:00 2001 From: matthew-pilot Date: Thu, 28 May 2026 07:08:40 +0000 Subject: [PATCH] fix: add MaxHeaderBytes and body size limits to HTTP servers (PILOT-107) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ListenAndServe and pebble-serve: add MaxHeaderBytes (1 MB). pebble-serve: add missing IdleTimeout. All json.NewDecoder(r.Body) calls: wrap with http.MaxBytesReader(w, r.Body, 1<<20) to prevent unbounded request body allocation. Fixes: PILOT-107 — Cosift HTTP server has no request body size limit --- cmd/cosift/pebble_serve.go | 12 +++++++----- internal/server/http.go | 11 ++++++----- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/cmd/cosift/pebble_serve.go b/cmd/cosift/pebble_serve.go index bbb9f02..42fed2a 100644 --- a/cmd/cosift/pebble_serve.go +++ b/cmd/cosift/pebble_serve.go @@ -408,6 +408,8 @@ func runPebbleServe(ctx context.Context, cfg *config.Config, args []string) erro ReadHeaderTimeout: 10 * time.Second, ReadTimeout: 30 * time.Second, WriteTimeout: 60 * time.Second, + IdleTimeout: 120 * time.Second, + MaxHeaderBytes: 1 << 20, // 1 MB } log.Printf("pebble-serve: listening on %s (PebbleStore at %s)", *addr, *dir) @@ -3006,7 +3008,7 @@ type searchRequest struct { func (s *pebbleHTTP) handleSearchPOST(w http.ResponseWriter, r *http.Request) { var req searchRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, 1<<20)).Decode(&req); err != nil { writeProblem(w, http.StatusBadRequest, "invalid JSON body: "+err.Error()) return } @@ -3069,7 +3071,7 @@ type findSimilarRequest struct { func (s *pebbleHTTP) handleFindSimilarPOST(w http.ResponseWriter, r *http.Request) { var req findSimilarRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, 1<<20)).Decode(&req); err != nil { writeProblem(w, 
http.StatusBadRequest, "invalid JSON body: "+err.Error()) return } @@ -3161,7 +3163,7 @@ func (req synthRequest) toValues() url.Values { func (s *pebbleHTTP) handleAnswerPOST(w http.ResponseWriter, r *http.Request) { var req synthRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, 1<<20)).Decode(&req); err != nil { writeProblem(w, http.StatusBadRequest, "invalid JSON body: "+err.Error()) return } @@ -3171,7 +3173,7 @@ func (s *pebbleHTTP) handleAnswerPOST(w http.ResponseWriter, r *http.Request) { func (s *pebbleHTTP) handleResearchPOST(w http.ResponseWriter, r *http.Request) { var req synthRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, 1<<20)).Decode(&req); err != nil { writeProblem(w, http.StatusBadRequest, "invalid JSON body: "+err.Error()) return } @@ -5810,7 +5812,7 @@ type contentsBatchItem struct { func (s *pebbleHTTP) handleContentsBatch(w http.ResponseWriter, r *http.Request) { start := time.Now() var req contentsBatchReq - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, 1<<20)).Decode(&req); err != nil { writeProblem(w, http.StatusBadRequest, "invalid JSON body: "+err.Error()) return } diff --git a/internal/server/http.go b/internal/server/http.go index dd46ad9..47ad9c3 100644 --- a/internal/server/http.go +++ b/internal/server/http.go @@ -497,7 +497,7 @@ func (s *Server) handleFeedback(w http.ResponseWriter, r *http.Request) { return } var req FeedbackRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, 1<<20)).Decode(&req); err != nil { writeProblem(w, http.StatusBadRequest, "invalid JSON body") return } @@ -683,7 +683,7 @@ type AdminRecrawlResponse struct { // next pass. This split keeps the API endpoint stateless and quick. 
func (s *Server) handleAdminRecrawl(w http.ResponseWriter, r *http.Request) { var req AdminRecrawlRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, 1<<20)).Decode(&req); err != nil { writeProblem(w, http.StatusBadRequest, "invalid JSON body") return } @@ -744,7 +744,7 @@ type AdminRecrawlByDomainResponse struct { // batch-/contents cap pattern). Larger sweeps should be split. func (s *Server) handleAdminRecrawlByDomain(w http.ResponseWriter, r *http.Request) { var req AdminRecrawlByDomainRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, 1<<20)).Decode(&req); err != nil { writeProblem(w, http.StatusBadRequest, "invalid JSON body") return } @@ -831,7 +831,7 @@ func (s *Server) handleAdminReembed(w http.ResponseWriter, r *http.Request) { } var req AdminReembedRequest if r.ContentLength > 0 { - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, 1<<20)).Decode(&req); err != nil { writeProblem(w, http.StatusBadRequest, "invalid JSON body") return } @@ -2189,7 +2189,7 @@ type ContentsBatchItem struct { func (s *Server) handleContentsBatch(w http.ResponseWriter, r *http.Request) { start := time.Now() var req ContentsBatchRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, 1<<20)).Decode(&req); err != nil { writeProblem(w, http.StatusBadRequest, fmt.Sprintf("invalid json body: %v", err)) return } @@ -3152,6 +3152,7 @@ func ListenAndServe(ctx context.Context, addr string, h http.Handler) error { ReadTimeout: 30 * time.Second, WriteTimeout: 60 * time.Second, IdleTimeout: 120 * time.Second, + MaxHeaderBytes: 1 << 20, // 1 MB } errCh := make(chan error, 1) go func() { errCh <- srv.ListenAndServe() }()