From 46553f28758bbc77956438517babb7b4210704e5 Mon Sep 17 00:00:00 2001 From: Brandon Honeycutt <697896+brandonhon@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:06:25 -0500 Subject: [PATCH 01/15] fix(security): sanitize ingested HTML and guard rendered URLs Feed/article HTML is rendered via {@html}, with the CSP as the only defense against stored XSS. Add server-side sanitization so the CSP and sanitization are defense-in-depth rather than a single control. - feed.SanitizeHTML (bluemonday UGCPolicy): strips `, + mustDrop: []string{"hi

"}, + }, + { + name: "strips inline event handler", + in: ``, + mustDrop: []string{"onerror", "alert(1)"}, + mustKeep: []string{"x`, + mustDrop: []string{"javascript:"}, + mustKeep: []string{"x"}, + }, + { + name: "keeps benign formatting and links", + in: `

bold link

`, + mustKeep: []string{"bold", `href="https://example.com"`}, + }, + { + name: "empty in empty out", + in: "", + mustKeep: nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := SanitizeHTML(tt.in) + for _, d := range tt.mustDrop { + if strings.Contains(got, d) { + t.Errorf("sanitized output still contains %q: %q", d, got) + } + } + for _, k := range tt.mustKeep { + if !strings.Contains(got, k) { + t.Errorf("sanitized output dropped %q: %q", k, got) + } + } + }) + } +} + +func TestSafeHTTPURL(t *testing.T) { + tests := []struct { + in, want string + }{ + {"https://example.com", "https://example.com"}, + {"http://example.com/a?b=c", "http://example.com/a?b=c"}, + {" https://example.com ", "https://example.com"}, + {"javascript:alert(1)", ""}, + {"data:text/html,", ""}, + {"ftp://example.com", ""}, + {"//example.com", ""}, + {"", ""}, + } + for _, tt := range tests { + if got := SafeHTTPURL(tt.in); got != tt.want { + t.Errorf("SafeHTTPURL(%q) = %q, want %q", tt.in, got, tt.want) + } + } +} diff --git a/internal/poller/poller.go b/internal/poller/poller.go index 7bddd75..b188a74 100644 --- a/internal/poller/poller.go +++ b/internal/poller/poller.go @@ -650,7 +650,10 @@ func (p *Poller) summarizeOne(ctx context.Context, articleID int64) { // paragraphs so the Reader's prose styling kicks in (the LLM emits plain // text, not HTML). if cleaned := strings.TrimSpace(res.Cleaned); cleaned != "" { - html := paragraphizePlain(cleaned) + // cleaned_html is rendered via {@html}; sanitize the model output (a + // prompt-injected feed could coax HTML out of the summarizer) before it + // is paragraphized and stored. + html := feed.SanitizeHTML(paragraphizePlain(cleaned)) if err := p.Store.UpdateCleanedHTML(ctx, articleID, html); err != nil { p.Logger.Warn("poller: persist cleaned_html", "article_id", articleID, "err", err) } diff --git a/internal/ttrss/ttrss.go b/internal/ttrss/ttrss.go index 52ed3d1..a5757d4 100644 --- a/internal/ttrss/ttrss.go +++ b/internal/ttrss/ttrss.go @@ -11,7 +11,6 @@ import ( "fmt" "io" "net/http" - "net/url" "strings" "time" @@ -141,19 +140,22 @@ func (s *Service) save(ctx context.Context, userID, feedID int64, it normItem) ( if guid == "" { return false, true, nil // nothing to identify or dedup on } - link := safeHTTPURL(it.link) - // Derive plain text from the HTML body the same way feed.Parse does, so - // imported items get a card excerpt and are full-text searchable. The hash - // is computed over the text (matching the parser) for consistency; guid - // dedup still dominates, so re-imports stay idempotent regardless. - text := feed.HTMLToText(it.content) + link := feed.SafeHTTPURL(it.link) + // Sanitize the embedded HTML before storing; imported bodies are rendered + // via {@html} like any feed article. Derive plain text from the sanitized + // body the same way feed.Parse does, so imported items get a card excerpt + // and are full-text searchable. The hash is computed over the text (matching + // the parser) for consistency; guid dedup still dominates, so re-imports + // stay idempotent regardless. + content := feed.SanitizeHTML(it.content) + text := feed.HTMLToText(content) saved, ins, err := s.Store.UpsertArticle(ctx, models.Article{ FeedID: feedID, GUID: guid, URL: link, Title: it.title, Author: it.author, - ContentHTML: it.content, + ContentHTML: content, ContentText: text, PublishedAt: it.published, ContentHash: feed.ContentHash(link, it.title, text), @@ -204,21 +206,6 @@ func (s *Service) ensureImportFeed(ctx context.Context, userID int64) (int64, er return f.ID, nil } -// safeHTTPURL returns raw only if it is an http(s) URL, else "". The link is -// stored and later rendered as an href; this drops javascript:/data: and -// other non-web schemes. Import never fetches it, so there is no SSRF surface. -func safeHTTPURL(raw string) string { - raw = strings.TrimSpace(raw) - if raw == "" { - return "" - } - u, err := url.Parse(raw) - if err != nil || (u.Scheme != "http" && u.Scheme != "https") { - return "" - } - return raw -} - // parseTime parses TT-RSS's "YYYY-MM-DD HH:MM:SS" updated stamp, falling back // to RFC3339. Returns 0 (unknown) when unparseable. func parseTime(v string) int64 { From 4fe15225211dded0c68dadd78ff97100048da7e8 Mon Sep 17 00:00:00 2001 From: Brandon Honeycutt <697896+brandonhon@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:06:25 -0500 Subject: [PATCH 02/15] fix(auth): equalize login timing on unknown username A missing username returned in ~1ms while a real one took ~100ms (argon2), leaking account existence via a timing side channel. Run a throwaway argon2id derivation with the live cost params on the user-not-found path so both paths cost the same. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/auth/auth.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/internal/auth/auth.go b/internal/auth/auth.go index 44c01a9..50db309 100644 --- a/internal/auth/auth.go +++ b/internal/auth/auth.go @@ -176,6 +176,15 @@ func (a *Auth) VerifyPassword(plain, encoded string) error { return nil } +// equalizeTiming runs a throwaway argon2id derivation with the live cost +// params so the user-not-found path costs the same as a real VerifyPassword. +// The result is discarded — only the elapsed time matters. +func (a *Auth) equalizeTiming(password string) { + var salt [16]byte // fixed; the derivation is never compared, only timed + _ = argon2.IDKey([]byte(password), salt[:], + a.Params.Iterations, a.Params.Memory, a.Params.Parallelism, a.Params.KeyLength) +} + func decodeArgon2id(encoded string) (Params, []byte, []byte, error) { parts := strings.Split(encoded, "$") if len(parts) != 6 || parts[1] != "argon2id" { @@ -390,6 +399,10 @@ func (a *Auth) BootstrapAdmin(ctx context.Context, username, password string) (m func (a *Auth) Login(ctx context.Context, w http.ResponseWriter, r *http.Request, username, password string) (models.User, error) { u, err := a.Store.GetUserByUsername(ctx, username) if errors.Is(err, store.ErrNotFound) { + // Equalize timing with the found-user path: without a throwaway argon2 + // derivation, a missing username returns in ~1ms while a real one takes + // ~100ms, leaking account existence via a timing side channel. + a.equalizeTiming(password) return models.User{}, ErrInvalidCredentials } if err != nil { From 6ba932fec4df666cec8db5841728035c698f1325 Mon Sep 17 00:00:00 2001 From: Brandon Honeycutt <697896+brandonhon@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:06:25 -0500 Subject: [PATCH 03/15] fix(store): don't fetch user secrets for the user-list endpoint handleListUsers projected away secrets in Go but ListUsers still SELECTed password_hash and fever_token into memory. Add ListUsersPublic (id, username, email, is_admin, created_at) so those columns never leave the database for a request that only needs identity and role. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/api/user_handlers.go | 2 +- internal/store/users.go | 36 +++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/internal/api/user_handlers.go b/internal/api/user_handlers.go index 233f1e8..4ccdd00 100644 --- a/internal/api/user_handlers.go +++ b/internal/api/user_handlers.go @@ -32,7 +32,7 @@ type adminUserView struct { } func (d *Dependencies) handleListUsers(w http.ResponseWriter, r *http.Request) { - users, err := d.Store.ListUsers(r.Context()) + users, err := d.Store.ListUsersPublic(r.Context()) if mapStoreError(w, err) { return } diff --git a/internal/store/users.go b/internal/store/users.go index ced19c0..719d51a 100644 --- a/internal/store/users.go +++ b/internal/store/users.go @@ -92,6 +92,42 @@ func (s *Store) ListUsers(ctx context.Context) ([]models.User, error) { return out, rows.Err() } +// PublicUser is the non-secret projection of a user. It deliberately excludes +// password_hash, fever_token, and settings_json so those columns never leave +// the database for a request (the user-list endpoint) that only needs identity +// and role. +type PublicUser struct { + ID int64 + Username string + Email string + IsAdmin bool + CreatedAt int64 +} + +// ListUsersPublic returns all users without their secret columns, ordered by +// id. Backs the /api/users endpoint; the non-admin caller path projects this +// down further to id+username. +func (s *Store) ListUsersPublic(ctx context.Context) ([]PublicUser, error) { + rows, err := s.DB.QueryContext(ctx, ` + SELECT id, username, IFNULL(email,''), is_admin, created_at + FROM users ORDER BY id`) + if err != nil { + return nil, err + } + defer rows.Close() + var out []PublicUser + for rows.Next() { + var u PublicUser + var isAdmin int + if err := rows.Scan(&u.ID, &u.Username, &u.Email, &isAdmin, &u.CreatedAt); err != nil { + return nil, err + } + u.IsAdmin = isAdmin == 1 + out = append(out, u) + } + return out, rows.Err() +} + // CountUsers returns the number of users (used by first-run bootstrap). func (s *Store) CountUsers(ctx context.Context) (int, error) { var n int From c6a2115466fd79261f3fa99a57f137336dafd41e Mon Sep 17 00:00:00 2001 From: Brandon Honeycutt <697896+brandonhon@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:06:25 -0500 Subject: [PATCH 04/15] fix(api): don't leak TT-RSS network errors to the client The TT-RSS API import returned raw err.Error(), exposing the resolved endpoint, internal hostnames, and DNS/TLS detail. Log the full error server-side and return a generic message, mirroring the SMTP-test handler. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/api/feed_handlers.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/internal/api/feed_handlers.go b/internal/api/feed_handlers.go index 98a2ca7..05eff6d 100644 --- a/internal/api/feed_handlers.go +++ b/internal/api/feed_handlers.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "errors" + "log/slog" "net/http" "time" @@ -342,7 +343,12 @@ func (d *Dependencies) handleTTRSSAPIImport(w http.ResponseWriter, r *http.Reque ImportArchived: req.ImportArchived, }) if err != nil { - writeError(w, http.StatusBadGateway, "import_failed", err.Error()) + // Log the full error server-side for diagnosis; return a generic + // message. Raw net/http / DNS / TLS errors carry the resolved endpoint, + // internal hostnames, and TLS detail that shouldn't reach the client. + slog.Default().Warn("ttrss api import failed", "url", req.URL, "err", err) + writeError(w, http.StatusBadGateway, "import_failed", + "could not import from TT-RSS — check the URL/credentials and the server logs.") return } writeData(w, http.StatusOK, res, nil) From 7d01926d32131cd53c50832ce65560cd23ca0d77 Mon Sep 17 00:00:00 2001 From: Brandon Honeycutt <697896+brandonhon@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:38:00 -0500 Subject: [PATCH 05/15] fix(feed): validate the discovered feed link in Discover Discover() returned the alternate URL without running it through the SSRF validator, while DiscoverAll already validates each discovered link. Validate before returning; on reject/unresolvable, fall through to the fallback probes (which validate each) rather than handing back an unchecked URL. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/feed/discover.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/internal/feed/discover.go b/internal/feed/discover.go index 9f8d6d7..f5e2f57 100644 --- a/internal/feed/discover.go +++ b/internal/feed/discover.go @@ -70,8 +70,16 @@ func Discover(ctx context.Context, c *http.Client, target string, validate func( } if href := findAlternateInHTML(body); href != "" { - abs, _ := resolveRef(parsedTarget, href) - return abs, nil + if abs, rerr := resolveRef(parsedTarget, href); rerr == nil && abs != "" { + // The discovered link crosses the same trust boundary as the + // target; validate before returning it (the caller fetches it). + // On reject/unresolvable, fall through to the fallback probes — + // matching DiscoverAll's drop-and-continue rather than handing back + // an unchecked URL. + if verr := validate(abs); verr == nil { + return abs, nil + } + } } for _, p := range DiscoveryFallbacks { From 3012d8ca966464cdf542ffb5707e2da2b1e59310 Mon Sep 17 00:00:00 2001 From: Brandon Honeycutt <697896+brandonhon@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:38:00 -0500 Subject: [PATCH 06/15] fix(ttrss): thread request ctx into the API redirect guard apiClient built its redirect SSRF guard with context.Background(), so the per-redirect validation ignored the import request's cancellation and deadline. Pass the call's ctx through instead. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/ttrss/api.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/internal/ttrss/api.go b/internal/ttrss/api.go index e08af72..e6fce29 100644 --- a/internal/ttrss/api.go +++ b/internal/ttrss/api.go @@ -55,7 +55,7 @@ func (s *Service) ImportFromAPI(ctx context.Context, userID int64, opt APIOption return res, fmt.Errorf("ttrss: URL rejected: %w", err) } } - client := s.apiClient() + client := s.apiClient(ctx) sid, err := s.login(ctx, client, endpoint, opt.Username, opt.Password) if err != nil { @@ -94,14 +94,17 @@ func apiEndpoint(base string) string { return base + "/api/" } -func (s *Service) apiClient() *http.Client { +// apiClient builds the HTTP client for the live pull. ctx is the import +// request's context, threaded into the redirect SSRF guard so a slow redirect +// check honors the caller's cancellation/deadline rather than running detached. +func (s *Service) apiClient(ctx context.Context) *http.Client { if s.HTTPClient != nil { return s.HTTPClient } c := &http.Client{Timeout: apiCallTimeout} if s.ValidateURL != nil { c.CheckRedirect = feed.RedirectGuard(func(raw string) error { - return s.ValidateURL(context.Background(), raw) + return s.ValidateURL(ctx, raw) }) } return c From fdb78a549bd3253789cff26841872abd85dbef3d Mon Sep 17 00:00:00 2001 From: Brandon Honeycutt <697896+brandonhon@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:38:00 -0500 Subject: [PATCH 07/15] fix(search): return 400 on malformed FTS5 query, not 500 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A syntactically-invalid search (unbalanced quote, bare boolean operator, bad column filter) surfaced as a 500. Detect the FTS5 MATCH-syntax error classes, return store.ErrInvalidQuery, and map it to 400 — it's client input, not a server fault. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/api/render.go | 2 ++ internal/store/search.go | 21 +++++++++++++++++++++ internal/store/search_test.go | 23 +++++++++++++++++++++++ internal/store/store.go | 5 +++++ 4 files changed, 51 insertions(+) diff --git a/internal/api/render.go b/internal/api/render.go index 4e17962..6ce760c 100644 --- a/internal/api/render.go +++ b/internal/api/render.go @@ -57,6 +57,8 @@ func mapStoreError(w http.ResponseWriter, err error) bool { writeError(w, http.StatusForbidden, "forbidden", "forbidden") case errors.Is(err, store.ErrConflict): writeError(w, http.StatusConflict, "conflict", "resource already exists") + case errors.Is(err, store.ErrInvalidQuery): + writeError(w, http.StatusBadRequest, "bad_request", "invalid search query") default: slog.Default().Error("store error", "err", err) writeError(w, http.StatusInternalServerError, "internal", "internal error") diff --git a/internal/store/search.go b/internal/store/search.go index 3dcbb09..b202f32 100644 --- a/internal/store/search.go +++ b/internal/store/search.go @@ -2,6 +2,8 @@ package store import ( "context" + "fmt" + "strings" "github.com/brandonhon/ember/internal/models" ) @@ -37,6 +39,12 @@ func (s *Store) Search(ctx context.Context, userID int64, query string, limit in ORDER BY rank LIMIT ?`, userID, userID, query, limit) if err != nil { + // A malformed MATCH expression (unbalanced quote, bare operator, bad + // column filter) is a client mistake, not a server fault — surface it + // as ErrInvalidQuery so the api layer returns 400 instead of 500. + if isFTSQueryError(err) { + return nil, fmt.Errorf("%w: %v", ErrInvalidQuery, err) + } return nil, err } defer rows.Close() @@ -57,3 +65,16 @@ func (s *Store) Search(ctx context.Context, userID int64, query string, limit in } return out, rows.Err() } + +// isFTSQueryError reports whether err is a SQLite FTS5 MATCH-syntax error +// caused by malformed user input (vs. a genuine infrastructure fault). The +// phrases below are the full set observed from modernc.org/sqlite for bad +// queries: unbalanced quote, bare operator, unknown column filter, and the +// "special query" prefix-search error. +func isFTSQueryError(err error) bool { + msg := strings.ToLower(err.Error()) + return strings.Contains(msg, "fts5:") || + strings.Contains(msg, "unterminated string") || + strings.Contains(msg, "unknown special query") || + strings.Contains(msg, "no such column") +} diff --git a/internal/store/search_test.go b/internal/store/search_test.go index 2059f11..cfea77b 100644 --- a/internal/store/search_test.go +++ b/internal/store/search_test.go @@ -2,6 +2,7 @@ package store import ( "context" + "errors" "testing" "github.com/brandonhon/ember/internal/models" @@ -42,6 +43,28 @@ func TestSearch_RankedHits(t *testing.T) { } } +func TestSearch_MalformedQueryIsInvalid(t *testing.T) { + s := NewTest(t) + ctx := context.Background() + u, _ := s.CreateUser(ctx, models.User{Username: "u", PasswordHash: "h"}) + f, _ := s.UpsertFeed(ctx, models.Feed{URL: "https://x.test/f", Title: "X"}) + _, _ = s.Subscribe(ctx, models.Subscription{UserID: u.ID, FeedID: f.ID}) + + // Each of these is a distinct FTS5 syntax failure class; all must map to + // ErrInvalidQuery (-> 400) rather than a bare error (-> 500). + for _, q := range []string{`"`, `(`, `NOT NOT`, `AND`, `foo:`, `*`} { + _, err := s.Search(ctx, u.ID, q, 10) + if !errors.Is(err, ErrInvalidQuery) { + t.Errorf("query %q: want ErrInvalidQuery, got %v", q, err) + } + } + + // A valid query against an empty index is not an error. + if _, err := s.Search(ctx, u.ID, "rust", 10); err != nil { + t.Errorf("valid query errored: %v", err) + } +} + func TestSearch_ScopedToSubscriptions(t *testing.T) { s := NewTest(t) ctx := context.Background() diff --git a/internal/store/store.go b/internal/store/store.go index 215006f..2e64498 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -22,6 +22,11 @@ var ErrForbidden = errors.New("store: forbidden") // duplicate category name within a user, etc.). var ErrConflict = errors.New("store: conflict") +// ErrInvalidQuery is returned by Search when the user's text is not a valid +// FTS5 MATCH expression (unbalanced quote, bare boolean operator, bad column +// filter). The api layer maps it to 400 rather than a 500. +var ErrInvalidQuery = errors.New("store: invalid search query") + // ErrNoNewContent is returned by Poller.ExtractArticle when readability ran // but the result wasn't an improvement over the stored body. Defined here so // the api package can errors.Is without importing poller (which would create From b7113f8a41506b39c41b371bd601c6d990adaa00 Mon Sep 17 00:00:00 2001 From: Brandon Honeycutt <697896+brandonhon@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:50:26 -0500 Subject: [PATCH 08/15] fix(auth): enforce 8-char minimum on bootstrap admin password BootstrapAdmin accepted any non-empty EMBER_ADMIN_PASSWORD while the create-user and change-password paths enforce an 8-char floor. Apply the same minimum so the first-run admin can't be seeded weak. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/auth/auth.go | 5 +++++ internal/auth/auth_test.go | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/internal/auth/auth.go b/internal/auth/auth.go index 50db309..7d352ca 100644 --- a/internal/auth/auth.go +++ b/internal/auth/auth.go @@ -376,6 +376,11 @@ func (a *Auth) BootstrapAdmin(ctx context.Context, username, password string) (m if username == "" || password == "" { return models.User{}, false, errors.New("auth: bootstrap admin requires EMBER_ADMIN_USER and EMBER_ADMIN_PASSWORD") } + // Enforce the same 8-char floor as the create-user/change-password paths so + // the first-run admin can't be seeded with a trivially weak password. + if len(password) < 8 { + return models.User{}, false, errors.New("auth: EMBER_ADMIN_PASSWORD must be at least 8 characters") + } hash, err := a.HashPassword(password) if err != nil { return models.User{}, false, err diff --git a/internal/auth/auth_test.go b/internal/auth/auth_test.go index 6f74ac2..dc658c6 100644 --- a/internal/auth/auth_test.go +++ b/internal/auth/auth_test.go @@ -294,6 +294,12 @@ func TestBootstrapAdmin(t *testing.T) { if _, _, err := a2.BootstrapAdmin(ctx, "", ""); err == nil { t.Error("empty creds should fail") } + + // Sub-8-char password on first run → error (no admin created). + a3 := newAuth(t) + if _, created, err := a3.BootstrapAdmin(ctx, "root", "short7!"); err == nil || created { + t.Errorf("short password should fail: created=%v err=%v", created, err) + } } func TestLogin(t *testing.T) { From 1d8bf9638370f157b1d79a8db9ea54c14f2a064d Mon Sep 17 00:00:00 2001 From: Brandon Honeycutt <697896+brandonhon@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:50:26 -0500 Subject: [PATCH 09/15] refactor(feed): bake the SSRF redirect guard into NewFetcher MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NewFetcher built an unguarded client and relied on every caller to set CheckRedirect afterward. Make validate a constructor parameter and wire RedirectGuard in at construction so the safe default is the only option — a caller can't accidentally build a fetcher that follows redirects to private addresses. Co-Authored-By: Claude Opus 4.8 (1M context) --- cmd/ember/main.go | 6 +++--- internal/feed/fetch.go | 14 +++++++++++--- internal/feed/fetch_test.go | 8 ++++---- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/cmd/ember/main.go b/cmd/ember/main.go index 1e3e7b5..15d8f74 100644 --- a/cmd/ember/main.go +++ b/cmd/ember/main.go @@ -279,9 +279,9 @@ func run() error { sum = ollamaSum } - fetcher := feed.NewFetcher(30 * time.Second) - // Block redirects to private/internal addresses on every feed fetch. - fetcher.Client.CheckRedirect = feed.RedirectGuard(func(rawURL string) error { + // Block redirects to private/internal addresses on every feed fetch — the + // guard is baked into the fetcher at construction. + fetcher := feed.NewFetcher(30*time.Second, func(rawURL string) error { return urlcheck.Check(ctx, rawURL, cfg.AllowPrivateURLs) }) p := poller.New(st, fetcher, sum, poller.Config{ diff --git a/internal/feed/fetch.go b/internal/feed/fetch.go index 219e7d8..f0a534f 100644 --- a/internal/feed/fetch.go +++ b/internal/feed/fetch.go @@ -46,13 +46,21 @@ func RedirectGuard(validate func(rawURL string) error) func(*http.Request, []*ht } } -// NewFetcher returns a Fetcher with sensible defaults. -func NewFetcher(timeout time.Duration) *Fetcher { +// NewFetcher returns a Fetcher with sensible defaults and an SSRF redirect +// guard wired in at construction, so the safe default is the only option — a +// caller can't accidentally build an unguarded fetcher. validate is invoked on +// every redirect hop's next URL (pass urlcheck.Check); nil still enforces the +// 10-redirect cap but performs no address check, so production must supply a +// real validator. +func NewFetcher(timeout time.Duration, validate func(rawURL string) error) *Fetcher { if timeout <= 0 { timeout = 30 * time.Second } return &Fetcher{ - Client: &http.Client{Timeout: timeout}, + Client: &http.Client{ + Timeout: timeout, + CheckRedirect: RedirectGuard(validate), + }, UserAgent: DefaultUserAgent, } } diff --git a/internal/feed/fetch_test.go b/internal/feed/fetch_test.go index 9fab3ba..775c4a7 100644 --- a/internal/feed/fetch_test.go +++ b/internal/feed/fetch_test.go @@ -35,7 +35,7 @@ func TestFetch_ConditionalGET_304(t *testing.T) { })) defer srv.Close() - f := NewFetcher(5 * time.Second) + f := NewFetcher(5*time.Second, nil) res, err := f.Fetch(context.Background(), srv.URL, etag, lastMod) if err != nil { t.Fatal(err) @@ -65,7 +65,7 @@ func TestFetch_200ReturnsBody(t *testing.T) { })) defer srv.Close() - f := NewFetcher(5 * time.Second) + f := NewFetcher(5*time.Second, nil) res, err := f.Fetch(context.Background(), srv.URL, "", "") if err != nil { t.Fatal(err) @@ -87,7 +87,7 @@ func TestFetch_5xxIsError(t *testing.T) { })) defer srv.Close() - f := NewFetcher(5 * time.Second) + f := NewFetcher(5*time.Second, nil) _, err := f.Fetch(context.Background(), srv.URL, "", "") if err == nil { t.Fatal("expected error on 500") @@ -100,7 +100,7 @@ func TestFetch_Timeout(t *testing.T) { })) defer srv.Close() - f := NewFetcher(50 * time.Millisecond) + f := NewFetcher(50*time.Millisecond, nil) _, err := f.Fetch(context.Background(), srv.URL, "", "") if err == nil { t.Fatal("expected timeout error") From 276246f3feca0d4f20cdf593ebee9e93e4f1bb96 Mon Sep 17 00:00:00 2001 From: Brandon Honeycutt <697896+brandonhon@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:52:12 -0500 Subject: [PATCH 10/15] chore(deps): add bluemonday for HTML sanitization Promotes microcosm-cc/bluemonday (+ douceur, gorilla/css) to direct deps for the feed.SanitizeHTML sanitizer introduced in this backport. Co-Authored-By: Claude Opus 4.8 (1M context) --- go.mod | 3 +++ 1 file changed, 3 insertions(+) diff --git a/go.mod b/go.mod index 791d616..c57cb11 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( github.com/go-shiori/go-readability v0.0.0-20251205110129-5db1dc9836f0 github.com/go-webauthn/webauthn v0.17.4 github.com/gorilla/securecookie v1.1.2 + github.com/microcosm-cc/bluemonday v1.0.27 github.com/mmcdole/gofeed v1.3.0 github.com/pressly/goose/v3 v3.27.1 golang.org/x/crypto v0.52.0 @@ -18,6 +19,7 @@ require ( github.com/PuerkitoBio/goquery v1.8.0 // indirect github.com/andybalholm/cascadia v1.3.3 // indirect github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de // indirect + github.com/aymerick/douceur v0.2.0 // indirect github.com/dustin/go-humanize v1.0.1 // indirect github.com/fxamacker/cbor/v2 v2.9.2 // indirect github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c // indirect @@ -27,6 +29,7 @@ require ( github.com/golang-jwt/jwt/v5 v5.3.1 // indirect github.com/google/go-tpm v0.9.8 // indirect github.com/google/uuid v1.6.0 // indirect + github.com/gorilla/css v1.0.1 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/mattn/go-isatty v0.0.21 // indirect github.com/mfridman/interpolate v0.0.2 // indirect From 75efcabae2ed4cc8414c8adaefeb270512fc758c Mon Sep 17 00:00:00 2001 From: Brandon Honeycutt <697896+brandonhon@users.noreply.github.com> Date: Fri, 5 Jun 2026 15:59:57 -0500 Subject: [PATCH 11/15] fix(feed): require a host in SafeHTTPURL Fuzzing surfaced that a scheme-only value like "HTTP:" (or an opaque "https:foo") passed the guard and was returned as a "safe" URL despite having no host to link/fetch. Not a javascript:/data: bypass, but a sloppy result for a value rendered as an href/src. Require u.Host != "". Relative links are resolved to absolute before this call, so legitimate URLs are unaffected. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/feed/sanitize.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/feed/sanitize.go b/internal/feed/sanitize.go index dea1ca2..2fca1fc 100644 --- a/internal/feed/sanitize.go +++ b/internal/feed/sanitize.go @@ -35,7 +35,11 @@ func SafeHTTPURL(raw string) string { return "" } u, err := url.Parse(raw) - if err != nil || (u.Scheme != "http" && u.Scheme != "https") { + // Require an http(s) scheme AND a host: a scheme-only value like "http:" + // or an opaque "https:foo" has no host to fetch/link and isn't a usable + // web URL. (url.Parse lowercases the scheme, so the check is case- + // insensitive and still rejects javascript:/data:.) + if err != nil || (u.Scheme != "http" && u.Scheme != "https") || u.Host == "" { return "" } return raw From 87d3f3368a395c04735a3859664f4d985dcd475e Mon Sep 17 00:00:00 2001 From: Brandon Honeycutt <697896+brandonhon@users.noreply.github.com> Date: Fri, 5 Jun 2026 15:29:29 -0500 Subject: [PATCH 12/15] fix(api): validate Ollama model names on set/pull/delete The admin LLM endpoints passed the model name verbatim to the Ollama API. Constrain it to the documented [registry/][namespace/]name[:tag] shape so a rogue/compromised admin can't coax Ollama into pulling from an arbitrary registry or dereferencing path-traversal components. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/api/llm_handlers.go | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/internal/api/llm_handlers.go b/internal/api/llm_handlers.go index 538db7d..9ab0f8e 100644 --- a/internal/api/llm_handlers.go +++ b/internal/api/llm_handlers.go @@ -4,6 +4,7 @@ import ( "context" "log/slog" "net/http" + "regexp" "strconv" "time" @@ -11,6 +12,13 @@ import ( "github.com/brandonhon/ember/internal/sysinfo" ) +// validModelName bounds an Ollama model reference to its documented shape +// ([registry/][namespace/]name[:tag]) and printable ASCII. Even though these +// endpoints are admin-only, validating the name stops a compromised/rogue admin +// from coaxing the Ollama daemon into pulling from an arbitrary registry or +// dereferencing path-traversal components. +var validModelName = regexp.MustCompile(`^[A-Za-z0-9][A-Za-z0-9._:/-]{0,255}$`) + func floatToStr(f float64) string { return strconv.FormatFloat(f, 'f', -1, 64) } func intToStr(i int) string { return strconv.Itoa(i) } @@ -72,6 +80,10 @@ func (d *Dependencies) handleSetLLMModel(w http.ResponseWriter, r *http.Request) writeError(w, http.StatusBadRequest, "bad_request", "model required") return } + if !validModelName.MatchString(req.Model) { + writeError(w, http.StatusBadRequest, "bad_request", "invalid model name") + return + } if err := d.Store.PutAppSetting(r.Context(), "ollama_model", req.Model); err != nil { internalError(w, "internal", err) return @@ -141,6 +153,10 @@ func (d *Dependencies) handleDeleteLLMModel(w http.ResponseWriter, r *http.Reque writeError(w, http.StatusBadRequest, "bad_request", "model required") return } + if !validModelName.MatchString(req.Model) { + writeError(w, http.StatusBadRequest, "bad_request", "invalid model name") + return + } if req.Model == d.Ollama.Model() { writeError(w, http.StatusConflict, "active_model", "cannot delete the active model — switch first") return @@ -170,6 +186,10 @@ func (d *Dependencies) handlePullLLMModel(w http.ResponseWriter, r *http.Request writeError(w, http.StatusBadRequest, "bad_request", "model required") return } + if !validModelName.MatchString(req.Model) { + writeError(w, http.StatusBadRequest, "bad_request", "invalid model name") + return + } // Override per-connection deadlines so the response can stay open for // the duration of the pull. http.NewResponseController is the modern // way to do this; errors mean the server doesn't support it (very old From 4eb28d147c22aab6d186ed2bcedce2f6273e359a Mon Sep 17 00:00:00 2001 From: Brandon Honeycutt <697896+brandonhon@users.noreply.github.com> Date: Fri, 5 Jun 2026 15:29:40 -0500 Subject: [PATCH 13/15] harden(feed): require a guarded HTTP client in ExtractFromURL The nil-client fallback built an unguarded client (no SSRF redirect/dial guard). It's never used in production (the poller passes a guarded client), so make nil an explicit error rather than a silent footgun. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/feed/readability.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/internal/feed/readability.go b/internal/feed/readability.go index 30a59c7..c82d09e 100644 --- a/internal/feed/readability.go +++ b/internal/feed/readability.go @@ -6,7 +6,6 @@ import ( "net/http" "net/url" "strings" - "time" "github.com/go-shiori/go-readability" ) @@ -23,7 +22,10 @@ type Readable struct { // readability-extracted view. func ExtractFromURL(ctx context.Context, c *http.Client, target string) (Readable, error) { if c == nil { - c = &http.Client{Timeout: 30 * time.Second} + // Require a caller-supplied client: the SSRF guard (redirect + dial) + // lives on it, so a nil client would be an unguarded fetch. Callers + // build a guarded client (see poller.enrichWithReadability). + return Readable{}, errors.New("readability: nil http client (SSRF guard required)") } req, err := http.NewRequestWithContext(ctx, http.MethodGet, target, nil) if err != nil { From 60a65662eea09bf477a152a4fc14af618260967f Mon Sep 17 00:00:00 2001 From: Brandon Honeycutt <697896+brandonhon@users.noreply.github.com> Date: Fri, 5 Jun 2026 16:51:39 -0500 Subject: [PATCH 14/15] fix(security): pin resolved IP on outbound clients to close DNS rebinding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit urlcheck.Check validated the hostname, but the HTTP stack resolved DNS again at dial time — an attacker controlling DNS could answer public to the check and private to the dial (TOCTOU rebind). Add urlcheck.DialContext / GuardedTransport, which re-resolves, rejects any private resolved IP, and dials the pinned address. Wire it into the feed fetcher and the poller's readability client (alongside the existing redirect guards). (Backport of develop 57a9db1; the push-notifier wiring is omitted — that feature isn't on main.) Co-Authored-By: Claude Opus 4.8 (1M context) --- cmd/ember/main.go | 3 ++ internal/poller/poller.go | 3 +- internal/urlcheck/urlcheck.go | 56 ++++++++++++++++++++++++++++++ internal/urlcheck/urlcheck_test.go | 10 ++++++ 4 files changed, 71 insertions(+), 1 deletion(-) diff --git a/cmd/ember/main.go b/cmd/ember/main.go index 15d8f74..05a9c66 100644 --- a/cmd/ember/main.go +++ b/cmd/ember/main.go @@ -284,6 +284,9 @@ func run() error { fetcher := feed.NewFetcher(30*time.Second, func(rawURL string) error { return urlcheck.Check(ctx, rawURL, cfg.AllowPrivateURLs) }) + // IP-pinning transport closes the DNS-rebind window between the pre-flight + // urlcheck and the actual dial (the redirect guard only covers 3xx hops). + fetcher.Client.Transport = urlcheck.GuardedTransport(cfg.AllowPrivateURLs) p := poller.New(st, fetcher, sum, poller.Config{ Tick: cfg.PollTick, Concurrency: cfg.PollConcurrency, diff --git a/internal/poller/poller.go b/internal/poller/poller.go index b188a74..a54a1a1 100644 --- a/internal/poller/poller.go +++ b/internal/poller/poller.go @@ -524,7 +524,8 @@ func (p *Poller) enrichWithReadability(ctx context.Context, a *models.Article) { // address. Mirror the SSRF guard wired into the feed Fetcher + discovery // client. client := &http.Client{ - Timeout: 15 * time.Second, + Timeout: 15 * time.Second, + Transport: urlcheck.GuardedTransport(p.Config.AllowPrivateURLs), CheckRedirect: feed.RedirectGuard(func(rawURL string) error { return urlcheck.Check(rctx, rawURL, p.Config.AllowPrivateURLs) }), diff --git a/internal/urlcheck/urlcheck.go b/internal/urlcheck/urlcheck.go index 7066a8b..8cc41dc 100644 --- a/internal/urlcheck/urlcheck.go +++ b/internal/urlcheck/urlcheck.go @@ -16,8 +16,10 @@ import ( "errors" "fmt" "net" + "net/http" "net/url" "strings" + "time" ) // ErrPrivate is returned when the URL resolves to a private/loopback/CGNAT @@ -101,6 +103,60 @@ func CheckWith(ctx context.Context, raw string, allowPrivate bool, resolve Resol return nil } +// DialContext returns a net.Dialer-style DialContext that re-resolves the host, +// rejects any resolved IP that fails the private-address check, and dials the +// pinned IP directly. This closes the DNS-rebinding TOCTOU window: Check +// validates the name at request time, but the stdlib HTTP stack would resolve +// again at dial time — an attacker controlling DNS could return a public IP to +// Check and a private one to the dialer. Pinning the checked IP removes the +// second lookup. allowPrivate skips the check (homelab opt-in). +func DialContext(allowPrivate bool) func(ctx context.Context, network, addr string) (net.Conn, error) { + d := &net.Dialer{Timeout: 10 * time.Second, KeepAlive: 30 * time.Second} + return func(ctx context.Context, network, addr string) (net.Conn, error) { + host, port, err := net.SplitHostPort(addr) + if err != nil { + return nil, err + } + if allowPrivate { + return d.DialContext(ctx, network, addr) + } + if ip := net.ParseIP(host); ip != nil { + if isPrivate(ip) { + return nil, fmt.Errorf("%w: %s", ErrPrivate, ip) + } + return d.DialContext(ctx, network, addr) + } + ips, err := defaultResolver(ctx, host) + if err != nil { + return nil, fmt.Errorf("urlcheck: resolve %s: %w", host, err) + } + lastErr := error(fmt.Errorf("urlcheck: no usable address for %s", host)) + for _, ip := range ips { + if isPrivate(ip) { + lastErr = fmt.Errorf("%w: %s -> %s", ErrPrivate, host, ip) + continue + } + conn, derr := d.DialContext(ctx, network, net.JoinHostPort(ip.String(), port)) + if derr == nil { + return conn, nil + } + lastErr = derr + } + return nil, lastErr + } +} + +// GuardedTransport returns an *http.Transport (cloned from the default so proxy, +// TLS, and timeout defaults are preserved) whose DialContext pins the resolved +// IP against the private-address check. Set it as a client's Transport to make +// the SSRF guard cover the actual connect, complementing the pre-flight Check +// and the redirect guard. +func GuardedTransport(allowPrivate bool) *http.Transport { + tr := http.DefaultTransport.(*http.Transport).Clone() + tr.DialContext = DialContext(allowPrivate) + return tr +} + func isPrivate(ip net.IP) bool { for _, b := range privateBlocks { if b.Contains(ip) { diff --git a/internal/urlcheck/urlcheck_test.go b/internal/urlcheck/urlcheck_test.go index 60ce1e8..50c1090 100644 --- a/internal/urlcheck/urlcheck_test.go +++ b/internal/urlcheck/urlcheck_test.go @@ -86,3 +86,13 @@ func TestCheck_AllowPrivateBypass(t *testing.T) { t.Errorf("allowPrivate=true should bypass: %v", err) } } + +func TestDialContext_RejectsPrivateLiteral(t *testing.T) { + dial := DialContext(false) + // A literal private IP is rejected before any connection attempt. + for _, addr := range []string{"127.0.0.1:80", "169.254.169.254:80", "10.0.0.5:443"} { + if _, err := dial(context.Background(), "tcp", addr); !errors.Is(err, ErrPrivate) { + t.Errorf("dial %s: want ErrPrivate, got %v", addr, err) + } + } +} From bd5af4c38edae8ab53f7fcd8837cd23b95f80deb Mon Sep 17 00:00:00 2001 From: Brandon Honeycutt <697896+brandonhon@users.noreply.github.com> Date: Fri, 5 Jun 2026 16:52:17 -0500 Subject: [PATCH 15/15] fix(filters): cap filters per user and regex pattern length Each filter's regex is compiled and cached for the process lifetime (reCache, no eviction), so an authenticated user creating unbounded filters/patterns is a slow-burn memory DoS. Cap at 200 filters per user and 512 chars per regex. RE2 already rules out backtracking ReDoS. (main-targeted equivalent of develop cad70d0, which doesn't cherry-pick cleanly because develop's filters is the enhanced PR #78 version.) Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/api/filter_handlers.go | 16 ++++++++++++++++ internal/filters/filters.go | 10 ++++++++++ 2 files changed, 26 insertions(+) diff --git a/internal/api/filter_handlers.go b/internal/api/filter_handlers.go index 242579f..8ce7834 100644 --- a/internal/api/filter_handlers.go +++ b/internal/api/filter_handlers.go @@ -9,6 +9,10 @@ import ( "github.com/brandonhon/ember/internal/store" ) +// maxFiltersPerUser bounds how many filters one account can create; see +// handleCreateFilter. +const maxFiltersPerUser = 200 + type filterReq struct { Name string `json:"name"` MatchJSON string `json:"match_json"` @@ -44,6 +48,18 @@ func (d *Dependencies) handleCreateFilter(w http.ResponseWriter, r *http.Request writeError(w, http.StatusBadRequest, "bad_request", err.Error()) return } + // Cap filters per user: each filter's regex is compiled and cached for the + // process lifetime, so an unbounded count is a memory-DoS vector. 200 is + // far beyond any realistic use. + existing, err := d.Store.ListFilters(r.Context(), u.ID) + if mapStoreError(w, err) { + return + } + if len(existing) >= maxFiltersPerUser { + writeError(w, http.StatusBadRequest, "filter_limit", + "filter limit reached (max 200 per user)") + return + } enabled := true if req.Enabled != nil { enabled = *req.Enabled diff --git a/internal/filters/filters.go b/internal/filters/filters.go index bd83de4..dfa9d4f 100644 --- a/internal/filters/filters.go +++ b/internal/filters/filters.go @@ -98,6 +98,13 @@ func (m Match) Validate() error { return fmt.Errorf("filters: value required") } if m.Op == OpMatches { + // Bound the pattern length: each distinct pattern is compiled and + // cached forever (reCache), so unbounded patterns are a memory-DoS + // vector. RE2 rules out catastrophic backtracking; this caps per-entry + // size. 512 is far beyond any legitimate filter. + if len(m.Value) > maxPatternLen { + return fmt.Errorf("filters: regex too long (max %d chars)", maxPatternLen) + } if _, err := regexp.Compile(m.Value); err != nil { return fmt.Errorf("filters: invalid regex %q: %w", m.Value, err) } @@ -105,6 +112,9 @@ func (m Match) Validate() error { return nil } +// maxPatternLen caps a single match regex; see Match.Validate. +const maxPatternLen = 512 + // ValidateAction returns an error for unknown action strings. func ValidateAction(a string) error { switch Action(a) {