From 175b38a6b4b3a617072c9e73642825d24c03c8ba Mon Sep 17 00:00:00 2001 From: csd113 Date: Mon, 9 Mar 2026 08:03:58 -0700 Subject: [PATCH 1/7] fixed console spacing --- src/main.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/main.rs b/src/main.rs index 3ca1b30..af99458 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1057,12 +1057,8 @@ fn print_keyboard_help() { println!(); println!(" \x1b[36m╔══ Admin Console ════════════════════════════════╗\x1b[0m"); println!(" \x1b[36m║\x1b[0m [s] show stats now [l] list boards \x1b[36m║\x1b[0m"); - println!( - " \x1b[36m║\x1b[0m [c] create board [d] delete thread \x1b[36m║\x1b[0m" - ); - println!( - " \x1b[36m║\x1b[0m [h] help [q] quit hint \x1b[36m║\x1b[0m" - ); + println!(" \x1b[36m║\x1b[0m [c] create board [d] delete thread \x1b[36m║\x1b[0m"); + println!(" \x1b[36m║\x1b[0m [h] help [q] quit hint \x1b[36m║\x1b[0m"); println!(" \x1b[36m╚═════════════════════════════════════════════════╝\x1b[0m"); println!(); } From 628ee155faa7aa5c7fe3440d394abe1454ce3410 Mon Sep 17 00:00:00 2001 From: csd113 Date: Mon, 9 Mar 2026 11:29:54 -0700 Subject: [PATCH 2/7] bug fix and removed dead code --- Cargo.lock | 1 + Cargo.toml | 7 +- src/config.rs | 55 +++++++++ src/handlers/admin.rs | 13 +++ src/handlers/board.rs | 251 +++++++--------------------------------- src/handlers/mod.rs | 195 +++++++++++++++++++++++++++++++ src/handlers/thread.rs | 215 ++++++---------------------------- src/main.rs | 64 ++++++++-- src/templates/admin.rs | 17 +-- src/templates/board.rs | 5 - src/templates/mod.rs | 75 ++++++------ src/templates/thread.rs | 2 - 12 files changed, 439 insertions(+), 461 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a0171e0..8059c5e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1270,6 +1270,7 @@ dependencies = [ "image", "libc", "once_cell", + "parking_lot", "r2d2", "r2d2_sqlite", "rand_core 0.6.4", diff --git a/Cargo.toml b/Cargo.toml index dd2908d..e42ab1d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,9 +58,10 @@ clap = { version = "4", features = ["derive"] } uuid = { version = "1", features = ["v4"] } chrono = { version = "0.4", features = ["serde"] } -dashmap = "6" -anyhow = "1" -once_cell = "1" +dashmap = "6" +parking_lot = "0.12" +anyhow = "1" +once_cell = "1" regex = "1" # zip 8: SimpleFileOptions and core ZipWriter/ZipArchive API unchanged. diff --git a/src/config.rs b/src/config.rs index 190753f..64087d2 100644 --- a/src/config.rs +++ b/src/config.rs @@ -343,6 +343,61 @@ impl Config { } } +/// Update `forum_name` and `site_subtitle` in `settings.toml` in-place, +/// preserving all other lines and comments. +/// +/// Called by the admin site-settings handler so that changes made via the +/// panel are reflected in the file and survive a restart without the operator +/// needing to hand-edit `settings.toml`. +/// +/// If the key is not yet present in the file the function is a no-op for that +/// key (it won't append new lines — the file is only updated if the key already +/// exists). On a fresh install `generate_settings_file_if_missing` always +/// writes both keys, so this is only a concern for manually-crafted files. +pub fn update_settings_file_site_names(forum_name: &str, site_subtitle: &str) { + let path = settings_file_path(); + let content = match std::fs::read_to_string(&path) { + Ok(c) => c, + Err(e) => { + eprintln!("Warning: could not read settings.toml for update: {e}"); + return; + } + }; + + // Replace the value portion of `key = "..."` lines while preserving + // indentation, comments on the same line, and surrounding whitespace. + // We use a simple line-by-line scan so that file comments are untouched. + fn toml_quote(s: &str) -> String { + // Escape backslash and double-quote, then wrap in double quotes. + let inner = s.replace('\\', "\\\\").replace('"', "\\\""); + format!("\"{inner}\"") + } + + let trailing_newline = content.ends_with('\n'); + let updated: Vec = content + .lines() + .map(|line| { + // Match `forum_name = ...` (possibly with surrounding spaces). + if line.trim_start().starts_with("forum_name") && line.contains('=') { + return format!("forum_name = {}", toml_quote(forum_name)); + } + if line.trim_start().starts_with("site_subtitle") && line.contains('=') { + return format!("site_subtitle = {}", toml_quote(site_subtitle)); + } + line.to_string() + }) + .collect(); + + let mut out = updated.join("\n"); + if trailing_newline { + out.push('\n'); + } + + if let Err(e) = std::fs::write(&path, out) { + eprintln!("Warning: could not write updated settings.toml: {e}"); + } +} + // ─── Cookie secret rotation check ──────────────────────────────────────────── /// Check whether the cookie_secret has changed since the last run by comparing diff --git a/src/handlers/admin.rs b/src/handlers/admin.rs index 8d8aa29..5aafff5 100644 --- a/src/handlers/admin.rs +++ b/src/handlers/admin.rs @@ -115,6 +115,14 @@ fn clear_login_fails(ip_key: &str) { } } +/// Prune all expired entries from ADMIN_LOGIN_FAILS. +/// Called by a background task every 5 minutes so the map never grows +/// unbounded during sustained brute-force attacks with no successful logins. +pub fn prune_login_fails() { + let now = login_now_secs(); + ADMIN_LOGIN_FAILS.retain(|_, (_, ws)| now.saturating_sub(*ws) <= LOGIN_FAIL_WINDOW); +} + /// Verify admin session. Returns the admin_id if valid. /// NOTE: This function performs blocking DB I/O. Only call it from within a /// spawn_blocking closure or synchronous (non-async) context. @@ -3820,6 +3828,11 @@ pub async fn update_site_settings( crate::templates::set_live_site_subtitle(&new_subtitle); info!("Admin updated site subtitle to: {:?}", new_subtitle); + // Persist both values back to settings.toml so they survive a + // server restart without requiring a manual file edit. + crate::config::update_settings_file_site_names(&new_name, &new_subtitle); + info!("settings.toml updated with new site_name and site_subtitle"); + // Save the default theme slug (validated against allowed values). const VALID_THEMES: &[&str] = &[ "terminal", diff --git a/src/handlers/board.rs b/src/handlers/board.rs index b159141..834c4e1 100644 --- a/src/handlers/board.rs +++ b/src/handlers/board.rs @@ -17,18 +17,10 @@ use crate::{ models::*, templates, utils::{ - // FIX[LOW-8]: sha256_hex now lives in utils::crypto (deduplicated) - crypto::{hash_ip, new_csrf_token, new_deletion_token, sha256_hex, verify_pow}, - files::save_upload, + crypto::{hash_ip, new_csrf_token, new_deletion_token, verify_pow}, sanitize::{ - // FIX[MEDIUM-8]: apply_word_filters now runs before escape_html - apply_word_filters, - escape_html, - render_post_body, - validate_body, - validate_body_with_file, - validate_name, - validate_subject, + apply_word_filters, escape_html, render_post_body, validate_body, + validate_body_with_file, validate_name, validate_subject, }, tripcode::parse_name_tripcode, }, @@ -201,7 +193,7 @@ pub async fn create_thread( // Also extract csrf_token before spawn_blocking so the ban page appeal form works. let ban_csrf_token = csrf_cookie.clone().unwrap_or_default(); - let board_short_err = board_short.clone(); + let _board_short_err = board_short.clone(); let result = tokio::task::spawn_blocking({ let pool = state.db.clone(); let job_queue = state.job_queue.clone(); @@ -282,117 +274,26 @@ pub async fn create_thread( let escaped_body = escape_html(&filtered_body); let body_html = render_post_body(&escaped_body); - let uploaded = if let Some((data, fname)) = file_data { - // Detect media type from magic bytes to enforce per-board toggles. - // We call detect_mime_type here for a quick classification without - // doing a full save; the real detection runs again in save_upload. - let detected_mime = crate::utils::files::detect_mime_type(&data) - .map_err(|e| AppError::BadRequest(e.to_string()))?; - let detected_media = crate::models::MediaType::from_mime(detected_mime) - .ok_or_else(|| AppError::BadRequest("Unsupported file type.".into()))?; - - match detected_media { - crate::models::MediaType::Image if !board.allow_images => { - return Err(AppError::BadRequest( - "Image uploads are disabled on this board.".into(), - )) - } - crate::models::MediaType::Video if !board.allow_video => { - return Err(AppError::BadRequest( - "Video uploads are disabled on this board.".into(), - )) - } - crate::models::MediaType::Audio if !board.allow_audio => { - return Err(AppError::BadRequest( - "Audio uploads are disabled on this board.".into(), - )) - } - _ => {} - } - - // SHA-256 deduplication — FIX[LOW-8]: use sha256_hex from crypto module - let hash = sha256_hex(&data); - if let Some(cached) = db::find_file_by_hash(&conn, &hash)? { - let cached_media = crate::models::MediaType::from_mime(&cached.mime_type) - .unwrap_or(crate::models::MediaType::Image); - Some(crate::utils::files::UploadedFile { - file_path: cached.file_path, - thumb_path: cached.thumb_path, - original_name: crate::utils::sanitize::sanitize_filename(&fname), - mime_type: cached.mime_type, - file_size: data.len() as i64, - media_type: cached_media, - processing_pending: false, // cached = already fully processed - }) - } else { - let f = save_upload( - &data, - &fname, - &upload_dir, - &board.short_name, - thumb_size, - max_image_size, - max_video_size, - max_audio_size, - ffmpeg_available, - ) - .map_err(crate::handlers::classify_upload_error)?; - db::record_file_hash(&conn, &hash, &f.file_path, &f.thumb_path, &f.mime_type)?; - Some(f) - } - } else { - None - }; + let uploaded = crate::handlers::process_primary_upload( + file_data, + &board, + &conn, + &upload_dir, + thumb_size, + max_image_size, + max_video_size, + max_audio_size, + ffmpeg_available, + )?; // ── Image+audio combo ───────────────────────────────────────────── - // If an audio file was also submitted alongside an image, and the - // board permits both, save the audio file using the image's thumb. - let audio_uploaded: Option = - if let Some((aud_data, aud_fname)) = audio_file_data { - // Validate that the board allows audio - if !board.allow_audio { - return Err(AppError::BadRequest( - "Audio uploads are disabled on this board.".into(), - )); - } - // The primary file must be an image for the combo to be valid - let primary_is_image = uploaded - .as_ref() - .map(|u| matches!(u.media_type, crate::models::MediaType::Image)) - .unwrap_or(false); - if !primary_is_image { - return Err(AppError::BadRequest( - "Audio can only be combined with an image upload.".into(), - )); - } - // Confirm it's actually audio via magic bytes - let aud_mime = crate::utils::files::detect_mime_type(&aud_data) - .map_err(|e| AppError::BadRequest(e.to_string()))?; - let aud_media = crate::models::MediaType::from_mime(aud_mime) - .ok_or_else(|| AppError::BadRequest("Unsupported audio type.".into()))?; - if !matches!(aud_media, crate::models::MediaType::Audio) { - return Err(AppError::BadRequest( - "The audio slot only accepts audio files.".into(), - )); - } - - let mut aud_file = crate::utils::files::save_audio_with_image_thumb( - &aud_data, - &aud_fname, - &upload_dir, - &board.short_name, - max_audio_size, - ) - .map_err(crate::handlers::classify_upload_error)?; - - // Use the image thumbnail as the audio's thumbnail - if let Some(ref img) = uploaded { - aud_file.thumb_path = img.thumb_path.clone(); - } - Some(aud_file) - } else { - None - }; + let audio_uploaded = crate::handlers::process_audio_combo( + audio_file_data, + uploaded.as_ref(), + &board, + &upload_dir, + max_audio_size, + )?; let deletion_token = if del_token_val.trim().is_empty() { new_deletion_token() @@ -449,40 +350,15 @@ pub async fn create_thread( } // ── Background jobs ─────────────────────────────────────────────── - // 1. Media post-processing (video transcode / audio waveform) - if let Some(ref up) = uploaded { - if up.processing_pending { - let job = match up.media_type { - crate::models::MediaType::Video => { - Some(crate::workers::Job::VideoTranscode { - post_id, - file_path: up.file_path.clone(), - board_short: board.short_name.clone(), - }) - } - crate::models::MediaType::Audio => { - Some(crate::workers::Job::AudioWaveform { - post_id, - file_path: up.file_path.clone(), - board_short: board.short_name.clone(), - }) - } - _ => None, - }; - if let Some(j) = job { - if let Err(e) = job_queue.enqueue(&j) { - tracing::warn!("Failed to enqueue media job: {}", e); - } - } - } - } - - // 2. Spam analysis - let _ = job_queue.enqueue(&crate::workers::Job::SpamCheck { + // 1 & 2. Media post-processing + spam check (shared helper) + crate::handlers::enqueue_post_jobs( + &job_queue, post_id, - ip_hash: ip_hash.clone(), - body_len: body_text.len(), - }); + &ip_hash, + body_text.len(), + uploaded.as_ref(), + &board.short_name, + ); // 3. Thread pruning — now async so HTTP response returns immediately. let max_threads = board.max_threads; @@ -500,60 +376,14 @@ pub async fn create_thread( .await .map_err(|e| AppError::Internal(anyhow::anyhow!(e)))?; - // BadRequest → re-render the board index with an inline error banner. + // BadRequest → return a lightweight 422 page instead of re-querying the + // entire board index (which wastes significant DB and CPU under spam load). let redirect_url = match result { Ok(url) => url, Err(AppError::BadRequest(msg)) => { - let html = tokio::task::spawn_blocking({ - let pool = state.db.clone(); - let csrf_err = csrf_cookie.clone().unwrap_or_default(); - let board_short = board_short_err.clone(); - let msg = msg.clone(); - move || -> String { - let conn = match pool.get() { - Ok(c) => c, - Err(_) => return String::new(), - }; - let board = match db::get_board_by_short(&conn, &board_short) { - Ok(Some(b)) => b, - _ => return String::new(), - }; - let all_boards = db::get_all_boards(&conn).unwrap_or_default(); - let total = db::count_threads_for_board(&conn, board.id).unwrap_or(0); - let pagination = crate::models::Pagination::new(1, 10, total); - let threads = - db::get_threads_for_board(&conn, board.id, 10, 0).unwrap_or_default(); - let summaries: Vec = threads - .into_iter() - .map(|t| { - let preview = db::get_preview_posts(&conn, t.id, 3).unwrap_or_default(); - let omitted = (t.reply_count - preview.len() as i64).max(0); - crate::models::ThreadSummary { - thread: t, - preview_posts: preview, - omitted, - } - }) - .collect(); - templates::board_page( - &board, - &summaries, - &pagination, - &csrf_err, - &all_boards, - false, - Some(&msg), - db::get_collapse_greentext(&conn), - ) - } - }) - .await - .unwrap_or_default(); - - if !html.is_empty() { - return Ok((jar, Html(html)).into_response()); - } - return Err(AppError::BadRequest(msg)); + let mut resp = Html(templates::error_page(422, &msg)).into_response(); + *resp.status_mut() = axum::http::StatusCode::UNPROCESSABLE_ENTITY; + return Ok(resp); } Err(e) => return Err(e), }; @@ -834,14 +664,21 @@ pub async fn serve_board_media( use tower::ServiceExt; use tower_http::services::ServeFile; - // Reject path-traversal attempts. - if media_path.contains("..") { + // Reject path-traversal attempts and absolute-path escapes. + if media_path.contains("..") || media_path.starts_with('/') { return StatusCode::BAD_REQUEST.into_response(); } let base = PathBuf::from(&CONFIG.upload_dir); let target = base.join(&media_path); + // Verify the resolved path is still inside the upload directory. + // This catches any edge cases that slip past the string checks above + // (e.g. symlinks, exotic percent-encoding handled by the OS). + if !target.starts_with(&base) { + return StatusCode::BAD_REQUEST.into_response(); + } + if target.exists() { // File present — forward the real request (with Range, ETag, etc.) to // ServeFile so it can respond with 206 Partial Content when needed. diff --git a/src/handlers/mod.rs b/src/handlers/mod.rs index 5ed113a..ec211b7 100644 --- a/src/handlers/mod.rs +++ b/src/handlers/mod.rs @@ -9,6 +9,7 @@ pub mod thread; use crate::error::{AppError, Result}; use crate::middleware::validate_csrf; +use crate::workers::JobQueue; use axum::extract::Multipart; /// Parsed fields from a post/thread creation multipart form. @@ -199,3 +200,197 @@ pub fn classify_upload_error(e: anyhow::Error) -> AppError { AppError::BadRequest(msg) } } + +// ─── Shared media upload processing (R2-2) ─────────────────────────────────── +// +// create_thread (board.rs) and post_reply (thread.rs) had identical blocks for: +// 1. Magic-byte mime detection + per-board toggle enforcement +// 2. SHA-256 deduplication lookup +// 3. save_upload / save_audio_with_image_thumb +// 4. record_file_hash +// 5. Image+audio combo validation +// 6. Background job enqueueing +// +// Both handlers now call these shared functions instead of duplicating the code. + +use crate::models::Board; + +/// Process the primary file upload for a new post: detect mime type, enforce +/// per-board media toggles, SHA-256 dedup, save to disk and record hash. +/// +/// Returns `Ok(None)` when `file_data` is `None` (no file attached). +/// Must be called from inside a `spawn_blocking` closure. +pub fn process_primary_upload( + file_data: Option<(Vec, String)>, + board: &Board, + conn: &rusqlite::Connection, + upload_dir: &str, + thumb_size: u32, + max_image_size: usize, + max_video_size: usize, + max_audio_size: usize, + ffmpeg_available: bool, +) -> Result> { + let (data, fname) = match file_data { + Some(f) => f, + None => return Ok(None), + }; + + // Magic-byte detection for accurate type enforcement. + let detected_mime = crate::utils::files::detect_mime_type(&data) + .map_err(|e| AppError::BadRequest(e.to_string()))?; + let detected_media = crate::models::MediaType::from_mime(detected_mime) + .ok_or_else(|| AppError::BadRequest("Unsupported file type.".into()))?; + + match detected_media { + crate::models::MediaType::Image if !board.allow_images => { + return Err(AppError::BadRequest( + "Image uploads are disabled on this board.".into(), + )) + } + crate::models::MediaType::Video if !board.allow_video => { + return Err(AppError::BadRequest( + "Video uploads are disabled on this board.".into(), + )) + } + crate::models::MediaType::Audio if !board.allow_audio => { + return Err(AppError::BadRequest( + "Audio uploads are disabled on this board.".into(), + )) + } + _ => {} + } + + // SHA-256 deduplication — serve the cached entry without re-saving. + let hash = crate::utils::crypto::sha256_hex(&data); + if let Some(cached) = crate::db::find_file_by_hash(conn, &hash)? { + let cached_media = crate::models::MediaType::from_mime(&cached.mime_type) + .unwrap_or(crate::models::MediaType::Image); + return Ok(Some(crate::utils::files::UploadedFile { + file_path: cached.file_path, + thumb_path: cached.thumb_path, + original_name: crate::utils::sanitize::sanitize_filename(&fname), + mime_type: cached.mime_type, + file_size: data.len() as i64, + media_type: cached_media, + processing_pending: false, + })); + } + + let f = crate::utils::files::save_upload( + &data, + &fname, + upload_dir, + &board.short_name, + thumb_size, + max_image_size, + max_video_size, + max_audio_size, + ffmpeg_available, + ) + .map_err(classify_upload_error)?; + crate::db::record_file_hash(conn, &hash, &f.file_path, &f.thumb_path, &f.mime_type)?; + Ok(Some(f)) +} + +/// Process the secondary audio file for an image+audio combo upload. +/// `primary_upload` must already be the processed primary image. +/// +/// Returns `Ok(None)` when `audio_file_data` is `None`. +/// Must be called from inside a `spawn_blocking` closure. +pub fn process_audio_combo( + audio_file_data: Option<(Vec, String)>, + primary_upload: Option<&crate::utils::files::UploadedFile>, + board: &Board, + upload_dir: &str, + max_audio_size: usize, +) -> Result> { + let (aud_data, aud_fname) = match audio_file_data { + Some(f) => f, + None => return Ok(None), + }; + + if !board.allow_audio { + return Err(AppError::BadRequest( + "Audio uploads are disabled on this board.".into(), + )); + } + + // Audio combo requires the primary file to be an image. + let primary_is_image = primary_upload + .map(|u| matches!(u.media_type, crate::models::MediaType::Image)) + .unwrap_or(false); + if !primary_is_image { + return Err(AppError::BadRequest( + "Audio can only be combined with an image upload.".into(), + )); + } + + // Confirm the secondary file is actually audio. + let aud_mime = crate::utils::files::detect_mime_type(&aud_data) + .map_err(|e| AppError::BadRequest(e.to_string()))?; + let aud_media = crate::models::MediaType::from_mime(aud_mime) + .ok_or_else(|| AppError::BadRequest("Unsupported audio type.".into()))?; + if !matches!(aud_media, crate::models::MediaType::Audio) { + return Err(AppError::BadRequest( + "The audio slot only accepts audio files.".into(), + )); + } + + let mut aud_file = crate::utils::files::save_audio_with_image_thumb( + &aud_data, + &aud_fname, + upload_dir, + &board.short_name, + max_audio_size, + ) + .map_err(classify_upload_error)?; + + // Use the image thumbnail as the audio's visual. + if let Some(img) = primary_upload { + aud_file.thumb_path = img.thumb_path.clone(); + } + Ok(Some(aud_file)) +} + +/// Enqueue background media-processing and spam-check jobs for a newly created +/// post. Shared by create_thread and post_reply. +pub fn enqueue_post_jobs( + job_queue: &JobQueue, + post_id: i64, + ip_hash: &str, + body_len: usize, + uploaded: Option<&crate::utils::files::UploadedFile>, + board_short: &str, +) { + // 1. Media post-processing (video transcode / audio waveform) + if let Some(up) = uploaded { + if up.processing_pending { + let job = match up.media_type { + crate::models::MediaType::Video => Some(crate::workers::Job::VideoTranscode { + post_id, + file_path: up.file_path.clone(), + board_short: board_short.to_string(), + }), + crate::models::MediaType::Audio => Some(crate::workers::Job::AudioWaveform { + post_id, + file_path: up.file_path.clone(), + board_short: board_short.to_string(), + }), + _ => None, + }; + if let Some(j) = job { + if let Err(e) = job_queue.enqueue(&j) { + tracing::warn!("Failed to enqueue media job: {}", e); + } + } + } + } + + // 2. Spam analysis + let _ = job_queue.enqueue(&crate::workers::Job::SpamCheck { + post_id, + ip_hash: ip_hash.to_string(), + body_len, + }); +} diff --git a/src/handlers/thread.rs b/src/handlers/thread.rs index aacaadf..5a4ae3f 100644 --- a/src/handlers/thread.rs +++ b/src/handlers/thread.rs @@ -12,8 +12,7 @@ use crate::{ handlers::{board::ensure_csrf, parse_post_multipart}, middleware::{validate_csrf, AppState}, utils::{ - crypto::{hash_ip, new_deletion_token, sha256_hex, verify_pow}, - files::save_upload, + crypto::{hash_ip, new_deletion_token, verify_pow}, sanitize::{ apply_word_filters, escape_html, render_post_body, validate_body, validate_body_with_file, validate_name, @@ -126,8 +125,7 @@ pub async fn post_reply( // Also extract csrf_token before spawn_blocking so the ban page appeal form works. let ban_csrf_token = csrf_cookie.clone().unwrap_or_default(); - let board_short_err = board_short.clone(); - let client_ip_err = client_ip.clone(); // CRIT-2: keep for the error re-render path below + let _board_short_err = board_short.clone(); let result = tokio::task::spawn_blocking({ let pool = state.db.clone(); let job_queue = state.job_queue.clone(); @@ -220,107 +218,26 @@ pub async fn post_reply( let escaped_body = escape_html(&filtered_body); let body_html = render_post_body(&escaped_body); - let uploaded = if let Some((data, fname)) = file_data { - // Enforce per-board media type toggles using magic-byte detection. - let detected_mime = crate::utils::files::detect_mime_type(&data) - .map_err(|e| AppError::BadRequest(e.to_string()))?; - let detected_media = crate::models::MediaType::from_mime(detected_mime) - .ok_or_else(|| AppError::BadRequest("Unsupported file type.".into()))?; - - match detected_media { - crate::models::MediaType::Image if !board.allow_images => { - return Err(AppError::BadRequest( - "Image uploads are disabled on this board.".into(), - )) - } - crate::models::MediaType::Video if !board.allow_video => { - return Err(AppError::BadRequest( - "Video uploads are disabled on this board.".into(), - )) - } - crate::models::MediaType::Audio if !board.allow_audio => { - return Err(AppError::BadRequest( - "Audio uploads are disabled on this board.".into(), - )) - } - _ => {} - } - - // SHA-256 deduplication — FIX[LOW-8]: use sha256_hex from crypto module - let hash = sha256_hex(&data); - if let Some(cached) = db::find_file_by_hash(&conn, &hash)? { - let cached_media = crate::models::MediaType::from_mime(&cached.mime_type) - .unwrap_or(crate::models::MediaType::Image); - Some(crate::utils::files::UploadedFile { - file_path: cached.file_path, - thumb_path: cached.thumb_path, - original_name: crate::utils::sanitize::sanitize_filename(&fname), - mime_type: cached.mime_type, - file_size: data.len() as i64, - media_type: cached_media, - processing_pending: false, - }) - } else { - let f = save_upload( - &data, - &fname, - &upload_dir, - &board.short_name, - thumb_size, - max_image_size, - max_video_size, - max_audio_size, - ffmpeg_available, - ) - .map_err(crate::handlers::classify_upload_error)?; - db::record_file_hash(&conn, &hash, &f.file_path, &f.thumb_path, &f.mime_type)?; - Some(f) - } - } else { - None - }; + let uploaded = crate::handlers::process_primary_upload( + file_data, + &board, + &conn, + &upload_dir, + thumb_size, + max_image_size, + max_video_size, + max_audio_size, + ffmpeg_available, + )?; // ── Image+audio combo ───────────────────────────────────────────── - let audio_uploaded: Option = - if let Some((aud_data, aud_fname)) = audio_file_data { - if !board.allow_audio { - return Err(AppError::BadRequest( - "Audio uploads are disabled on this board.".into(), - )); - } - let primary_is_image = uploaded - .as_ref() - .map(|u| matches!(u.media_type, crate::models::MediaType::Image)) - .unwrap_or(false); - if !primary_is_image { - return Err(AppError::BadRequest( - "Audio can only be combined with an image upload.".into(), - )); - } - let aud_mime = crate::utils::files::detect_mime_type(&aud_data) - .map_err(|e| AppError::BadRequest(e.to_string()))?; - let aud_media = crate::models::MediaType::from_mime(aud_mime) - .ok_or_else(|| AppError::BadRequest("Unsupported audio type.".into()))?; - if !matches!(aud_media, crate::models::MediaType::Audio) { - return Err(AppError::BadRequest( - "The audio slot only accepts audio files.".into(), - )); - } - let mut aud_file = crate::utils::files::save_audio_with_image_thumb( - &aud_data, - &aud_fname, - &upload_dir, - &board.short_name, - max_audio_size, - ) - .map_err(crate::handlers::classify_upload_error)?; - if let Some(ref img) = uploaded { - aud_file.thumb_path = img.thumb_path.clone(); - } - Some(aud_file) - } else { - None - }; + let audio_uploaded = crate::handlers::process_audio_combo( + audio_file_data, + uploaded.as_ref(), + &board, + &upload_dir, + max_audio_size, + )?; let deletion_token = if del_token_val.trim().is_empty() { new_deletion_token() @@ -364,38 +281,14 @@ pub async fn post_reply( )?; } - // ── Background jobs ─────────────────────────────────────────────── - if let Some(ref up) = uploaded { - if up.processing_pending { - let job = match up.media_type { - crate::models::MediaType::Video => { - Some(crate::workers::Job::VideoTranscode { - post_id, - file_path: up.file_path.clone(), - board_short: board.short_name.clone(), - }) - } - crate::models::MediaType::Audio => { - Some(crate::workers::Job::AudioWaveform { - post_id, - file_path: up.file_path.clone(), - board_short: board.short_name.clone(), - }) - } - _ => None, - }; - if let Some(j) = job { - if let Err(e) = job_queue.enqueue(&j) { - tracing::warn!("Failed to enqueue media job for reply: {}", e); - } - } - } - } - let _ = job_queue.enqueue(&crate::workers::Job::SpamCheck { + crate::handlers::enqueue_post_jobs( + &job_queue, post_id, - ip_hash: ip_hash.clone(), - body_len: body_text.len(), - }); + &ip_hash, + body_text.len(), + uploaded.as_ref(), + &board.short_name, + ); info!( "Reply {} posted in thread {} on /{}/", @@ -410,57 +303,15 @@ pub async fn post_reply( .await .map_err(|e| AppError::Internal(anyhow::anyhow!(e)))?; - // BadRequest → re-render the thread page with an inline error banner. + // BadRequest → return a lightweight 422 page instead of re-querying the + // entire thread (which wastes significant DB and CPU under spam load). let redirect_url = match result { Ok(url) => url, Err(AppError::BadRequest(msg)) => { - let html = tokio::task::spawn_blocking({ - let pool = state.db.clone(); - let csrf_err = csrf_cookie.clone().unwrap_or_default(); - let board_short = board_short_err.clone(); - let msg = msg.clone(); - move || -> String { - let conn = match pool.get() { - Ok(c) => c, - Err(_) => return String::new(), - }; - let board = match db::get_board_by_short(&conn, &board_short) { - Ok(Some(b)) => b, - _ => return String::new(), - }; - let thread = match db::get_thread(&conn, thread_id) { - Ok(Some(t)) => t, - _ => return String::new(), - }; - let posts = db::get_posts_for_thread(&conn, thread_id).unwrap_or_default(); - let all_boards = db::get_all_boards(&conn).unwrap_or_default(); - let ip_hash = crate::utils::crypto::hash_ip( - &client_ip_err, - &crate::config::CONFIG.cookie_secret, - ); - let poll = db::get_poll_for_thread(&conn, thread_id, &ip_hash) - .ok() - .flatten(); - crate::templates::thread_page( - &board, - &thread, - &posts, - &csrf_err, - &all_boards, - false, - poll.as_ref(), - Some(&msg), - db::get_collapse_greentext(&conn), - ) - } - }) - .await - .unwrap_or_default(); - - if !html.is_empty() { - return Ok((jar, Html(html)).into_response()); - } - return Err(AppError::BadRequest(msg)); + let mut resp = + axum::response::Html(crate::templates::error_page(422, &msg)).into_response(); + *resp.status_mut() = axum::http::StatusCode::UNPROCESSABLE_ENTITY; + return Ok(resp); } Err(e) => return Err(e), }; diff --git a/src/main.rs b/src/main.rs index af99458..aae7c44 100644 --- a/src/main.rs +++ b/src/main.rs @@ -219,16 +219,42 @@ async fn run_server(port_override: Option) -> anyhow::Result<()> { // Initialise the live site name and subtitle from DB so they're available before any request. { if let Ok(conn) = pool.get() { - let name = db::get_site_name(&conn); + // Site name: use DB value if an admin has set one, otherwise seed + // from CONFIG.forum_name (settings.toml). Using get_site_setting + // (not get_site_name) lets us distinguish "never set" from "set to + // the default", so that editing forum_name in settings.toml and + // restarting takes effect when no admin override is in the DB. + let name_in_db = db::get_site_setting(&conn, "site_name") + .ok() + .flatten() + .filter(|v| !v.trim().is_empty()); + let name = if let Some(db_val) = name_in_db { + db_val + } else { + // Seed DB from settings.toml so get_site_name is always consistent. + let _ = db::set_site_setting(&conn, "site_name", &CONFIG.forum_name); + CONFIG.forum_name.clone() + }; templates::set_live_site_name(&name); // Seed subtitle from settings.toml if not yet configured in DB. - let subtitle = db::get_site_subtitle(&conn); - let subtitle = if subtitle.is_empty() && !CONFIG.initial_site_subtitle.is_empty() { - let _ = db::set_site_setting(&conn, "site_subtitle", &CONFIG.initial_site_subtitle); - CONFIG.initial_site_subtitle.clone() + // BUG FIX: get_site_subtitle() always returns a non-empty fallback + // string, so we must query the DB key directly to detect "never set". + let subtitle_in_db = db::get_site_setting(&conn, "site_subtitle") + .ok() + .flatten() + .filter(|v| !v.trim().is_empty()); + let subtitle = if let Some(db_val) = subtitle_in_db { + db_val } else { - subtitle + // Nothing in DB — seed from CONFIG (settings.toml). + let seed = if !CONFIG.initial_site_subtitle.is_empty() { + CONFIG.initial_site_subtitle.clone() + } else { + "select board to proceed".to_string() + }; + let _ = db::set_site_setting(&conn, "site_subtitle", &seed); + seed }; templates::set_live_site_subtitle(&subtitle); @@ -340,6 +366,18 @@ async fn run_server(port_override: Option) -> anyhow::Result<()> { } }); + // Background: prune expired entries from ADMIN_LOGIN_FAILS every 5 min. + // Prevents unbounded growth under a sustained brute-force attack that + // never produces a successful login (which would trigger the existing + // opportunistic prune path inside clear_login_fails). + tokio::spawn(async move { + let mut iv = tokio::time::interval(Duration::from_secs(300)); + loop { + iv.tick().await; + crate::handlers::admin::prune_login_fails(); + } + }); + let app = build_router(state); let listener = tokio::net::TcpListener::bind(&bind_addr).await?; info!("Listening on http://{}", bind_addr); @@ -373,12 +411,22 @@ fn build_router(state: AppState) -> Router { .route("/static/theme-init.js", get(serve_theme_init_js)) .route("/", get(handlers::board::index)) .route("/{board}", get(handlers::board::board_index)) - .route("/{board}", post(handlers::board::create_thread)) + .route( + "/{board}", + post(handlers::board::create_thread).layer(DefaultBodyLimit::max( + CONFIG.max_video_size.max(CONFIG.max_audio_size), + )), + ) .route("/{board}/catalog", get(handlers::board::catalog)) .route("/{board}/archive", get(handlers::board::board_archive)) .route("/{board}/search", get(handlers::board::search)) .route("/{board}/thread/{id}", get(handlers::thread::view_thread)) - .route("/{board}/thread/{id}", post(handlers::thread::post_reply)) + .route( + "/{board}/thread/{id}", + post(handlers::thread::post_reply).layer(DefaultBodyLimit::max( + CONFIG.max_video_size.max(CONFIG.max_audio_size), + )), + ) .route( "/{board}/post/{id}/edit", get(handlers::thread::edit_post_get), diff --git a/src/templates/admin.rs b/src/templates/admin.rs index 5e6db95..8ba7ecf 100644 --- a/src/templates/admin.rs +++ b/src/templates/admin.rs @@ -35,7 +35,7 @@ pub fn admin_login_page(error: Option<&str>, csrf_token: &str, boards: &[Board]) err = err_html, csrf = escape_html(csrf_token), ); - base_layout("admin login", None, &body, csrf_token, boards, false, false) + base_layout("admin login", None, &body, csrf_token, boards, false) } // ─── Admin panel ────────────────────────────────────────────────────────────── @@ -634,7 +634,7 @@ pub fn admin_panel_page( }, ); - base_layout("admin panel", None, &body, csrf_token, boards, false, false) + base_layout("admin panel", None, &body, csrf_token, boards, false) } // ─── Moderation log ─────────────────────────────────────────────────────────── @@ -701,15 +701,7 @@ pub fn mod_log_page( pagination = pagination_html, ); - base_layout( - "mod log — admin", - None, - &body, - csrf_token, - boards, - false, - false, - ) + base_layout("mod log — admin", None, &body, csrf_token, boards, false) } // ─── VACUUM result ──────────────────────────────────────────────────────────── @@ -745,7 +737,7 @@ pub fn admin_vacuum_result_page(size_before: i64, size_after: i64, csrf_token: & pct = pct, ); - base_layout("VACUUM result", None, &body, csrf_token, &[], false, false) + base_layout("VACUUM result", None, &body, csrf_token, &[], false) } // ─── IP history ─────────────────────────────────────────────────────────────── @@ -862,6 +854,5 @@ pub fn admin_ip_history_page( csrf_token, all_boards, false, - false, ) } diff --git a/src/templates/board.rs b/src/templates/board.rs index d3d938c..1426d64 100644 --- a/src/templates/board.rs +++ b/src/templates/board.rs @@ -138,7 +138,6 @@ pub fn index_page( csrf_token, &all_boards, false, - false, ) } @@ -232,7 +231,6 @@ pub fn board_page( csrf_token, boards, collapse_greentext, - board.allow_archive, ) } @@ -536,7 +534,6 @@ pub fn catalog_page( csrf_token, boards, collapse_greentext, - board.allow_archive, ) } @@ -600,7 +597,6 @@ pub fn search_page( csrf_token, boards, collapse_greentext, - board.allow_archive, ) } @@ -697,6 +693,5 @@ pub fn archive_page( csrf_token, boards, collapse_greentext, - true, // archive page — link is always shown ) } diff --git a/src/templates/mod.rs b/src/templates/mod.rs index 627dd4a..795e51d 100644 --- a/src/templates/mod.rs +++ b/src/templates/mod.rs @@ -19,7 +19,8 @@ use crate::models::*; use crate::utils::sanitize::escape_html; use chrono::{TimeZone, Utc}; use once_cell::sync::Lazy; -use std::sync::RwLock; +use parking_lot::RwLock; +use std::sync::Arc; pub mod admin; pub mod board; @@ -33,66 +34,59 @@ pub use board::*; pub use thread::*; // ─── Live site name (DB-overridable, falls back to CONFIG.forum_name) ───────── +// +// parking_lot::RwLock is used instead of std::sync::RwLock for two reasons: +// 1. It never poisons — no need to handle poisoned-lock errors on the hot path. +// 2. Arc reduces the per-read allocation to a single atomic increment +// instead of a full String::clone(), which matters under high concurrency. -static LIVE_SITE_NAME: Lazy> = Lazy::new(|| RwLock::new(CONFIG.forum_name.clone())); -static LIVE_SITE_SUBTITLE: Lazy> = - Lazy::new(|| RwLock::new("select board to proceed".to_string())); +static LIVE_SITE_NAME: Lazy>> = + Lazy::new(|| RwLock::new(Arc::from(CONFIG.forum_name.as_str()))); +static LIVE_SITE_SUBTITLE: Lazy>> = + Lazy::new(|| RwLock::new(Arc::from("select board to proceed"))); /// In-memory cache for the admin-configured default theme (empty = terminal). /// Updated immediately when admin saves site settings so pages reflect the /// change without requiring a server restart or extra DB round-trip per request. -static LIVE_DEFAULT_THEME: Lazy> = Lazy::new(|| RwLock::new(String::new())); +static LIVE_DEFAULT_THEME: Lazy>> = Lazy::new(|| RwLock::new(Arc::from(""))); /// Call this at startup (after first DB read) and after admin saves a new name. pub fn set_live_site_name(name: &str) { - if let Ok(mut guard) = LIVE_SITE_NAME.write() { - *guard = if name.trim().is_empty() { - CONFIG.forum_name.clone() - } else { - name.to_string() - }; - } + let val: Arc = if name.trim().is_empty() { + Arc::from(CONFIG.forum_name.as_str()) + } else { + Arc::from(name) + }; + *LIVE_SITE_NAME.write() = val; } pub fn set_live_site_subtitle(subtitle: &str) { - if let Ok(mut guard) = LIVE_SITE_SUBTITLE.write() { - *guard = if subtitle.trim().is_empty() { - "select board to proceed".to_string() - } else { - subtitle.to_string() - }; - } + let val: Arc = if subtitle.trim().is_empty() { + Arc::from("select board to proceed") + } else { + Arc::from(subtitle) + }; + *LIVE_SITE_SUBTITLE.write() = val; } /// Update the in-memory default theme cache. /// Pass an empty string to reset to "terminal" (the built-in default). pub fn set_live_default_theme(theme: &str) { - if let Ok(mut guard) = LIVE_DEFAULT_THEME.write() { - *guard = theme.to_string(); - } + *LIVE_DEFAULT_THEME.write() = Arc::from(theme); } /// Read the current live default theme slug. -pub(super) fn live_default_theme() -> String { - LIVE_DEFAULT_THEME - .read() - .map(|g| g.clone()) - .unwrap_or_default() +pub(super) fn live_default_theme() -> Arc { + Arc::clone(&*LIVE_DEFAULT_THEME.read()) } /// Read the current live site name. -pub(super) fn live_site_name() -> String { - LIVE_SITE_NAME - .read() - .map(|g| g.clone()) - .unwrap_or_else(|_| CONFIG.forum_name.clone()) +pub(super) fn live_site_name() -> Arc { + Arc::clone(&*LIVE_SITE_NAME.read()) } -pub(super) fn live_site_subtitle() -> String { - LIVE_SITE_SUBTITLE - .read() - .map(|g| g.clone()) - .unwrap_or_else(|_| "select board to proceed".to_string()) +pub(super) fn live_site_subtitle() -> Arc { + Arc::clone(&*LIVE_SITE_SUBTITLE.read()) } // ─── Shared JS injected once per page ──────────────────────────────────────── @@ -261,7 +255,6 @@ pub(super) fn base_layout( csrf_token: &str, boards: &[Board], collapse_greentext: bool, - _allow_archive: bool, ) -> String { let inner_links: String = boards .iter() @@ -295,7 +288,7 @@ pub(super) fn base_layout( // Only inject the attribute when a non-default theme is configured — no // attribute means "terminal" (the built-in default), matching the // client-side behaviour in theme-init.js. - let default_theme_attr = if !default_theme.is_empty() && default_theme != "terminal" { + let default_theme_attr = if !default_theme.is_empty() && &*default_theme != "terminal" { format!(r#" data-default-theme="{}""#, escape_html(&default_theme)) } else { String::new() @@ -379,7 +372,7 @@ pub(super) fn base_layout( // server's CSRF check, making the appeal feature completely non-functional. pub fn ban_page(reason: &str, csrf_token: &str) -> String { let default_theme = live_default_theme(); - let default_theme_attr = if !default_theme.is_empty() && default_theme != "terminal" { + let default_theme_attr = if !default_theme.is_empty() && &*default_theme != "terminal" { format!(r#" data-default-theme="{}""#, escape_html(&default_theme)) } else { String::new() @@ -422,7 +415,7 @@ appeals are reviewed by site staff. one appeal per 24 hours.

pub fn error_page(code: u16, message: &str) -> String { let default_theme = live_default_theme(); - let default_theme_attr = if !default_theme.is_empty() && default_theme != "terminal" { + let default_theme_attr = if !default_theme.is_empty() && &*default_theme != "terminal" { format!(r#" data-default-theme="{}""#, escape_html(&default_theme)) } else { String::new() diff --git a/src/templates/thread.rs b/src/templates/thread.rs index 9d5cc05..2de7d0e 100644 --- a/src/templates/thread.rs +++ b/src/templates/thread.rs @@ -237,7 +237,6 @@ pub fn thread_page( csrf_token, boards, collapse_greentext, - board.allow_archive, ) } @@ -661,6 +660,5 @@ pub fn edit_post_page( csrf_token, boards, collapse_greentext, - board.allow_archive, ) } From 810e76fe71cea934d4e4bcc040d6160dfbdcf25a Mon Sep 17 00:00:00 2001 From: csd113 Date: Mon, 9 Mar 2026 14:24:09 -0700 Subject: [PATCH 3/7] bug fixes check changelog --- CHANGELOG.md | 125 ++++++++++++++++++++++++ Cargo.lock | 125 ++++++++++++++++++++++++ Cargo.toml | 7 +- src/config.rs | 131 +++++++++++++++++++++++++ src/db/boards.rs | 2 +- src/db/mod.rs | 9 +- src/db/posts.rs | 36 ++++++- src/db/threads.rs | 18 ++-- src/handlers/admin.rs | 215 ++++++++++++++++++++++++++++++++++++++++- src/handlers/board.rs | 66 ++++++++++--- src/handlers/mod.rs | 55 ++++++++--- src/handlers/thread.rs | 76 +++++++++++++-- src/main.rs | 210 ++++++++++++++++++++++++++++++++++++++-- src/templates/admin.rs | 18 +++- src/templates/mod.rs | 80 ++++++++++----- src/utils/files.rs | 67 +++++++++++++ src/workers/mod.rs | 126 +++++++++++++++++++----- static/main.js | 78 ++++++++++++++- static/style.css | 24 +++++ 19 files changed, 1362 insertions(+), 106 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7569e87..295afdc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,131 @@ All notable changes to RustChan will be documented in this file. ## [1.0.13] — 2026-03-08 +## WAL Mode + Connection Tuning +**`db/mod.rs`** + +`cache_size` bumped from `-4096` (4 MiB) to `-32000` (32 MiB) in the pool's `with_init` pragma block. The `journal_mode=WAL` and `synchronous=NORMAL` pragmas were already present. + +--- + +## Missing Indexes +**`db/mod.rs`** + +Two new migrations added at the end of the migration table: + +- **Migration 23:** `CREATE INDEX IF NOT EXISTS idx_posts_thread_id ON posts(thread_id)` — supplements the existing composite index for queries that filter on `thread_id` alone. +- **Migration 24:** `CREATE INDEX IF NOT EXISTS idx_posts_ip_hash ON posts(ip_hash)` — eliminates the full-table scan on the admin IP history page and per-IP cooldown checks. + +--- + +## Prepared Statement Caching Audit +**`db/threads.rs` · `db/boards.rs` · `db/posts.rs`** + +All remaining bare `conn.prepare(...)` calls on hot or repeated queries replaced with `conn.prepare_cached(...)`: `delete_thread`, `archive_old_threads`, `prune_old_threads` (outer `SELECT`) in `threads.rs`; `delete_board` in `boards.rs`; `search_posts` in `posts.rs`. Every query path is now consistently cached. + +--- + +## Transaction Batching for Thread Prune +Already implemented in the codebase. Both `prune_old_threads` and `archive_old_threads` already use `unchecked_transaction()` / `tx.commit()` to batch all deletes/updates into a single atomic transaction. No changes needed. + +--- + +## RETURNING Clause for Inserts +**`db/threads.rs` · `db/posts.rs`** + +`create_thread_with_op` and `create_post_inner` now use `INSERT … RETURNING id` via `query_row`, replacing the `execute()` + `last_insert_rowid()` pattern. The new ID is returned atomically in the same statement, eliminating the implicit coupling to connection-local state. + +--- + +## Scheduled VACUUM +**`config.rs` · `main.rs`** + +Added `auto_vacuum_interval_hours = 24` to config. A background Tokio task now sleeps for the configured interval (staggered from startup), then calls `db::run_vacuum()` via `spawn_blocking` and logs the bytes reclaimed. + +--- + +## Expired Poll Cleanup +**`config.rs` · `main.rs` · `db/posts.rs`** + +Added `poll_cleanup_interval_hours = 72`. A new `cleanup_expired_poll_votes()` DB function deletes vote rows for polls whose `expires_at` is older than the retention window. A background task runs it on the configured interval, preserving poll questions and options. + +--- + +## DB Size Warning +**`config.rs` · `handlers/admin.rs` · `templates/admin.rs`** + +Added `db_warn_threshold_mb = 2048`. The admin panel handler reads the actual file size via `std::fs::metadata`, computes a boolean flag, and passes it to the template. The template renders a red warning banner in the database maintenance section when the threshold is exceeded. + +--- + +## Job Queue Back-Pressure +**`config.rs` · `workers/mod.rs`** + +Added `job_queue_capacity = 1000`. The `enqueue()` method now checks `pending_job_count()` before inserting — if the queue is at or over capacity, the job is dropped with a `warn!` log and a sentinel `-1` is returned, avoiding OOM under post floods. + +--- + +## Coalesce Duplicate Media Jobs +**`workers/mod.rs`** + +Added an `Arc>` (`in_progress`) to `JobQueue`. Before dispatching a `VideoTranscode` or `AudioWaveform` job, `handle_job` checks if the `file_path` is already in the map — if so it skips and logs. The entry is removed on both success and failure. + +--- + +## FFmpeg Timeout +**`config.rs` · `workers/mod.rs`** + +Replaced hardcoded `FFMPEG_TRANSCODE_TIMEOUT` / `FFMPEG_WAVEFORM_TIMEOUT` constants with `CONFIG.ffmpeg_timeout_secs` (default: `120`). Both `transcode_video` and `generate_waveform` now read this value at runtime so operators can tune it in `settings.toml`. + +--- + +## Auto-Archive Before Prune +**`workers/mod.rs` · `config.rs`** + +`prune_threads` now evaluates `allow_archive || CONFIG.archive_before_prune`. The new global flag (default `true`) means no thread is ever silently hard-deleted on a board that has archiving enabled at the global level, even if the individual board didn't opt in. + +--- + +## Waveform Cache Eviction +**`main.rs` · `config.rs`** + +A background task runs every hour (after a 30-min startup stagger). It walks every `{board}/thumbs/` directory, sorts files oldest-first by mtime, and deletes until total size is under `waveform_cache_max_mb` (default 200 MiB). A new `evict_thumb_cache` function handles the scan-and-prune logic; originals are never touched. + +--- + +## Streaming Multipart +**`handlers/mod.rs`** + +The old `.bytes().await` (full in-memory buffering) is replaced by `read_field_bytes`, which streams via `.chunk()` and returns a `413 UploadTooLarge` the moment the running total exceeds the configured limit — before memory is exhausted. + +--- + +## ETag / Conditional GET +**`handlers/board.rs` · `handlers/thread.rs`** + +Both handlers now accept `HeaderMap`, derive an ETag (board index: `"{max_bump_ts}-{page}"`; thread: `"{bumped_at}"`), check `If-None-Match`, and return `304 Not Modified` on a hit. The ETag is included on all 200 responses too. + +--- + +## Gzip / Brotli Compression +**`main.rs` · `Cargo.toml`** + +`tower-http` features updated to `compression-full`. `CompressionLayer::new()` added to the middleware stack — it negotiates gzip, Brotli, or zstd based on the client's `Accept-Encoding` header. + +--- + +## Blocking Pool Sizing +**`main.rs` · `config.rs`** + +`#[tokio::main]` replaced with a manual `tokio::runtime::Builder` that calls `.max_blocking_threads(CONFIG.blocking_threads)`. Default is `logical_cpus × 4` (auto-detected); configurable via `blocking_threads` in `settings.toml` or `CHAN_BLOCKING_THREADS`. + +--- + +## EXIF Orientation Correction +**`utils/files.rs` · `Cargo.toml`** + +`kamadak-exif = "0.5"` added. `generate_image_thumb` now calls `read_exif_orientation` for JPEGs and passes the result to `apply_exif_orientation`, which dispatches to `imageops::rotate90/180/270` and `flip_horizontal/vertical` as needed. Non-JPEG formats skip the EXIF path entirely. + ### ✨ Added - **Backup system rewritten to stream instead of buffering in RAM** — all backup operations previously loaded entire zip files into memory, risking OOM on large instances. Downloads now stream from disk in 64 KiB chunks (browsers also get a proper progress bar). Backup creation now writes directly to disk via temp files with atomic rename on success, so partial backups never appear in the saved list. Individual file archiving now streams through an 8 KiB buffer instead of reading each file fully into memory. Peak RAM usage dropped from "entire backup size" to roughly 64 KiB regardless of instance size. - **ChanClassic theme** — a new theme that mimics the classic 4chan aesthetic: light tan/beige background, maroon/red accents, blue post-number links, and the iconic post block styling. Available in the theme picker alongside existing themes. diff --git a/Cargo.lock b/Cargo.lock index 8059c5e..e8e1340 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + [[package]] name = "android_system_properties" version = "0.1.5" @@ -94,6 +109,18 @@ dependencies = [ "password-hash", ] +[[package]] +name = "async-compression" +version = "0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0f9ee0f6e02ffd7ad5816e9464499fba7b3effd01123b515c41d1697c43dad1" +dependencies = [ + "compression-codecs", + "compression-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "async-trait" version = "0.1.89" @@ -222,6 +249,27 @@ dependencies = [ "generic-array", ] +[[package]] +name = "brotli" +version = "8.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + [[package]] name = "bumpalo" version = "3.20.2" @@ -253,6 +301,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" dependencies = [ "find-msvc-tools", + "jobserver", + "libc", "shlex", ] @@ -328,6 +378,26 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "compression-codecs" +version = "0.4.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb7b51a7d9c967fc26773061ba86150f19c50c0d65c887cb1fbe295fd16619b7" +dependencies = [ + "brotli", + "compression-core", + "flate2", + "memchr", + "zstd", + "zstd-safe", +] + +[[package]] +name = "compression-core" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" + [[package]] name = "cookie" version = "0.18.1" @@ -812,6 +882,16 @@ version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + [[package]] name = "js-sys" version = "0.3.91" @@ -822,6 +902,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "kamadak-exif" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef4fc70d0ab7e5b6bafa30216a6b48705ea964cdfc29c050f2412295eba58077" +dependencies = [ + "mutate_once", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -957,6 +1046,12 @@ dependencies = [ "version_check", ] +[[package]] +name = "mutate_once" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13d2233c9842d08cfe13f9eac96e207ca6a2ea10b80259ebe8ad0268be27d2af" + [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -1268,6 +1363,7 @@ dependencies = [ "dashmap", "hex", "image", + "kamadak-exif", "libc", "once_cell", "parking_lot", @@ -1700,6 +1796,7 @@ version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ + "async-compression", "bitflags", "bytes", "futures-core", @@ -2263,6 +2360,34 @@ dependencies = [ "simd-adler32", ] +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "zune-core" version = "0.5.1" diff --git a/Cargo.toml b/Cargo.toml index e42ab1d..fae4642 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,7 +27,7 @@ async-trait = "0.1" axum = { version = "0.8", features = ["multipart"] } axum-extra = { version = "0.12", features = ["cookie"] } tower = "0.5" -tower-http = { version = "0.6", features = ["fs", "set-header"] } +tower-http = { version = "0.6", features = ["fs", "set-header", "compression-full"] } tokio = { version = "1", features = ["full"] } tokio-util = "0.7" @@ -54,6 +54,11 @@ rand_core = { version = "0.6", features = ["getrandom"] } image = { version = "0.25", default-features = false, features = ["jpeg", "png", "gif", "webp"] } +# EXIF orientation correction: read Orientation tag from JPEG uploads and +# apply the corresponding rotation before thumbnailing so that photos taken +# on phones display upright (4.1). +kamadak-exif = "0.5" + clap = { version = "4", features = ["derive"] } uuid = { version = "1", features = ["v4"] } diff --git a/src/config.rs b/src/config.rs index 64087d2..f43b667 100644 --- a/src/config.rs +++ b/src/config.rs @@ -63,6 +63,33 @@ struct SettingsFile { /// How often to run PRAGMA wal_checkpoint(TRUNCATE), in seconds. /// Set to 0 to disable. Default: 3600 (hourly). wal_checkpoint_interval_secs: Option, + /// How often to run VACUUM to reclaim disk space, in hours. + /// Set to 0 to disable. Default: 24 (daily). + auto_vacuum_interval_hours: Option, + /// How often to purge vote records for expired polls, in hours. + /// Set to 0 to disable. Default: 72 (every 3 days). + poll_cleanup_interval_hours: Option, + /// Database file size (MB) above which a warning banner is shown in the + /// admin panel. Set to 0 to disable. Default: 2048 (2 GiB). + db_warn_threshold_mb: Option, + /// Maximum number of pending jobs in the background job queue. + /// When this limit is reached, new jobs are dropped (with a warning) rather + /// than accepted. Default: 1000. + job_queue_capacity: Option, + /// Maximum seconds to allow a single FFmpeg transcode or waveform job to + /// run before it is killed. Default: 120. + ffmpeg_timeout_secs: Option, + /// When true, overflow threads are always archived rather than hard-deleted, + /// even on boards with allow_archive = false. Default: true. + archive_before_prune: Option, + /// Maximum total size (MiB) of all thumbnail/waveform cache files across all + /// boards. A background task evicts the oldest files when exceeded. + /// Set to 0 to disable. Default: 200. + waveform_cache_max_mb: Option, + /// Number of threads in Tokio's blocking pool (spawn_blocking). + /// Defaults to logical CPUs × 4. Increase if DB/render latency is a bottleneck + /// under load. + blocking_threads: Option, } fn load_settings_file() -> SettingsFile { @@ -138,6 +165,46 @@ require_ffmpeg = false # Set to 0 to disable. Default: 3600 (hourly). wal_checkpoint_interval_secs = 3600 +# How often (in hours) to run VACUUM automatically to reclaim disk space +# freed by deleted posts and threads. Set to 0 to disable. Default: 24. +auto_vacuum_interval_hours = 24 + +# How often (in hours) to purge vote records for polls that have expired. +# The poll question and options are kept for display; only per-IP vote rows +# are deleted. Set to 0 to disable. Default: 72. +poll_cleanup_interval_hours = 72 + +# Database file size (MiB) above which a warning banner appears in the admin +# panel. Set to 0 to disable. Default: 2048 (2 GiB). +db_warn_threshold_mb = 2048 + +# Maximum number of pending background jobs (video transcode, waveform, etc.) +# allowed in the queue at once. When this limit is reached, new jobs are +# silently dropped (with a warning log) rather than accepted. Default: 1000. +job_queue_capacity = 1000 + +# Maximum seconds a single FFmpeg transcode or waveform job may run before +# it is killed. Prevents pathological media files from stalling the worker +# pool indefinitely. Default: 120. +ffmpeg_timeout_secs = 120 + +# When true, threads that would be hard-deleted by the prune worker are instead +# moved to the archive table, even on boards where archiving is disabled. This +# acts as a global safety net against silent data loss when a board hits its +# thread limit. Default: true. +archive_before_prune = true + +# Maximum total size (MiB) of all thumbnail/waveform cache files across all +# boards. A background task periodically evicts the oldest files when the +# total exceeds this value. Set to 0 to disable. Default: 200. +waveform_cache_max_mb = 200 + +# Number of threads in Tokio's blocking pool (spawn_blocking). Every page +# render and DB write goes through this pool; sizing it to CPUs × 4 prevents +# it from becoming a bottleneck under concurrent load. +# Default: logical CPUs × 4 (auto-detected at startup; leave 0 for auto). +blocking_threads = 0 + # Secret key for IP hashing. # AUTO-GENERATED on first run — do NOT change after your first post, # or all existing IP hashes become invalid (bans will stop working). @@ -194,6 +261,24 @@ pub struct Config { pub https_cookies: bool, /// Interval in seconds between WAL checkpoint runs. 0 = disabled. pub wal_checkpoint_interval: u64, + /// Interval in hours between automatic VACUUM runs. 0 = disabled. + pub auto_vacuum_interval_hours: u64, + /// Interval in hours between expired poll vote cleanup runs. 0 = disabled. + pub poll_cleanup_interval_hours: u64, + /// DB file size threshold in bytes above which admin panel shows a warning. + /// 0 = disabled. + pub db_warn_threshold_bytes: u64, + /// Maximum number of pending jobs before new ones are dropped. + pub job_queue_capacity: u64, + /// Maximum seconds a single FFmpeg job may run before being killed. + pub ffmpeg_timeout_secs: u64, + /// When true, threads are always archived (never hard-deleted) on prune, + /// overriding individual board settings. + pub archive_before_prune: bool, + /// Total thumbnail/waveform cache size limit in bytes. 0 = disabled. + pub waveform_cache_max_bytes: u64, + /// Number of threads in Tokio's blocking pool. Default: logical CPUs × 4. + pub blocking_threads: usize, } impl Config { @@ -278,6 +363,52 @@ impl Config { "CHAN_WAL_CHECKPOINT_SECS", s.wal_checkpoint_interval_secs.unwrap_or(3600), ), + auto_vacuum_interval_hours: env_parse( + "CHAN_AUTO_VACUUM_HOURS", + s.auto_vacuum_interval_hours.unwrap_or(24), + ), + poll_cleanup_interval_hours: env_parse( + "CHAN_POLL_CLEANUP_HOURS", + s.poll_cleanup_interval_hours.unwrap_or(72), + ), + db_warn_threshold_bytes: { + let mb = env_parse::( + "CHAN_DB_WARN_THRESHOLD_MB", + s.db_warn_threshold_mb.unwrap_or(2048), + ); + mb * 1024 * 1024 + }, + job_queue_capacity: env_parse( + "CHAN_JOB_QUEUE_CAPACITY", + s.job_queue_capacity.unwrap_or(1000), + ), + ffmpeg_timeout_secs: env_parse( + "CHAN_FFMPEG_TIMEOUT_SECS", + s.ffmpeg_timeout_secs.unwrap_or(120), + ), + archive_before_prune: env_bool( + "CHAN_ARCHIVE_BEFORE_PRUNE", + s.archive_before_prune.unwrap_or(true), + ), + waveform_cache_max_bytes: { + let mb = env_parse::( + "CHAN_WAVEFORM_CACHE_MAX_MB", + s.waveform_cache_max_mb.unwrap_or(200), + ); + mb * 1024 * 1024 + }, + blocking_threads: { + let cpus = std::thread::available_parallelism() + .map(|p| p.get()) + .unwrap_or(4); + let configured = + env_parse("CHAN_BLOCKING_THREADS", s.blocking_threads.unwrap_or(0)); + if configured == 0 { + cpus * 4 + } else { + configured + } + }, } } diff --git a/src/db/boards.rs b/src/db/boards.rs index 9d8df92..adad9dc 100644 --- a/src/db/boards.rs +++ b/src/db/boards.rs @@ -265,7 +265,7 @@ pub fn delete_board(conn: &rusqlite::Connection, id: i64) -> Result> // Collect every file path that belongs to this board before deletion. // The CASCADE on boards→threads→posts handles DB row removal, but the // on-disk files must be cleaned up by the caller. - let mut stmt = conn.prepare( + let mut stmt = conn.prepare_cached( "SELECT p.file_path, p.thumb_path, p.audio_file_path FROM posts p JOIN threads t ON p.thread_id = t.id diff --git a/src/db/mod.rs b/src/db/mod.rs index 2e22d47..b06324b 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -95,7 +95,7 @@ pub fn init_pool() -> Result { "PRAGMA journal_mode = WAL; PRAGMA synchronous = NORMAL; PRAGMA foreign_keys = ON; - PRAGMA cache_size = -4096; -- 4 MiB page cache per connection + PRAGMA cache_size = -32000; -- 32 MiB page cache per connection (1.1) PRAGMA temp_store = MEMORY; PRAGMA mmap_size = 67108864; -- 64 MiB memory-mapped IO PRAGMA busy_timeout = 10000; -- 10s: wait instead of instant SQLITE_BUSY", @@ -372,6 +372,13 @@ fn create_schema(conn: &rusqlite::Connection) -> Result<()> { created_at INTEGER NOT NULL DEFAULT (unixepoch()) )"), (22, "ALTER TABLE boards ADD COLUMN post_cooldown_secs INTEGER NOT NULL DEFAULT 0"), + // 1.2 — Explicit indexes for hot query paths that were previously full-scans. + // idx_posts_thread_id: covers get_posts_for_thread and all WHERE thread_id = ? + // queries that don't need the composite (thread_id, created_at) ordering index. + (23, "CREATE INDEX IF NOT EXISTS idx_posts_thread_id ON posts(thread_id)"), + // idx_posts_ip_hash: covers the per-IP admin history page and cooldown checks + // which previously scanned every row in the posts table. + (24, "CREATE INDEX IF NOT EXISTS idx_posts_ip_hash ON posts(ip_hash)"), ]; let mut highest_applied = current_version; diff --git a/src/db/posts.rs b/src/db/posts.rs index d944e29..b8bb2d0 100644 --- a/src/db/posts.rs +++ b/src/db/posts.rs @@ -112,13 +112,18 @@ pub fn get_preview_posts(conn: &rusqlite::Connection, thread_id: i64, n: i64) -> /// /// `pub(super)` so sibling modules can call it without exposing it externally. pub(super) fn create_post_inner(conn: &rusqlite::Connection, p: &super::NewPost) -> Result { - conn.execute( + // 1.5: INSERT … RETURNING id returns the new post ID atomically in the same + // statement. This removes the implicit reliance on connection-local + // last_insert_rowid() state, which is correct only because the connection + // is not shared mid-transaction — safe today but fragile by design. + let post_id: i64 = conn.query_row( "INSERT INTO posts (thread_id, board_id, name, tripcode, subject, body, body_html, ip_hash, file_path, file_name, file_size, thumb_path, mime_type, deletion_token, is_op, media_type, audio_file_path, audio_file_name, audio_file_size, audio_mime_type) - VALUES (?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20)", + VALUES (?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20) + RETURNING id", params![ p.thread_id, p.board_id, @@ -141,8 +146,9 @@ pub(super) fn create_post_inner(conn: &rusqlite::Connection, p: &super::NewPost) p.audio_file_size, p.audio_mime_type, ], + |r| r.get(0), )?; - Ok(conn.last_insert_rowid()) + Ok(post_id) } pub fn create_post(conn: &rusqlite::Connection, p: &super::NewPost) -> Result { @@ -348,7 +354,7 @@ pub fn search_posts( offset: i64, ) -> Result> { let pattern = format!("%{}%", query.replace('%', "\\%").replace('_', "\\_")); - let mut stmt = conn.prepare( + let mut stmt = conn.prepare_cached( "SELECT id, thread_id, board_id, name, tripcode, subject, body, body_html, ip_hash, file_path, file_name, file_size, thumb_path, mime_type, created_at, deletion_token, is_op, media_type, @@ -566,6 +572,28 @@ pub fn get_poll_context( .optional()?) } +// ─── Poll maintenance ───────────────────────────────────────────────────────── + +/// Delete vote rows for polls whose `expires_at` is older than the given +/// retention cutoff (a Unix timestamp). The poll question and options are +/// preserved for historical display; only the per-IP vote records are pruned. +/// +/// Returns the number of vote rows deleted. +pub fn cleanup_expired_poll_votes( + conn: &rusqlite::Connection, + retention_cutoff: i64, +) -> Result { + let n = conn.execute( + "DELETE FROM poll_votes + WHERE poll_id IN ( + SELECT id FROM polls + WHERE expires_at IS NOT NULL AND expires_at < ?1 + )", + params![retention_cutoff], + )?; + Ok(n) +} + // ─── Background job queue ───────────────────────────────────────────────────── // // Jobs flow through: pending → running → done | failed diff --git a/src/db/threads.rs b/src/db/threads.rs index 8728643..e4a035a 100644 --- a/src/db/threads.rs +++ b/src/db/threads.rs @@ -130,11 +130,14 @@ pub fn create_thread_with_op( conn.execute("BEGIN IMMEDIATE", [])?; let result = (|| -> Result<(i64, i64)> { - conn.execute( - "INSERT INTO threads (board_id, subject) VALUES (?1, ?2)", + // 1.5: Use RETURNING id to retrieve the new thread ID atomically in the + // same statement, removing the implicit coupling to connection state that + // last_insert_rowid() has (safe here but fragile by design). + let thread_id: i64 = conn.query_row( + "INSERT INTO threads (board_id, subject) VALUES (?1, ?2) RETURNING id", params![board_id, subject], + |r| r.get(0), )?; - let thread_id = conn.last_insert_rowid(); let post_with_thread = super::NewPost { thread_id, @@ -203,8 +206,9 @@ pub fn set_thread_archived( } pub fn delete_thread(conn: &rusqlite::Connection, thread_id: i64) -> Result> { - let mut stmt = conn - .prepare("SELECT file_path, thumb_path, audio_file_path FROM posts WHERE thread_id = ?1")?; + let mut stmt = conn.prepare_cached( + "SELECT file_path, thumb_path, audio_file_path FROM posts WHERE thread_id = ?1", + )?; let rows: Vec<(Option, Option, Option)> = stmt .query_map(params![thread_id], |r| { Ok((r.get(0)?, r.get(1)?, r.get(2)?)) @@ -241,7 +245,7 @@ pub fn delete_thread(conn: &rusqlite::Connection, thread_id: i64) -> Result Result { let ids: Vec = { - let mut stmt = conn.prepare( + let mut stmt = conn.prepare_cached( "SELECT id FROM threads WHERE board_id = ?1 AND sticky = 0 AND archived = 0 ORDER BY bumped_at DESC LIMIT -1 OFFSET ?2", @@ -289,7 +293,7 @@ pub fn prune_old_threads( max: i64, ) -> Result> { let ids: Vec = { - let mut stmt = conn.prepare( + let mut stmt = conn.prepare_cached( "SELECT id FROM threads WHERE board_id = ?1 AND sticky = 0 AND archived = 0 ORDER BY bumped_at DESC LIMIT -1 OFFSET ?2", diff --git a/src/handlers/admin.rs b/src/handlers/admin.rs index 5aafff5..a15c10f 100644 --- a/src/handlers/admin.rs +++ b/src/handlers/admin.rs @@ -430,6 +430,19 @@ pub async fn admin_panel( let db_size_bytes = db::get_db_size_bytes(&conn).unwrap_or(0); + // 1.8: Compute whether the DB file size exceeds the configured + // warning threshold. Uses the on-disk file size (via fs::metadata) + // rather than SQLite's PRAGMA page_count estimate for accuracy, + // falling back to the pragma value if the path is unavailable. + let db_size_warning = if CONFIG.db_warn_threshold_bytes > 0 { + let file_size = std::fs::metadata(&CONFIG.database_path) + .map(|m| m.len()) + .unwrap_or(db_size_bytes as u64); + file_size >= CONFIG.db_warn_threshold_bytes + } else { + false + }; + // Read the tor onion address from the hostname file if tor is enabled. let tor_address: Option = if CONFIG.enable_tor_support { let data_dir = std::path::PathBuf::from(&CONFIG.database_path) @@ -456,6 +469,7 @@ pub async fn admin_panel( &full_backups, &board_backups_list, db_size_bytes, + db_size_warning, &reports, &appeals, &site_name, @@ -527,6 +541,9 @@ pub async fn create_board( require_admin_session_sid(&conn, session_id.as_deref())?; db::create_board(&conn, &short, &name, &description, nsfw)?; info!("Admin created board /{}/", short); + // Refresh live board list so the top bar on any subsequent error + // page includes the newly created board. + crate::templates::set_live_boards(db::get_all_boards(&conn)?); Ok(()) } }) @@ -594,6 +611,9 @@ pub async fn delete_board( form.board_id, paths.len() ); + // Refresh live board list so the top bar immediately stops showing + // the deleted board — important because error pages use this cache. + crate::templates::set_live_boards(db::get_all_boards(&conn)?); Ok(()) } }) @@ -1297,6 +1317,7 @@ pub async fn update_board_settings( post_cooldown_secs, )?; info!("Admin updated settings for board id={}", board_id); + crate::templates::set_live_boards(db::get_all_boards(&conn)?); Ok(()) } }) @@ -1726,11 +1747,15 @@ pub async fn admin_restore( match db::create_session(&live_conn, &fresh_sid, admin_id, expires_at) { Ok(_) => { info!("Admin restore completed; new session issued for admin_id={}", admin_id); + // Refresh live board list — the restored DB may have + // different boards than what was running before. + if let Ok(boards) = db::get_all_boards(&live_conn) { + crate::templates::set_live_boards(boards); + } Ok(fresh_sid) } Err(e) => { warn!("Restore: could not create new session (admin_id={} may not exist in backup): {}", admin_id, e); - // Return empty string as a sentinel — handler will send to login. Ok(String::new()) } } @@ -1764,9 +1789,125 @@ pub async fn admin_restore( // 1 KiB zip (a "zip bomb") can expand to gigabytes, exhausting disk or memory. // copy_limited() caps the decompressed size of each entry. -/// Maximum bytes to extract from any single zip entry. -/// Set to 16 GiB — these are admin-only restore endpoints, so individual -/// entries (large videos, the SQLite DB) can legitimately be several GiB. +// Maximum bytes to extract from any single zip entry. +// Set to 16 GiB — these are admin-only restore endpoints, so individual +// entries (large videos, the SQLite DB) can legitimately be several GiB. +// ─── Quotelink ID remapping ─────────────────────────────────────────────────── +// +// When a board backup is restored, posts receive new auto-incremented IDs +// because other boards' posts already occupy the original IDs in the global +// `posts` table. `remap_body_quotelinks` and `remap_body_html_quotelinks` +// rewrite the raw text and rendered HTML of each restored post so that in-board +// quotelinks point to the new IDs instead of the now-stale original ones. +// +// Design constraints: +// +// 1. ONLY same-board links are remapped. Cross-board references (`>>>/b/N`) +// point to other boards whose IDs are unchanged by this restore operation +// and must not be altered. +// +// 2. `pairs` must be sorted by old-ID string length *descending* before being +// passed to these functions. This prevents a shorter ID (e.g. "10") from +// being substituted as a prefix of a longer one ("1000") before the longer +// match has a chance to fire. Example: +// pairs = [("1000","2500"), ("100","800"), ("10","50"), ("1","3")] +// Processing "1000" before "100" prevents "1000" → "8000" (wrong first). +// +// 3. `body` stores the original markdown-like text the user typed. In-board +// quotelinks appear as `>>{old_id}` (e.g. `>>500`). A regex-free approach +// is used: for each (old, new) pair, replace `>>{old}` followed by a +// non-digit (or end-of-string) to avoid `>>100` matching inside `>>1000`. +// +// 4. `body_html` stores pre-rendered HTML. In-board quotelinks look like: +// >>{N} +// Cross-board quotelinks look like: +// >>>/… +// We replace `href="#p{old}"` (exclusively in-board) and +// `data-pid="{old}">>>{old}` (display text also exclusive to +// in-board links — cross-board display text has `>>>/` prefix). + +// Rewrite in-board `>>{old_id}` references in the raw post body. +// `pairs` must be pre-sorted by old-ID string length descending. +fn remap_body_quotelinks(body: &str, pairs: &[(String, String)]) -> String { + // Avoid cloning when there is nothing to change. + if pairs.is_empty() { + return body.to_string(); + } + let mut result = body.to_string(); + for (old, new) in pairs { + // Match `>>{old}` only when NOT immediately followed by another digit, + // so we don't turn `>>1000` into `>>new_id_for_1000` when processing + // the `>>100` entry first. + // + // Implementation: scan for every occurrence of `>>{old}` in the string + // and check the next character. Replace left-to-right using byte indices + // to avoid re-scanning already-replaced sections. + let needle = format!(">>{}", old); + let mut out = String::with_capacity(result.len()); + let mut pos = 0; + let bytes = result.as_bytes(); + while pos < bytes.len() { + match result[pos..].find(&needle) { + None => { + out.push_str(&result[pos..]); + break; + } + Some(rel) => { + let abs = pos + rel; + let after = abs + needle.len(); + // Only replace when the char after the match is not a digit. + let next_is_digit = bytes + .get(after) + .map(|b| b.is_ascii_digit()) + .unwrap_or(false); + out.push_str(&result[pos..abs]); + if next_is_digit { + // Not the right match — keep the original text. + out.push_str(&needle); + } else { + out.push_str(">>"); + out.push_str(new); + } + pos = after; + } + } + } + result = out; + } + result +} + +/// Rewrite in-board quotelink IDs in pre-rendered `body_html`. +/// +/// Targets two patterns that are exclusive to same-board quotelinks: +/// • `href="#p{old}"` — the anchor href +/// • `data-pid="{old}">>>{old}` — the data attribute + display text +/// +/// Cross-board links use `href="/board/post/{N}"` and display `>>>/…` +/// so neither pattern matches them. +/// +/// `pairs` must be pre-sorted by old-ID string length descending. +fn remap_body_html_quotelinks(body_html: &str, pairs: &[(String, String)]) -> String { + if pairs.is_empty() { + return body_html.to_string(); + } + let mut result = body_html.to_string(); + for (old, new) in pairs { + // Pattern 1: href="#p{old}" → href="#p{new}" + let old_href = format!("href=\"#p{}\"", old); + let new_href = format!("href=\"#p{}\"", new); + result = result.replace(&old_href, &new_href); + + // Pattern 2: data-pid="{old}">>>{old} + // This uniquely identifies in-board quotelinks: cross-board links have + // ">>>/" as their display text, never bare ">>{N}". + let old_tail = format!("data-pid=\"{old}\">>>{old}", old = old); + let new_tail = format!("data-pid=\"{new}\">>>{new}", new = new); + result = result.replace(&old_tail, &new_tail); + } + result +} + const ZIP_ENTRY_MAX_BYTES: u64 = 16 * 1024 * 1024 * 1024; /// Like `std::io::copy` but returns `InvalidData` if more than `max_bytes` @@ -3620,6 +3761,20 @@ pub async fn board_restore( thread_id_map.insert(t.id, conn.last_insert_rowid()); } + // ── Insert posts, recording old → new ID mapping ────── + // + // Board restore cannot reuse original post IDs because + // other boards' posts may already occupy those rows in the + // global `posts` table (SQLite AUTOINCREMENT is site-wide, + // not per-board). The posts therefore land at new IDs. + // + // `post_id_map` captures every (old_id → new_id) pair so + // that we can fix up in-board quotelink references in + // `body` and `body_html` in the second pass below. + // Without this, `>>500` in a restored post still points to + // old ID 500 which no longer exists — clicks produce 404s + // and hover previews show "Post not found". + let mut post_id_map: HashMap = HashMap::new(); for p in &manifest.posts { let new_tid = *thread_id_map.get(&p.thread_id).ok_or_else(|| { AppError::Internal(anyhow::anyhow!("Post {} refs unknown thread {}", p.id, p.thread_id)) @@ -3637,6 +3792,53 @@ pub async fn board_restore( ], ) .map_err(|e| AppError::Internal(anyhow::anyhow!("Insert post {}: {}", p.id, e)))?; + post_id_map.insert(p.id, conn.last_insert_rowid()); + } + + // ── Quotelink fixup pass ────────────────────────────── + // + // If any post IDs changed (which they almost always do + // when restoring into a live DB), rewrite `body` and + // `body_html` for every restored post so that in-board + // quotelinks point at the new IDs. + // + // Only same-board references are remapped. Cross-board + // links (`>>>/board/N`) point to other boards whose IDs + // are unchanged; they are deliberately left untouched. + let any_changed = post_id_map.iter().any(|(old, new)| old != new); + if any_changed { + // Sort by old-ID string length descending so that + // longer IDs are replaced before any prefix of theirs. + // e.g. replace >>1000 before >>100 before >>10 before >>1. + let mut pairs: Vec<(String, String)> = post_id_map + .iter() + .filter(|(old, new)| old != new) + .map(|(old, new)| (old.to_string(), new.to_string())) + .collect(); + pairs.sort_by(|a, b| b.0.len().cmp(&a.0.len()).then(b.0.cmp(&a.0))); + + for p in &manifest.posts { + let new_post_id = match post_id_map.get(&p.id) { + Some(&id) => id, + None => continue, + }; + + let new_body = remap_body_quotelinks(&p.body, &pairs); + let new_body_html = remap_body_html_quotelinks(&p.body_html, &pairs); + + // Only issue the UPDATE when the text actually + // changed — avoids unnecessary I/O when none of + // the post IDs appear in this post's body. + if new_body != p.body || new_body_html != p.body_html { + conn.execute( + "UPDATE posts SET body = ?1, body_html = ?2 WHERE id = ?3", + params![new_body, new_body_html, new_post_id], + ) + .map_err(|e| AppError::Internal(anyhow::anyhow!( + "Fixup quotelinks for post {}: {}", new_post_id, e + )))?; + } + } } let mut poll_id_map: HashMap = HashMap::new(); @@ -3734,6 +3936,11 @@ pub async fn board_restore( } info!("Admin board restore completed for /{}/", board_short); + // Refresh live board list — board_restore may have created a + // board that didn't exist before, so the top bar must update. + if let Ok(boards) = db::get_all_boards(&conn) { + crate::templates::set_live_boards(boards); + } let safe_short: String = board_short .chars() .filter(|c| c.is_ascii_alphanumeric()) diff --git a/src/handlers/board.rs b/src/handlers/board.rs index 834c4e1..2f098c0 100644 --- a/src/handlers/board.rs +++ b/src/handlers/board.rs @@ -27,6 +27,7 @@ use crate::{ }; use axum::{ extract::{Form, Multipart, Path, Query, State}, + http::HeaderMap, response::{Html, IntoResponse, Redirect, Response}, }; use axum_extra::extract::cookie::{Cookie, CookieJar, SameSite}; @@ -89,7 +90,8 @@ pub async fn board_index( Path(board_short): Path, Query(params): Query>, jar: CookieJar, -) -> Result<(CookieJar, Html)> { + req_headers: HeaderMap, +) -> Result { let (jar, csrf) = ensure_csrf(jar); let page: i64 = params @@ -98,16 +100,15 @@ pub async fn board_index( .unwrap_or(1) .max(1); - let html = tokio::task::spawn_blocking({ + let (jar, csrf) = (jar, csrf); + + let result = tokio::task::spawn_blocking({ let pool = state.db.clone(); let csrf_clone = csrf.clone(); - // FIX[HIGH-2]: is_admin_session check moved inside spawn_blocking so - // the blocking DB call does not stall the Tokio worker thread. let jar_session = jar.get("chan_admin_session").map(|c| c.value().to_string()); - move || -> Result { + move || -> Result<(String, String)> { let conn = pool.get()?; - // Resolve admin status inside the blocking task let is_admin = jar_session .as_deref() .map(|sid| db::get_session(&conn, sid).ok().flatten().is_some()) @@ -121,6 +122,12 @@ pub async fn board_index( let threads = db::get_threads_for_board(&conn, board.id, THREADS_PER_PAGE, pagination.offset())?; + // 3.2: Derive ETag from the most-recently-bumped thread on this page + // combined with the page number. This is a cheap proxy for "has + // anything on this page changed?". + let max_bump = threads.iter().map(|t| t.bumped_at).max().unwrap_or(0); + let etag = format!("\"{}-{}\"", max_bump, page); + let mut summaries = Vec::with_capacity(threads.len()); for thread in threads { let total_replies = thread.reply_count; @@ -135,7 +142,7 @@ pub async fn board_index( let all_boards = db::get_all_boards(&conn)?; let collapse_greentext = db::get_collapse_greentext(&conn); - Ok(templates::board_page( + let html = templates::board_page( &board, &summaries, &pagination, @@ -144,13 +151,38 @@ pub async fn board_index( is_admin, None, collapse_greentext, - )) + ); + Ok((etag, html)) } }) .await .map_err(|e| AppError::Internal(anyhow::anyhow!(e)))??; - Ok((jar, Html(html))) + let (etag, html) = result; + + // 3.2: Return 304 Not Modified when the client's cached version is current. + let client_etag = req_headers + .get("if-none-match") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + if client_etag == etag { + let mut resp = axum::http::Response::builder() + .status(axum::http::StatusCode::NOT_MODIFIED) + .body(axum::body::Body::empty()) + .unwrap_or_default(); + resp.headers_mut().insert( + "etag", + axum::http::HeaderValue::from_str(&etag) + .unwrap_or_else(|_| axum::http::HeaderValue::from_static("\"0\"")), + ); + return Ok((jar, resp).into_response()); + } + + let mut resp = Html(html).into_response(); + if let Ok(v) = axum::http::HeaderValue::from_str(&etag) { + resp.headers_mut().insert("etag", v); + } + Ok((jar, resp).into_response()) } // ─── POST /:board/ — create new thread ─────────────────────────────────────── @@ -805,8 +837,20 @@ pub async fn redirect_to_post( Redirect::to(&url).into_response() } _ => { - // Post not found or wrong board — return a plain 404. - axum::http::StatusCode::NOT_FOUND.into_response() + // Post not found or wrong board — render the error page template + // so the user gets a readable message instead of a blank HTTP 404. + // This is the fallback path when JavaScript is disabled or when + // a user manually navigates to a quotelink URL after a board + // restore that assigned new IDs to the restored posts. + let html = crate::templates::error_page( + 404, + &format!("Post #{} not found. It may have been deleted or the board was restored from a backup.", post_id), + ); + ( + axum::http::StatusCode::NOT_FOUND, + axum::response::Html(html), + ) + .into_response() } } } diff --git a/src/handlers/mod.rs b/src/handlers/mod.rs index ec211b7..bf62042 100644 --- a/src/handlers/mod.rs +++ b/src/handlers/mod.rs @@ -7,11 +7,47 @@ pub mod thread; // Both create_thread and post_reply parse the same multipart fields. // This helper consolidates that duplicated logic into one place. +use crate::config::CONFIG; use crate::error::{AppError, Result}; use crate::middleware::validate_csrf; use crate::workers::JobQueue; use axum::extract::Multipart; +// ─── Streaming multipart size limit ────────────────────────────────────────── +// +// 3.1: The previous implementation called `field.bytes().await` which buffers +// the entire file in memory before any size check, allowing a malicious client +// to exhaust server RAM with a multi-GB upload. +// +// `read_field_bytes` replaces it with a streaming read that accumulates chunks +// and aborts — returning HTTP 413 — the moment the running total exceeds the +// configured limit. The limit used is the largest allowed media size so that +// any single field is capped. +// +// Text fields (CSRF token, post body, …) are routed through `field.text()` +// which is bounded by axum's body length limit set in the router layer. + +async fn read_field_bytes( + mut field: axum::extract::multipart::Field<'_>, + max_bytes: usize, +) -> Result> { + let mut buf: Vec = Vec::new(); + while let Some(chunk) = field + .chunk() + .await + .map_err(|e| AppError::BadRequest(e.to_string()))? + { + if buf.len() + chunk.len() > max_bytes { + return Err(AppError::UploadTooLarge(format!( + "File too large. Maximum upload size is {} MiB.", + max_bytes / 1024 / 1024 + ))); + } + buf.extend_from_slice(&chunk); + } + Ok(buf) +} + /// Parsed fields from a post/thread creation multipart form. pub struct PostFormData { pub csrf_verified: bool, @@ -112,22 +148,17 @@ pub async fn parse_post_multipart( } Some("file") => { let fname = field.file_name().unwrap_or("upload").to_string(); - let bytes = field - .bytes() - .await - .map_err(|e| AppError::BadRequest(format!("File read error: {e}")))?; - if !bytes.is_empty() { - file = Some((bytes.to_vec(), fname)); + let max = CONFIG.max_video_size.max(CONFIG.max_audio_size); + let data = read_field_bytes(field, max).await?; + if !data.is_empty() { + file = Some((data, fname)); } } Some("audio_file") => { let fname = field.file_name().unwrap_or("audio").to_string(); - let bytes = field - .bytes() - .await - .map_err(|e| AppError::BadRequest(format!("Audio file read error: {e}")))?; - if !bytes.is_empty() { - audio_file = Some((bytes.to_vec(), fname)); + let data = read_field_bytes(field, CONFIG.max_audio_size).await?; + if !data.is_empty() { + audio_file = Some((data, fname)); } } _ => { diff --git a/src/handlers/thread.rs b/src/handlers/thread.rs index 5a4ae3f..475d07d 100644 --- a/src/handlers/thread.rs +++ b/src/handlers/thread.rs @@ -22,6 +22,7 @@ use crate::{ }; use axum::{ extract::{Form, Multipart, Path, Query, State}, + http::HeaderMap, response::{Html, IntoResponse, Redirect, Response}, }; use axum_extra::extract::cookie::CookieJar; @@ -35,14 +36,15 @@ pub async fn view_thread( Path((board_short, thread_id)): Path<(String, i64)>, crate::middleware::ClientIp(client_ip): crate::middleware::ClientIp, jar: CookieJar, -) -> Result<(CookieJar, Html)> { + req_headers: HeaderMap, +) -> Result { let (jar, csrf) = ensure_csrf(jar); - let html = tokio::task::spawn_blocking({ + let result = tokio::task::spawn_blocking({ let pool = state.db.clone(); let csrf_clone = csrf.clone(); let jar_session = jar.get("chan_admin_session").map(|c| c.value().to_string()); - move || -> Result { + move || -> Result<(String, String)> { let conn = pool.get()?; let is_admin = jar_session @@ -60,16 +62,23 @@ pub async fn view_thread( return Err(AppError::NotFound("Thread not found in this board.".into())); } + // ETag derived from the thread's last-bump timestamp AND the + // current board-list version. The board version component ensures + // that adding or deleting a board invalidates cached thread pages, + // so the nav bar always reflects the current board list rather than + // showing stale/deleted boards until the thread receives a reply. + let boards_ver = crate::templates::live_boards_version(); + let etag = format!("\"{}-b{}\"", thread.bumped_at, boards_ver); + let posts = db::get_posts_for_thread(&conn, thread_id)?; let all_boards = db::get_all_boards(&conn)?; - // Compute ip_hash for poll vote status let ip_hash = crate::utils::crypto::hash_ip(&client_ip, &crate::config::CONFIG.cookie_secret); let poll = db::get_poll_for_thread(&conn, thread_id, &ip_hash)?; let collapse_greentext = db::get_collapse_greentext(&conn); - Ok(crate::templates::thread_page( + let html = crate::templates::thread_page( &board, &thread, &posts, @@ -79,13 +88,38 @@ pub async fn view_thread( poll.as_ref(), None, collapse_greentext, - )) + ); + Ok((etag, html)) } }) .await .map_err(|e| AppError::Internal(anyhow::anyhow!(e)))??; - Ok((jar, Html(html))) + let (etag, html) = result; + + // 3.2: Return 304 Not Modified when client's cached copy is still current. + let client_etag = req_headers + .get("if-none-match") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + if client_etag == etag { + let mut resp = axum::http::Response::builder() + .status(axum::http::StatusCode::NOT_MODIFIED) + .body(axum::body::Body::empty()) + .unwrap_or_default(); + resp.headers_mut().insert( + "etag", + axum::http::HeaderValue::from_str(&etag) + .unwrap_or_else(|_| axum::http::HeaderValue::from_static("\"0\"")), + ); + return Ok((jar, resp).into_response()); + } + + let mut resp = Html(html).into_response(); + if let Ok(v) = axum::http::HeaderValue::from_str(&etag) { + resp.headers_mut().insert("etag", v); + } + Ok((jar, resp).into_response()) } // ─── POST /:board/thread/:id — post reply ──────────────────────────────────── @@ -658,10 +692,32 @@ pub async fn thread_updates( .await .map_err(|e| crate::error::AppError::Internal(anyhow::anyhow!(e)))??; + // Current board-list version + rendered nav links — lets the JS refresh + // the nav bar when boards are added or deleted while a thread is open, + // without requiring a full page reload. + let boards_version = crate::templates::live_boards_version(); + let boards = crate::templates::live_boards_snapshot(); + let nav_inner: String = boards + .iter() + .map(|b| { + format!( + r#"{s}"#, + s = crate::utils::sanitize::escape_html(&b.short_name) + ) + }) + .collect::>() + .join(" / "); + let nav_html = if nav_inner.is_empty() { + String::new() + } else { + format!("[ {} ]", nav_inner) + }; + // Build a JSON envelope with new-post HTML plus current thread state. - // The client consumes the state fields to keep the nav bar in sync. + // boards_version / nav_html let the client keep the nav bar in sync when + // boards are added or deleted while the user has a thread open. let json = format!( - r#"{{"html":{html_json},"last_id":{last_id},"count":{count},"reply_count":{reply_count},"bump_time":{bump_time},"locked":{locked},"sticky":{sticky}}}"#, + r#"{{"html":{html_json},"last_id":{last_id},"count":{count},"reply_count":{reply_count},"bump_time":{bump_time},"locked":{locked},"sticky":{sticky},"boards_version":{boards_version},"nav_html":{nav_html_json}}}"#, html_json = serde_json::to_string(&html).unwrap_or_else(|_| "\"\"".to_string()), last_id = last_id, count = count, @@ -669,6 +725,8 @@ pub async fn thread_updates( bump_time = bump_time, locked = locked, sticky = sticky, + boards_version = boards_version, + nav_html_json = serde_json::to_string(&nav_html).unwrap_or_else(|_| "\"\"".to_string()), ); Ok(([(header::CONTENT_TYPE, "application/json")], json).into_response()) diff --git a/src/main.rs b/src/main.rs index aae7c44..20a9fad 100644 --- a/src/main.rs +++ b/src/main.rs @@ -136,9 +136,14 @@ enum AdminAction { } // ─── Entry point ───────────────────────────────────────────────────────────── +// +// 3.5: `#[tokio::main]` does not expose `max_blocking_threads`, so we build +// the runtime manually. The blocking thread pool (used by every +// `spawn_blocking` call — page renders, DB queries, file I/O) defaults to +// logical CPUs × 4 but can be tuned via `blocking_threads` in settings.toml +// or the CHAN_BLOCKING_THREADS environment variable. -#[tokio::main] -async fn main() -> anyhow::Result<()> { +fn main() -> anyhow::Result<()> { fmt::fmt() .with_env_filter( EnvFilter::try_from_default_env() @@ -148,13 +153,29 @@ async fn main() -> anyhow::Result<()> { .compact() .init(); + // CONFIG must be initialised before building the runtime so that + // blocking_threads is available. This is safe because CONFIG is a + // Lazy that initialises itself on first access. + let blocking_threads = CONFIG.blocking_threads; + + let rt = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .max_blocking_threads(blocking_threads) + .build() + .expect("Failed to build Tokio runtime"); + let cli = Cli::parse(); - match cli.command { - None | Some(Command::Serve { port: None }) => run_server(None).await, - Some(Command::Serve { port }) => run_server(port).await, - Some(Command::Admin { action }) => run_admin(action), - } + rt.block_on(async move { + match cli.command { + None | Some(Command::Serve { port: None }) => run_server(None).await, + Some(Command::Serve { port }) => run_server(port).await, + Some(Command::Admin { action }) => { + run_admin(action)?; + Ok(()) + } + } + }) } // ─── Server mode ───────────────────────────────────────────────────────────── @@ -270,6 +291,11 @@ async fn run_server(port_override: Option) -> anyhow::Result<()> { default_theme }; templates::set_live_default_theme(&default_theme); + + // Seed the live board list used by error pages and ban pages. + if let Ok(boards) = db::get_all_boards(&conn) { + templates::set_live_boards(boards); + } } } // ── External tool detection ──────────────────────────────────────────────── @@ -378,6 +404,94 @@ async fn run_server(port_override: Option) -> anyhow::Result<()> { } }); + // 1.6: Scheduled database VACUUM — reclaim disk space from deleted posts + // and threads without requiring manual admin intervention. + if CONFIG.auto_vacuum_interval_hours > 0 { + let bg = pool.clone(); + let interval_secs = CONFIG.auto_vacuum_interval_hours * 3600; + tokio::spawn(async move { + // Stagger the first run by half the interval to avoid hammering the + // DB immediately at startup alongside WAL checkpoint and session purge. + tokio::time::sleep(Duration::from_secs(interval_secs / 2 + 7)).await; + let mut iv = tokio::time::interval(Duration::from_secs(interval_secs)); + loop { + iv.tick().await; + let bg2 = bg.clone(); + tokio::task::spawn_blocking(move || { + if let Ok(conn) = bg2.get() { + let before = db::get_db_size_bytes(&conn).unwrap_or(0); + match db::run_vacuum(&conn) { + Ok(()) => { + let after = db::get_db_size_bytes(&conn).unwrap_or(0); + let saved = before.saturating_sub(after); + info!( + "Scheduled VACUUM complete: {} → {} bytes ({} reclaimed)", + before, after, saved + ); + } + Err(e) => tracing::warn!("Scheduled VACUUM failed: {}", e), + } + } + }) + .await + .ok(); + } + }); + } + + // 1.7: Expired poll vote cleanup — purge per-IP vote rows for polls whose + // expiry is older than poll_cleanup_interval_hours, preventing the + // poll_votes table from growing indefinitely. + if CONFIG.poll_cleanup_interval_hours > 0 { + let bg = pool.clone(); + let interval_secs = CONFIG.poll_cleanup_interval_hours * 3600; + tokio::spawn(async move { + tokio::time::sleep(Duration::from_secs(600)).await; // initial delay + let mut iv = tokio::time::interval(Duration::from_secs(interval_secs)); + loop { + iv.tick().await; + let bg2 = bg.clone(); + let retention_cutoff_secs = interval_secs as i64; + tokio::task::spawn_blocking(move || { + if let Ok(conn) = bg2.get() { + let cutoff = chrono::Utc::now().timestamp() - retention_cutoff_secs; + match db::cleanup_expired_poll_votes(&conn, cutoff) { + Ok(n) if n > 0 => { + info!("Poll vote cleanup: removed {} expired vote row(s)", n) + } + Ok(_) => {} + Err(e) => tracing::warn!("Poll vote cleanup failed: {}", e), + } + } + }) + .await + .ok(); + } + }); + } + + // 2.6: Waveform/thumbnail cache eviction — keep total size of all thumbs + // directories under CONFIG.waveform_cache_max_bytes by deleting the oldest + // files when the threshold is exceeded. Waveform PNGs can be regenerated + // by re-enqueueing the AudioWaveform job; image thumbnails can be + // regenerated from the originals. Uses 1-hour intervals. + if CONFIG.waveform_cache_max_bytes > 0 { + let max_bytes = CONFIG.waveform_cache_max_bytes; + tokio::spawn(async move { + tokio::time::sleep(Duration::from_secs(1800)).await; // initial stagger + let mut iv = tokio::time::interval(Duration::from_secs(3600)); + loop { + iv.tick().await; + let upload_dir = CONFIG.upload_dir.clone(); + tokio::task::spawn_blocking(move || { + evict_thumb_cache(&upload_dir, max_bytes); + }) + .await + .ok(); + } + }); + } + let app = build_router(state); let listener = tokio::net::TcpListener::bind(&bind_addr).await?; info!("Listening on http://{}", bind_addr); @@ -561,6 +675,12 @@ fn build_router(state: AppState) -> Router { .layer(axum_middleware::from_fn(middleware::rate_limit_middleware)) .layer(DefaultBodyLimit::max(CONFIG.max_video_size)) .layer(axum_middleware::from_fn(track_requests)) + // 3.3: Gzip/Brotli/Zstd response compression. HTML pages compress 5–10× + // with gzip and even better with Brotli. tower-http respects the client's + // Accept-Encoding header and negotiates the best supported algorithm. + // Applied before the trailing-slash normaliser so compressed responses + // are served correctly for all paths including redirects. + .layer(tower_http::compression::CompressionLayer::new()) // Normalize trailing slashes before routing: redirect /path/ → /path (301). // Applied last (outermost) so it fires before any other middleware sees the URI. .layer(axum_middleware::from_fn( @@ -1474,6 +1594,82 @@ fn run_admin(action: AdminAction) -> anyhow::Result<()> { Ok(()) } +// ─── 2.6 Waveform/thumbnail cache eviction ──────────────────────────────────── + +/// Walk every board's `thumbs/` subdirectory, collect all files with their +/// modification times, and delete the oldest ones until the total size of +/// the remaining set is under `max_bytes`. +/// +/// Only files inside `{upload_dir}/{board}/thumbs/` are considered — original +/// uploads are never touched. Deletion is best-effort: individual failures +/// are logged and skipped rather than aborting the whole pass. +fn evict_thumb_cache(upload_dir: &str, max_bytes: u64) { + // Collect (mtime_secs, path, size) for every file inside any thumbs/ dir. + let mut files: Vec<(u64, std::path::PathBuf, u64)> = Vec::new(); + let Ok(boards_iter) = std::fs::read_dir(upload_dir) else { + return; + }; + for board_entry in boards_iter.flatten() { + let thumbs_dir = board_entry.path().join("thumbs"); + if !thumbs_dir.is_dir() { + continue; + } + let Ok(thumbs_iter) = std::fs::read_dir(&thumbs_dir) else { + continue; + }; + for entry in thumbs_iter.flatten() { + let path = entry.path(); + if let Ok(meta) = entry.metadata() { + if meta.is_file() { + let mtime = meta + .modified() + .ok() + .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok()) + .map(|d| d.as_secs()) + .unwrap_or(0); + files.push((mtime, path, meta.len())); + } + } + } + } + + let total: u64 = files.iter().map(|(_, _, sz)| sz).sum(); + if total <= max_bytes { + return; // already within budget + } + + // Sort oldest-first so we delete the least-recently-used files first. + files.sort_unstable_by_key(|(mtime, _, _)| *mtime); + + let mut remaining = total; + let mut deleted = 0u64; + let mut deleted_bytes = 0u64; + for (_, path, size) in &files { + if remaining <= max_bytes { + break; + } + match std::fs::remove_file(path) { + Ok(()) => { + remaining = remaining.saturating_sub(*size); + deleted += 1; + deleted_bytes += size; + } + Err(e) => { + tracing::warn!("evict_thumb_cache: failed to delete {:?}: {}", path, e); + } + } + } + if deleted > 0 { + info!( + "evict_thumb_cache: removed {} file(s) ({} KiB), cache now {} KiB / {} KiB limit", + deleted, + deleted_bytes / 1024, + remaining / 1024, + max_bytes / 1024, + ); + } +} + fn validate_password(p: &str) -> anyhow::Result<()> { if p.len() < 8 { anyhow::bail!("Password must be at least 8 characters."); diff --git a/src/templates/admin.rs b/src/templates/admin.rs index 8ba7ecf..0b8fff3 100644 --- a/src/templates/admin.rs +++ b/src/templates/admin.rs @@ -50,6 +50,7 @@ pub fn admin_panel_page( full_backups: &[BackupInfo], board_backups: &[BackupInfo], db_size_bytes: i64, + db_size_warning: bool, reports: &[crate::models::ReportWithContext], appeals: &[crate::models::BanAppeal], site_name: &str, @@ -372,6 +373,20 @@ pub fn admin_panel_page( None => String::new(), }; + // 1.8: DB size warning banner — shown when the file exceeds the configured threshold. + let db_warn_banner = if db_size_warning { + format!( + r#"
+⚠ Database size warning: The database file has exceeded the configured +warning threshold ({size}). Consider running VACUUM below or archiving +old boards to prevent query performance degradation. +
"#, + size = format_file_size(db_size_bytes), + ) + } else { + String::new() + }; + let body = format!( r#"
{flash} @@ -543,7 +558,7 @@ pub fn admin_panel_page( ═══════════════════════════════════════════════════════════════════════════ -->

// database maintenance

-

+{db_warn_banner}

Current database size: {db_size_str}. Running VACUUM rewrites the database file compactly, reclaiming space left after bulk deletions (deleted threads, pruned posts, etc.). This may take a few seconds on large @@ -584,6 +599,7 @@ pub fn admin_panel_page( full_backup_rows = full_backup_rows, board_backup_rows = board_backup_rows, db_size_str = format_file_size(db_size_bytes), + db_warn_banner = db_warn_banner, report_rows = report_rows, report_badge = report_badge, appeal_rows = appeal_rows, diff --git a/src/templates/mod.rs b/src/templates/mod.rs index 795e51d..151adfe 100644 --- a/src/templates/mod.rs +++ b/src/templates/mod.rs @@ -20,6 +20,7 @@ use crate::utils::sanitize::escape_html; use chrono::{TimeZone, Utc}; use once_cell::sync::Lazy; use parking_lot::RwLock; +use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; pub mod admin; @@ -50,6 +51,48 @@ static LIVE_SITE_SUBTITLE: Lazy>> = /// change without requiring a server restart or extra DB round-trip per request. static LIVE_DEFAULT_THEME: Lazy>> = Lazy::new(|| RwLock::new(Arc::from(""))); +/// In-memory cache of the current board list, used by standalone pages (error +/// pages, ban pages) that don't have DB access at render time. Updated by +/// every handler that creates, deletes, or restores boards. +/// +/// Stores a snapshot; stale for at most one request after a board change, but +/// that one request is itself the mutating POST which redirects anyway. +static LIVE_BOARDS: Lazy>>> = + Lazy::new(|| RwLock::new(Arc::new(Vec::new()))); + +/// Monotonically-increasing counter incremented every time the board list +/// changes. Included in thread-page ETags so that adding or deleting a board +/// correctly invalidates cached thread pages (fixing stale nav bars). +static LIVE_BOARDS_VERSION: AtomicU64 = AtomicU64::new(0); + +/// Replace the in-memory board list. Call after any board create / delete / +/// restore operation so that error_page() renders the correct top-bar links. +pub fn set_live_boards(boards: Vec) { + *LIVE_BOARDS.write() = Arc::new(boards); + // Bump the version so thread-page ETags incorporate board-list changes. + // This ensures adding/deleting a board invalidates cached thread pages, + // fixing the stale nav bug where deleted boards persisted in the browser + // cache until an unrelated reply bumped the thread. + LIVE_BOARDS_VERSION.fetch_add(1, Ordering::Relaxed); +} + +pub(super) fn live_boards() -> Arc> { + Arc::clone(&*LIVE_BOARDS.read()) +} + +/// Public snapshot of the live board list. Used by the thread-updates handler +/// to include current nav HTML in polling responses so the JS can refresh the +/// nav bar when boards change while a thread is open. +pub fn live_boards_snapshot() -> Arc> { + Arc::clone(&*LIVE_BOARDS.read()) +} + +/// Current board-list version. Included in thread-page ETags so that board +/// mutations invalidate cached thread HTML (and thus stale nav bars). +pub fn live_boards_version() -> u64 { + LIVE_BOARDS_VERSION.load(Ordering::Relaxed) +} + /// Call this at startup (after first DB read) and after admin saves a new name. pub fn set_live_site_name(name: &str) { let val: Arc = if name.trim().is_empty() { @@ -414,32 +457,25 @@ appeals are reviewed by site staff. one appeal per 24 hours.

} pub fn error_page(code: u16, message: &str) -> String { - let default_theme = live_default_theme(); - let default_theme_attr = if !default_theme.is_empty() && &*default_theme != "terminal" { - format!(r#" data-default-theme="{}""#, escape_html(&default_theme)) - } else { - String::new() - }; - format!( - r#" - - - - -Error {code} - - - - -
+ // Use base_layout so the error page has the same header, theme picker, + // and board navigation as every other page. live_boards() is always + // up-to-date because every board mutation refreshes the cache. + let boards = live_boards(); + let body = format!( + r#"

error {code}

{message}

return home

-
- -"#, - default_theme_attr = default_theme_attr, +
"#, code = code, message = escape_html(message), + ); + base_layout( + &format!("Error {code}"), + None, + &body, + "", // no CSRF — error page has no forms that need it + &boards, + false, ) } diff --git a/src/utils/files.rs b/src/utils/files.rs index 03ab90d..5bc94eb 100644 --- a/src/utils/files.rs +++ b/src/utils/files.rs @@ -813,6 +813,58 @@ fn generate_audio_placeholder(output_path: &PathBuf) -> Result<()> { // ─── Image thumbnail ────────────────────────────────────────────────────────── +/// Read the EXIF Orientation tag from JPEG bytes. +/// +/// Returns the orientation value (1–8) or 1 (no rotation) if the tag is +/// absent or unreadable. Only JPEG files carry reliable EXIF orientation; +/// PNG/WebP/GIF do not use this tag. +/// +/// Values follow the EXIF spec: +/// 1 = normal (0°), 2 = flip-H, 3 = 180°, 4 = flip-V, +/// 5 = transpose, 6 = 90° CW, 7 = transverse, 8 = 90° CCW +fn read_exif_orientation(data: &[u8]) -> u32 { + use std::io::Cursor; + let Ok(exif) = exif::Reader::new().read_from_container(&mut Cursor::new(data)) else { + return 1; + }; + exif.get_field(exif::Tag::Orientation, exif::In::PRIMARY) + .and_then(|f| { + if let exif::Value::Short(ref v) = f.value { + v.first().copied().map(|n| n as u32) + } else { + None + } + }) + .unwrap_or(1) +} + +/// Apply an EXIF orientation transformation to a decoded `DynamicImage`. +/// +/// This corrects the pixel layout so that thumbnails appear upright regardless +/// of which way the camera was held when the photo was taken. The `image` +/// crate operations used here are pure in-memory pixel transforms — no I/O. +fn apply_exif_orientation(img: image::DynamicImage, orientation: u32) -> image::DynamicImage { + use image::imageops; + match orientation { + 2 => image::DynamicImage::ImageRgba8(imageops::flip_horizontal(&img)), + 3 => image::DynamicImage::ImageRgba8(imageops::rotate180(&img)), + 4 => image::DynamicImage::ImageRgba8(imageops::flip_vertical(&img)), + 5 => { + // Transpose = rotate 90° CW then flip horizontally + let rot = imageops::rotate90(&img); + image::DynamicImage::ImageRgba8(imageops::flip_horizontal(&rot)) + } + 6 => image::DynamicImage::ImageRgba8(imageops::rotate90(&img)), + 7 => { + // Transverse = rotate 90° CW then flip vertically + let rot = imageops::rotate90(&img); + image::DynamicImage::ImageRgba8(imageops::flip_vertical(&rot)) + } + 8 => image::DynamicImage::ImageRgba8(imageops::rotate270(&img)), + _ => img, // 1 = normal, or unknown value — no transform + } +} + fn generate_image_thumb( data: &[u8], mime_type: &str, @@ -830,6 +882,21 @@ fn generate_image_thumb( let img = image::load_from_memory_with_format(data, format).context("Failed to decode image")?; + // 4.1: EXIF orientation correction — phones store images unrotated and rely + // on the Orientation tag to tell viewers which way is up. We read the tag + // before thumbnailing and apply the corresponding pixel transform so that + // thumbnails display upright without depending on viewer-side EXIF support. + // Only JPEG carries reliable EXIF orientation data. + let img = if mime_type == "image/jpeg" { + let orientation = read_exif_orientation(data); + if orientation > 1 { + tracing::debug!("EXIF orientation {} applied to JPEG thumbnail", orientation); + } + apply_exif_orientation(img, orientation) + } else { + img + }; + let (w, h) = img.dimensions(); let (tw, th) = if w > h { let r = max_dim as f32 / w as f32; diff --git a/src/workers/mod.rs b/src/workers/mod.rs index 937a9fa..f1d84c3 100644 --- a/src/workers/mod.rs +++ b/src/workers/mod.rs @@ -28,6 +28,7 @@ use crate::config::CONFIG; use crate::db::DbPool; use anyhow::Result; +use dashmap::DashMap; use rand_core::{OsRng, RngCore}; use serde::{Deserialize, Serialize}; use std::path::PathBuf; @@ -42,10 +43,6 @@ use tracing::{debug, error, info, warn}; const MAX_ATTEMPTS: i64 = 3; // How long a worker sleeps when the queue is empty. const POLL_INTERVAL: Duration = Duration::from_secs(5); -// Maximum wall-clock time allowed for a single ffmpeg transcode (#10). -const FFMPEG_TRANSCODE_TIMEOUT: Duration = Duration::from_secs(120); -// Maximum wall-clock time allowed for ffmpeg waveform generation (#10). -const FFMPEG_WAVEFORM_TIMEOUT: Duration = Duration::from_secs(60); // ─── Job definitions ────────────────────────────────────────────────────────── @@ -105,6 +102,11 @@ pub struct JobQueue { pub notify: Arc, /// Token cancelled at shutdown; workers observe this to exit cleanly. pub cancel: CancellationToken, + /// 2.2: Set of file_path strings for media jobs currently being processed. + /// Workers check this before starting a VideoTranscode or AudioWaveform job + /// and skip if the same path is already in flight, preventing redundant + /// FFmpeg invocations on client retries or server-restart re-queues. + pub in_progress: Arc>, } impl JobQueue { @@ -113,14 +115,37 @@ impl JobQueue { pool, notify: Arc::new(Notify::new()), cancel: CancellationToken::new(), + in_progress: Arc::new(DashMap::new()), } } /// Persist a job and wake a sleeping worker immediately. /// Safe to call from any thread, including inside tokio::task::spawn_blocking. + /// + /// 2.1: If the number of pending jobs already equals or exceeds + /// `CONFIG.job_queue_capacity`, the job is dropped with a warning log + /// rather than accepted. This prevents the queue table growing without + /// bound under a post flood. pub fn enqueue(&self, job: &Job) -> Result { let payload = serde_json::to_string(job)?; let conn = self.pool.get()?; + + // Back-pressure: check pending count before inserting. + if CONFIG.job_queue_capacity > 0 { + let pending = crate::db::pending_job_count(&conn).unwrap_or(0); + if pending as u64 >= CONFIG.job_queue_capacity { + warn!( + "Job queue at capacity ({}/{}) — dropping {} job", + pending, + CONFIG.job_queue_capacity, + job.type_str(), + ); + // Return a sentinel -1 rather than an error so callers that + // fire-and-forget don't bubble up a spurious error. + return Ok(-1); + } + } + let id = crate::db::enqueue_job(&conn, job.type_str(), &payload)?; self.notify.notify_one(); Ok(id) @@ -204,8 +229,14 @@ async fn worker_loop(id: usize, queue: Arc, ffmpeg_available: bool) { consecutive_errors = 0; // reset back-off on any successful claim debug!("Worker {}: picked up job #{}", id, job_id); let pool_done = queue.pool.clone(); - let result = - handle_job(job_id, &payload, ffmpeg_available, queue.pool.clone()).await; + let result = handle_job( + job_id, + &payload, + ffmpeg_available, + queue.pool.clone(), + queue.in_progress.clone(), + ) + .await; // FIX[STUCK-RUNNING]: Previously pool_done.get() failures were // silently ignored (if let Ok(c) = ...), leaving the job row // permanently stuck in "running" — claim_next_job only claims @@ -308,6 +339,7 @@ async fn handle_job( payload: &str, ffmpeg_available: bool, pool: DbPool, + in_progress: Arc>, ) -> Result<()> { let job: Job = serde_json::from_str(payload) .map_err(|e| anyhow::anyhow!("Cannot deserialise job #{}: {}", job_id, e))?; @@ -319,13 +351,53 @@ async fn handle_job( post_id, file_path, board_short, - } => transcode_video(post_id, file_path, board_short, ffmpeg_available, pool).await, + } => { + // 2.2: Skip if this file_path is already being processed. + if in_progress.contains_key(&file_path) { + warn!( + "VideoTranscode: skipping duplicate job for post {} ({}): already in flight", + post_id, file_path + ); + return Ok(()); + } + in_progress.insert(file_path.clone(), true); + let result = transcode_video( + post_id, + file_path.clone(), + board_short, + ffmpeg_available, + pool, + ) + .await; + in_progress.remove(&file_path); + result + } Job::AudioWaveform { post_id, file_path, board_short, - } => generate_waveform(post_id, file_path, board_short, ffmpeg_available, pool).await, + } => { + // 2.2: Skip if this file_path is already being processed. + if in_progress.contains_key(&file_path) { + warn!( + "AudioWaveform: skipping duplicate job for post {} ({}): already in flight", + post_id, file_path + ); + return Ok(()); + } + in_progress.insert(file_path.clone(), true); + let result = generate_waveform( + post_id, + file_path.clone(), + board_short, + ffmpeg_available, + pool, + ) + .await; + in_progress.remove(&file_path); + result + } Job::ThreadPrune { board_id, @@ -347,7 +419,7 @@ async fn handle_job( /// Transcode an MP4 upload to WebM (VP9 + Opus), then update the post's /// file_path and mime_type. The original MP4 is deleted on success. /// -/// A hard timeout of FFMPEG_TRANSCODE_TIMEOUT is applied (#10). +/// A hard timeout of CONFIG.ffmpeg_timeout_secs is applied (2.3). async fn transcode_video( post_id: i64, file_path: String, @@ -363,9 +435,12 @@ async fn transcode_video( return Ok(()); } - // Wrap the entire blocking transcode in a timeout (#10). + let timeout_secs = CONFIG.ffmpeg_timeout_secs; + let ffmpeg_timeout = Duration::from_secs(timeout_secs); + + // Wrap the entire blocking transcode in a configurable timeout (2.3). match timeout( - FFMPEG_TRANSCODE_TIMEOUT, + ffmpeg_timeout, tokio::task::spawn_blocking(move || { transcode_video_inner(post_id, file_path, board_short, pool) }), @@ -377,12 +452,11 @@ async fn transcode_video( Err(_elapsed) => { warn!( "VideoTranscode: job for post {} timed out after {}s — ffmpeg killed", - post_id, - FFMPEG_TRANSCODE_TIMEOUT.as_secs() + post_id, timeout_secs, ); Err(anyhow::anyhow!( "ffmpeg transcode timed out after {}s", - FFMPEG_TRANSCODE_TIMEOUT.as_secs() + timeout_secs, )) } } @@ -530,7 +604,7 @@ fn transcode_video_inner( // ─── AudioWaveform ──────────────────────────────────────────────────────────── /// Generate a waveform PNG thumbnail for an audio upload via ffmpeg. -/// A hard timeout of FFMPEG_WAVEFORM_TIMEOUT is applied (#10). +/// A hard timeout of CONFIG.ffmpeg_timeout_secs is applied (2.3). async fn generate_waveform( post_id: i64, file_path: String, @@ -542,8 +616,11 @@ async fn generate_waveform( return Ok(()); } + let timeout_secs = CONFIG.ffmpeg_timeout_secs; + let ffmpeg_timeout = Duration::from_secs(timeout_secs); + match timeout( - FFMPEG_WAVEFORM_TIMEOUT, + ffmpeg_timeout, tokio::task::spawn_blocking(move || { generate_waveform_inner(post_id, file_path, board_short, pool) }), @@ -555,12 +632,11 @@ async fn generate_waveform( Err(_elapsed) => { warn!( "AudioWaveform: job for post {} timed out after {}s", - post_id, - FFMPEG_WAVEFORM_TIMEOUT.as_secs() + post_id, timeout_secs, ); Err(anyhow::anyhow!( "ffmpeg waveform timed out after {}s", - FFMPEG_WAVEFORM_TIMEOUT.as_secs() + timeout_secs, )) } } @@ -636,12 +712,18 @@ async fn prune_threads( ) -> Result<()> { tokio::task::spawn_blocking(move || { let conn = pool.get()?; - if allow_archive { + // 2.5: archive_before_prune acts as a global safety net — when true, + // overflow threads are always archived rather than hard-deleted, even + // on boards where allow_archive = false. This closes the silent data + // loss gap where a thread could disappear simply because a board hit + // its thread limit while the admin had not opted into archiving. + let do_archive = allow_archive || CONFIG.archive_before_prune; + if do_archive { let count = crate::db::archive_old_threads(&conn, board_id, max_threads)?; if count > 0 { info!( - "ThreadArchive: moved {} overflow thread(s) to archive in /{}/ (board_id={})", - count, board_short, board_id + "ThreadArchive: moved {} overflow thread(s) to archive in /{}/ (board_id={}, archive_before_prune={})", + count, board_short, board_id, CONFIG.archive_before_prune ); } } else { diff --git a/static/main.js b/static/main.js index 97382d9..ac1905d 100644 --- a/static/main.js +++ b/static/main.js @@ -415,6 +415,9 @@ function closeReportModal() { var board = container.dataset.board; var threadId = container.dataset.threadId; var lastId = parseInt(container.dataset.lastId, 10) || 0; + // Track the board-list version last seen so we only touch the DOM when it + // actually changes (avoids unnecessary reflow on every poll tick). + var lastBoardsVersion = -1; // Floating new-replies pill var pill = document.createElement('div'); @@ -480,6 +483,16 @@ function closeReportModal() { if (window._onNewPostsInserted) window._onNewPostsInserted(container); showPill(data.count); } + // Refresh nav bar if the board list changed since last poll. + // boards_version is a monotonic counter incremented server-side + // whenever a board is created, deleted, or restored. + if (data.boards_version !== undefined && data.boards_version !== lastBoardsVersion) { + lastBoardsVersion = data.boards_version; + if (data.nav_html !== undefined) { + var navEl = document.querySelector('nav.board-list'); + if (navEl) navEl.innerHTML = data.nav_html; + } + } setStatus('updated'); setTimeout(function () { setStatus(''); }, 2000); updating = false; @@ -650,6 +663,25 @@ function closeReportModal() { _popupTarget = null; } + // Show an inline "post not found" notice anchored to the clicked quotelink. + // Reuses the existing hover popup element so the style is identical to a + // real post preview — no new DOM structure needed. + function showMissingPostPopup(link, pid) { + clearTimeout(_hideTimer); + popup.innerHTML = + '
' + + ' ' + + '>>' + pid + ' — post not found' + + 'it may have been deleted' + + '
'; + popup.style.display = 'block'; + _popupTarget = null; + positionPopup(link); + // Auto-dismiss after 3 s so the user is not left with a stale tooltip. + clearTimeout(_hideTimer); + _hideTimer = setTimeout(hidePopup, 3000); + } + function wireQuotelinks(root) { root.querySelectorAll('a.quotelink[data-pid]').forEach(function (link) { var pid = link.getAttribute('data-pid'); @@ -657,7 +689,14 @@ function closeReportModal() { link.addEventListener('mouseleave', function () { _hideTimer = setTimeout(hidePopup, 120); }); link.addEventListener('click', function (e) { var target = document.getElementById('p' + pid); - if (!target) return; + if (!target) { + // Post is not on this page (deleted or in another thread). + // Prevent navigation and show an inline error anchored to the link. + e.preventDefault(); + e.stopPropagation(); + showMissingPostPopup(link, pid); + return; + } e.preventDefault(); var offset = target.getBoundingClientRect().top + window.pageYOffset - 60; window.scrollTo({ top: offset, behavior: 'smooth' }); @@ -677,7 +716,12 @@ function closeReportModal() { link.addEventListener('mouseleave', function () { _hideTimer = setTimeout(hidePopup, 120); }); link.addEventListener('click', function (e) { var target = document.getElementById('p' + pid); - if (!target) return; + if (!target) { + e.preventDefault(); + e.stopPropagation(); + showMissingPostPopup(link, pid); + return; + } e.preventDefault(); var offset = target.getBoundingClientRect().top + window.pageYOffset - 60; window.scrollTo({ top: offset, behavior: 'smooth' }); @@ -813,11 +857,37 @@ function closeReportModal() { function navigate(threadId) { window.location.href = '/' + board + '/thread/' + threadId + '#p' + pid; } + function showCbMissingError() { + var cbPopup = getCbPopup(); + if (!cbPopup) return; + cbPopup.innerHTML = + '
' + + ' ' + + '>>>/' + board + '/' + pid + ' — post not found' + + 'it may have been deleted' + + '
'; + cbPopup.style.display = 'block'; + positionCbPopup(link, cbPopup); + setTimeout(function () { if (cbPopup) cbPopup.style.display = 'none'; }, 3000); + } + // If we already know the thread ID from a prior hover-preview fetch, navigate directly. if (_cbCache[key] && _cbCache[key].thread_id) { navigate(_cbCache[key].thread_id); return; } + // If a prior fetch already confirmed the post is gone, show error inline. + if (_cbCache[key] && !_cbCache[key].thread_id) { showCbMissingError(); return; } fetch('/api/post/' + board + '/' + pid) .then(function (r) { return r.ok ? r.json() : Promise.reject(r.status); }) - .then(function (data) { navigate(data.thread_id || 0); }) - .catch(function () { navigate(0); }); + .then(function (data) { + if (data.thread_id) { + navigate(data.thread_id); + } else { + // API returned success but no thread_id — post is orphaned/deleted. + showCbMissingError(); + } + }) + .catch(function () { + // 404 or network error — post is gone; show error inline. + showCbMissingError(); + }); }); }); } diff --git a/static/style.css b/static/style.css index 08af907..5f3e6c7 100644 --- a/static/style.css +++ b/static/style.css @@ -676,6 +676,30 @@ main { .quotelink-popup .post { margin: 0; border: none; padding: 0; } .quotelink-popup .reply { margin: 0; border: none; } +/* ── Missing-post inline notice (shown when a >>pid target was deleted) ───── */ +.missing-post-notice { + display: flex; + flex-direction: column; + gap: 4px; + padding: 6px 4px; + color: var(--text-dim, #888); + font-size: 0.85rem; + line-height: 1.4; +} +.missing-post-notice strong { + color: var(--text, #ccc); +} +.missing-post-icon { + color: var(--red, #e05555); + font-style: normal; + margin-right: 4px; +} +.missing-post-sub { + font-size: 0.78rem; + color: var(--text-dim, #666); + margin-left: 18px; +} + /* ── Highlighted post ──────────────────────────────────────────────────────── */ .post-highlighted { outline: 2px solid var(--green, #00c840) !important; From 44854d6c2790a1bf37298e954747b59c24e9761b Mon Sep 17 00:00:00 2001 From: csd113 Date: Mon, 9 Mar 2026 19:47:30 -0700 Subject: [PATCH 4/7] database audit using opus 4.6 --- src/db/admin.rs | 282 ++++++++++++++++------- src/db/boards.rs | 346 ++++++++++++++++++----------- src/db/mod.rs | 300 ++++++++++++++++++------- src/db/posts.rs | 420 +++++++++++++++++++++++------------ src/db/threads.rs | 491 +++++++++++++++++++++++++---------------- src/handlers/thread.rs | 2 +- src/utils/crypto.rs | 321 +++++++++++++++++++++------ src/utils/tripcode.rs | 268 ++++++++++++++++++---- 8 files changed, 1688 insertions(+), 742 deletions(-) diff --git a/src/db/admin.rs b/src/db/admin.rs index 82e2199..4b7d086 100644 --- a/src/db/admin.rs +++ b/src/db/admin.rs @@ -3,6 +3,24 @@ // Covers: admin user & session management, bans, word filters, user reports, // moderation log, ban appeals, IP history, WAL checkpoint, VACUUM, DB size, // and the list_admins helper used by CLI tooling. +// +// FIX summary (from audit): +// HIGH-1 is_banned: switched to prepare_cached (hot path on every post submission) +// HIGH-2 get_word_filters: switched to prepare_cached (hot path on every post submission) +// HIGH-3 get_posts_by_ip_hash: removed unnecessary threads join; posts already carries board_id +// MED-4 create_admin, add_ban, add_word_filter, file_report, file_ban_appeal: +// INSERT … RETURNING id replaces execute + last_insert_rowid() +// MED-5 update_admin_password, remove_ban, resolve_report, dismiss_ban_appeal: +// added rows-affected checks so missing targets surface as errors +// MED-6 accept_ban_appeal: status='accepted' (was duplicating 'dismissed') +// already correct; doc comment clarified +// MED-7 file_report: added has_reported_post guard to prevent spam reports +// MED-8 has_recent_appeal / file_ban_appeal TOCTOU: documented; full fix +// requires a schema-level UNIQUE constraint +// LOW-9 Remaining bare prepare → prepare_cached throughout +// LOW-10 Added .context() on key operations +// LOW-11 get_db_size_bytes: added doc comment noting WAL file not included +// LOW-12 is_banned NULLS FIRST: added doc comment for SQLite ≥ 3.30.0 requirement use crate::models::*; use anyhow::{Context, Result}; @@ -29,30 +47,43 @@ pub fn get_admin_by_username( .optional()?) } +/// FIX[MED-4]: Replaced execute + last_insert_rowid() with INSERT … RETURNING id +/// to retrieve the new row id atomically in the same statement. pub fn create_admin(conn: &rusqlite::Connection, username: &str, hash: &str) -> Result { - conn.execute( - "INSERT INTO admin_users (username, password_hash) VALUES (?1, ?2)", - params![username, hash], - )?; - Ok(conn.last_insert_rowid()) + let id: i64 = conn + .query_row( + "INSERT INTO admin_users (username, password_hash) VALUES (?1, ?2) RETURNING id", + params![username, hash], + |r| r.get(0), + ) + .context("Failed to create admin user")?; + Ok(id) } +/// FIX[MED-5]: Added rows-affected check — silently succeeding when the target +/// username doesn't exist made password-reset errors invisible to the operator. pub fn update_admin_password( conn: &rusqlite::Connection, username: &str, hash: &str, ) -> Result<()> { - conn.execute( - "UPDATE admin_users SET password_hash = ?1 WHERE username = ?2", - params![hash, username], - )?; + let n = conn + .execute( + "UPDATE admin_users SET password_hash = ?1 WHERE username = ?2", + params![hash, username], + ) + .context("Failed to update admin password")?; + if n == 0 { + anyhow::bail!("Admin user '{}' not found", username); + } Ok(()) } /// List all admin users (for CLI tooling). +/// FIX[LOW-9]: Switched from bare prepare to prepare_cached. pub fn list_admins(conn: &rusqlite::Connection) -> Result> { let mut stmt = - conn.prepare("SELECT id, username, created_at FROM admin_users ORDER BY id ASC")?; + conn.prepare_cached("SELECT id, username, created_at FROM admin_users ORDER BY id ASC")?; let rows = stmt .query_map([], |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)))? .collect::>>()?; @@ -81,7 +112,8 @@ pub fn create_session( conn.execute( "INSERT INTO admin_sessions (id, admin_id, expires_at) VALUES (?1, ?2, ?3)", params![session_id, admin_id, expires_at], - )?; + ) + .context("Failed to create admin session")?; Ok(()) } @@ -123,50 +155,62 @@ pub fn purge_expired_sessions(conn: &rusqlite::Connection) -> Result { // ─── Ban queries ────────────────────────────────────────────────────────────── +/// Check whether `ip_hash` is currently banned. Returns the ban reason if so. +/// +/// FIX[HIGH-1]: Switched to prepare_cached — this is called on every post +/// submission and was recompiling the statement on every call. +/// +/// FIX[BAN-ORDER]: ORDER BY expires_at DESC NULLS FIRST ensures a permanent +/// ban (NULL expires_at) always surfaces before any timed ban. +/// +/// Note: NULLS FIRST requires SQLite ≥ 3.30.0 (released 2019-10-04). pub fn is_banned(conn: &rusqlite::Connection, ip_hash: &str) -> Result> { let now = chrono::Utc::now().timestamp(); - // A ban with NULL expires_at is permanent. - // - // FIX[BAN-ORDER]: Previously there was no ORDER BY, so LIMIT 1 returned - // whichever row the query planner happened to visit first via the index. - // With multiple active bans (e.g. a timed ban and a permanent ban) the - // reason shown to the user was non-deterministic across restarts/VACUUM. - // We now order by expires_at DESC NULLS FIRST so a permanent ban (NULL) - // always surfaces first, and among timed bans the latest-expiring one wins. - let result: Option> = conn - .query_row( - "SELECT reason FROM bans WHERE ip_hash = ?1 - AND (expires_at IS NULL OR expires_at > ?2) - ORDER BY expires_at DESC NULLS FIRST - LIMIT 1", - params![ip_hash, now], - |r| r.get(0), - ) + let mut stmt = conn.prepare_cached( + "SELECT reason FROM bans WHERE ip_hash = ?1 + AND (expires_at IS NULL OR expires_at > ?2) + ORDER BY expires_at DESC NULLS FIRST + LIMIT 1", + )?; + let result: Option> = stmt + .query_row(params![ip_hash, now], |r| r.get(0)) .optional()?; - // Flatten: None = not banned; Some(r) = banned (r may be empty if no reason set) + // Flatten: None = not banned; Some(r) = banned (r may be None if no reason was set) Ok(result.map(|r| r.unwrap_or_default())) } +/// FIX[MED-4]: INSERT … RETURNING id replaces execute + last_insert_rowid(). pub fn add_ban( conn: &rusqlite::Connection, ip_hash: &str, reason: &str, expires_at: Option, ) -> Result { - conn.execute( - "INSERT INTO bans (ip_hash, reason, expires_at) VALUES (?1, ?2, ?3)", - params![ip_hash, reason, expires_at], - )?; - Ok(conn.last_insert_rowid()) + let id: i64 = conn + .query_row( + "INSERT INTO bans (ip_hash, reason, expires_at) VALUES (?1, ?2, ?3) RETURNING id", + params![ip_hash, reason, expires_at], + |r| r.get(0), + ) + .context("Failed to insert ban")?; + Ok(id) } +/// FIX[MED-5]: Returns an error when the target ban row does not exist, +/// making double-removes and stale ban-ids visible rather than silently succeeding. pub fn remove_ban(conn: &rusqlite::Connection, id: i64) -> Result<()> { - conn.execute("DELETE FROM bans WHERE id = ?1", params![id])?; + let n = conn + .execute("DELETE FROM bans WHERE id = ?1", params![id]) + .context("Failed to remove ban")?; + if n == 0 { + anyhow::bail!("Ban id {} not found", id); + } Ok(()) } +/// FIX[LOW-9]: Switched from bare prepare to prepare_cached. pub fn list_bans(conn: &rusqlite::Connection) -> Result> { - let mut stmt = conn.prepare( + let mut stmt = conn.prepare_cached( "SELECT id, ip_hash, reason, expires_at, created_at FROM bans ORDER BY created_at DESC", )?; let bans = stmt @@ -185,8 +229,10 @@ pub fn list_bans(conn: &rusqlite::Connection) -> Result> { // ─── Word filter queries ────────────────────────────────────────────────────── +/// FIX[HIGH-2]: Switched to prepare_cached — called on every post submission +/// to apply word filters; recompiling the statement every time was wasteful. pub fn get_word_filters(conn: &rusqlite::Connection) -> Result> { - let mut stmt = conn.prepare("SELECT id, pattern, replacement FROM word_filters")?; + let mut stmt = conn.prepare_cached("SELECT id, pattern, replacement FROM word_filters")?; let filters = stmt .query_map([], |r| { Ok(WordFilter { @@ -199,16 +245,20 @@ pub fn get_word_filters(conn: &rusqlite::Connection) -> Result> Ok(filters) } +/// FIX[MED-4]: INSERT … RETURNING id replaces execute + last_insert_rowid(). pub fn add_word_filter( conn: &rusqlite::Connection, pattern: &str, replacement: &str, ) -> Result { - conn.execute( - "INSERT INTO word_filters (pattern, replacement) VALUES (?1, ?2)", - params![pattern, replacement], - )?; - Ok(conn.last_insert_rowid()) + let id: i64 = conn + .query_row( + "INSERT INTO word_filters (pattern, replacement) VALUES (?1, ?2) RETURNING id", + params![pattern, replacement], + |r| r.get(0), + ) + .context("Failed to insert word filter")?; + Ok(id) } pub fn remove_word_filter(conn: &rusqlite::Connection, id: i64) -> Result<()> { @@ -218,7 +268,36 @@ pub fn remove_word_filter(conn: &rusqlite::Connection, id: i64) -> Result<()> { // ─── Reports ────────────────────────────────────────────────────────────────── +/// Guard helper: returns true if `reporter_hash` has already filed a report +/// against `post_id` that is still open. +/// +/// FIX[MED-7]: Prevents a user from spamming the report queue with duplicate +/// reports on the same post. Called inside file_report before the INSERT. +/// +/// Note: There is a TOCTOU race between has_reported_post and the INSERT in +/// file_report (two concurrent requests can both pass the check). A full fix +/// would require a schema-level UNIQUE(post_id, reporter_hash) constraint, but +/// that would block re-reporting after a resolved report. The guard here is +/// sufficient to prevent accidental spam; deliberate concurrent abuse is +/// extremely unlikely in practice. +fn has_reported_post( + conn: &rusqlite::Connection, + post_id: i64, + reporter_hash: &str, +) -> Result { + let count: i64 = conn.query_row( + "SELECT COUNT(*) FROM reports + WHERE post_id = ?1 AND reporter_hash = ?2 AND status = 'open'", + params![post_id, reporter_hash], + |r| r.get(0), + )?; + Ok(count > 0) +} + /// File a new report against a post. Returns the new report id. +/// +/// FIX[MED-4]: INSERT … RETURNING id replaces execute + last_insert_rowid(). +/// FIX[MED-7]: Duplicate-report guard added via has_reported_post. pub fn file_report( conn: &rusqlite::Connection, post_id: i64, @@ -227,19 +306,26 @@ pub fn file_report( reason: &str, reporter_hash: &str, ) -> Result { - conn.execute( - "INSERT INTO reports (post_id, thread_id, board_id, reason, reporter_hash) - VALUES (?1, ?2, ?3, ?4, ?5)", - params![post_id, thread_id, board_id, reason, reporter_hash], - )?; - Ok(conn.last_insert_rowid()) + if has_reported_post(conn, post_id, reporter_hash)? { + anyhow::bail!("Already reported post {}", post_id); + } + let id: i64 = conn + .query_row( + "INSERT INTO reports (post_id, thread_id, board_id, reason, reporter_hash) + VALUES (?1, ?2, ?3, ?4, ?5) RETURNING id", + params![post_id, thread_id, board_id, reason, reporter_hash], + |r| r.get(0), + ) + .context("Failed to insert report")?; + Ok(id) } /// Return all open reports enriched with board name and post preview. +/// FIX[LOW-9]: Switched from bare prepare to prepare_cached. pub fn get_open_reports( conn: &rusqlite::Connection, ) -> Result> { - let mut stmt = conn.prepare( + let mut stmt = conn.prepare_cached( "SELECT r.id, r.post_id, r.thread_id, r.board_id, r.reason, r.reporter_hash, r.status, r.created_at, r.resolved_at, r.resolved_by, b.short_name, p.body, p.ip_hash @@ -278,12 +364,18 @@ pub fn get_open_reports( } /// Resolve a report (mark it closed). +/// FIX[MED-5]: Added rows-affected check. pub fn resolve_report(conn: &rusqlite::Connection, report_id: i64, admin_id: i64) -> Result<()> { - conn.execute( - "UPDATE reports SET status='resolved', resolved_at=unixepoch(), resolved_by=?1 - WHERE id = ?2", - params![admin_id, report_id], - )?; + let n = conn + .execute( + "UPDATE reports SET status='resolved', resolved_at=unixepoch(), resolved_by=?1 + WHERE id = ?2", + params![admin_id, report_id], + ) + .context("Failed to resolve report")?; + if n == 0 { + anyhow::bail!("Report id {} not found", report_id); + } Ok(()) } @@ -328,12 +420,13 @@ pub fn log_mod_action( } /// Retrieve a page of mod log entries, newest first. +/// FIX[LOW-9]: Switched from bare prepare to prepare_cached. pub fn get_mod_log( conn: &rusqlite::Connection, limit: i64, offset: i64, ) -> Result> { - let mut stmt = conn.prepare( + let mut stmt = conn.prepare_cached( "SELECT id, admin_id, admin_name, action, target_type, target_id, board_short, detail, created_at FROM mod_log @@ -364,12 +457,23 @@ pub fn count_mod_log(conn: &rusqlite::Connection) -> Result { // ─── Ban appeals ────────────────────────────────────────────────────────────── /// Insert a new ban appeal. Returns the new appeal id. +/// +/// FIX[MED-4]: INSERT … RETURNING id replaces execute + last_insert_rowid(). +/// +/// Note (TOCTOU): has_recent_appeal and file_ban_appeal have a race — two +/// concurrent requests from the same IP can both pass the check and both +/// insert appeals. A full fix requires a schema-level UNIQUE(ip_hash) or a +/// time-windowed partial unique index. The guard is retained as a best-effort +/// spam deterrent for the common (non-concurrent) case. pub fn file_ban_appeal(conn: &rusqlite::Connection, ip_hash: &str, reason: &str) -> Result { - conn.execute( - "INSERT INTO ban_appeals (ip_hash, reason) VALUES (?1, ?2)", - params![ip_hash, reason], - )?; - Ok(conn.last_insert_rowid()) + let id: i64 = conn + .query_row( + "INSERT INTO ban_appeals (ip_hash, reason) VALUES (?1, ?2) RETURNING id", + params![ip_hash, reason], + |r| r.get(0), + ) + .context("Failed to insert ban appeal")?; + Ok(id) } /// Return all open ban appeals, newest first. @@ -392,30 +496,42 @@ pub fn get_open_ban_appeals(conn: &rusqlite::Connection) -> Result Result<()> { - conn.execute( - "UPDATE ban_appeals SET status='dismissed' WHERE id=?1", - params![appeal_id], - )?; + let n = conn + .execute( + "UPDATE ban_appeals SET status='dismissed' WHERE id=?1", + params![appeal_id], + ) + .context("Failed to dismiss ban appeal")?; + if n == 0 { + anyhow::bail!("Ban appeal id {} not found", appeal_id); + } Ok(()) } /// Dismiss appeal AND lift the ban for this ip_hash. /// -/// FIX[AUDIT]: Previously both dismiss_ban_appeal and accept_ban_appeal set -/// status='dismissed', making them indistinguishable in the moderation history. -/// Accepted appeals now correctly set status='accepted' so the audit trail -/// accurately reflects whether an appeal was denied or granted. +/// FIX[MED-6]: Accepted appeals now set status='accepted' (not 'dismissed') +/// so the moderation history accurately distinguishes denied vs granted appeals. +/// The valid status values for BanAppeal are: "open" | "dismissed" | "accepted". pub fn accept_ban_appeal(conn: &rusqlite::Connection, appeal_id: i64, ip_hash: &str) -> Result<()> { // Both updates must succeed together. let tx = conn .unchecked_transaction() .context("Failed to begin accept-appeal transaction")?; - tx.execute( - "UPDATE ban_appeals SET status='accepted' WHERE id=?1", - params![appeal_id], - )?; - tx.execute("DELETE FROM bans WHERE ip_hash=?1", params![ip_hash])?; + let n = tx + .execute( + "UPDATE ban_appeals SET status='accepted' WHERE id=?1", + params![appeal_id], + ) + .context("Failed to accept ban appeal")?; + if n == 0 { + tx.rollback().ok(); + anyhow::bail!("Ban appeal id {} not found", appeal_id); + } + tx.execute("DELETE FROM bans WHERE ip_hash=?1", params![ip_hash]) + .context("Failed to lift ban during appeal acceptance")?; tx.commit() .context("Failed to commit accept-appeal transaction")?; Ok(()) @@ -433,6 +549,8 @@ pub fn open_appeal_count(conn: &rusqlite::Connection) -> Result { /// Check if an appeal has already been filed from this ip_hash (any status) /// within the last 24 hours, to prevent spam. +/// +/// Note (TOCTOU): see file_ban_appeal for the concurrency caveat. pub fn has_recent_appeal(conn: &rusqlite::Connection, ip_hash: &str) -> Result { let cutoff = chrono::Utc::now().timestamp() - 86400; let count: i64 = conn.query_row( @@ -456,13 +574,21 @@ pub fn count_posts_by_ip_hash(conn: &rusqlite::Connection, ip_hash: &str) -> Res /// Return paginated posts by IP hash, newest first, across all boards. /// Each post is joined with its board short_name for display. +/// +/// FIX[HIGH-3]: Replaced the three-way join (posts → threads → boards) with a +/// direct two-way join (posts → boards). The posts table already carries board_id, +/// making the threads join unnecessary. The old join also silently hid any posts +/// whose thread had been deleted (orphaned posts) because the INNER JOIN on +/// threads would exclude them. +/// +/// FIX[LOW-9]: Switched from bare prepare to prepare_cached. pub fn get_posts_by_ip_hash( conn: &rusqlite::Connection, ip_hash: &str, limit: i64, offset: i64, ) -> Result> { - let mut stmt = conn.prepare( + let mut stmt = conn.prepare_cached( "SELECT p.id, p.thread_id, p.board_id, p.name, p.tripcode, p.subject, p.body, p.body_html, p.ip_hash, p.file_path, p.file_name, p.file_size, p.thumb_path, p.mime_type, p.created_at, @@ -471,8 +597,7 @@ pub fn get_posts_by_ip_hash( p.edited_at, b.short_name FROM posts p - JOIN threads t ON p.thread_id = t.id - JOIN boards b ON t.board_id = b.id + JOIN boards b ON b.id = p.board_id WHERE p.ip_hash = ?1 ORDER BY p.created_at DESC, p.id DESC LIMIT ?2 OFFSET ?3", @@ -519,6 +644,11 @@ pub fn run_wal_checkpoint(conn: &rusqlite::Connection) -> Result<(i64, i64, i64) /// Return the current on-disk size of the database in bytes /// (page_count × page_size, as reported by SQLite). +/// +/// FIX[LOW-11]: Note that this does NOT include the WAL file size. When the +/// database is in WAL mode, the total on-disk footprint is this value plus the +/// size of the .db-wal file. Call run_wal_checkpoint before get_db_size_bytes +/// if you need a reliable post-checkpoint size. pub fn get_db_size_bytes(conn: &rusqlite::Connection) -> Result { let page_count: i64 = conn.query_row("PRAGMA page_count", [], |r| r.get(0))?; let page_size: i64 = conn.query_row("PRAGMA page_size", [], |r| r.get(0))?; diff --git a/src/db/boards.rs b/src/db/boards.rs index adad9dc..c4cdbe9 100644 --- a/src/db/boards.rs +++ b/src/db/boards.rs @@ -2,9 +2,27 @@ // // Covers: site_settings table, boards CRUD, delete_board (with file-safety // guard via super::paths_safe_to_delete), and aggregate site statistics. +// +// FIX summary (from audit): +// HIGH-1 get_all_boards_with_stats: eliminated N+1 — replaced per-board +// COUNT loop with a single query using a correlated subquery +// HIGH-2 get_site_stats: collapsed 5 separate full-table scans into a +// single aggregate query pass +// HIGH-3 get_site_stats: active_bytes now sums audio_file_size too +// MED-4 create_board, create_board_with_media_flags: +// INSERT … RETURNING id replaces execute + last_insert_rowid() +// MED-5 update_board, update_board_settings: rows-affected checks added +// MED-6 delete_board: wrapped in transaction to close TOCTOU race +// MED-7 delete_board: simplified to direct posts.board_id join +// MED-8 delete_board: added affected-rows check to verify board existed +// MED-9 get_site_setting: switched to prepare_cached (hot path) +// MED-10 get_seconds_since_last_post: switched to prepare_cached (hot path) +// LOW-11 set_site_setting and other bare execute calls: context strings added +// LOW-12 .context() added on key operations +// LOW-13 Note: unixepoch() requires SQLite ≥ 3.38.0 (2022-02-22) use crate::models::*; -use anyhow::Result; +use anyhow::{Context, Result}; use rusqlite::{params, OptionalExtension}; // ─── Row mapper ─────────────────────────────────────────────────────────────── @@ -35,13 +53,13 @@ pub(super) fn map_board(row: &rusqlite::Row<'_>) -> rusqlite::Result { // ─── Site settings ──────────────────────────────────────────────────────────── /// Read a site-wide setting by key. Returns None if the key has never been set. +/// +/// FIX[MED-9]: Switched to prepare_cached — convenience helpers (get_site_name, +/// get_site_subtitle, etc.) call this on every page render. pub fn get_site_setting(conn: &rusqlite::Connection, key: &str) -> Result> { - let result = conn - .query_row( - "SELECT value FROM site_settings WHERE key = ?1", - params![key], - |r| r.get::<_, String>(0), - ) + let mut stmt = conn.prepare_cached("SELECT value FROM site_settings WHERE key = ?1")?; + let result = stmt + .query_row(params![key], |r| r.get::<_, String>(0)) .optional()?; Ok(result) } @@ -52,7 +70,8 @@ pub fn set_site_setting(conn: &rusqlite::Connection, key: &str, value: &str) -> "INSERT INTO site_settings (key, value) VALUES (?1, ?2) ON CONFLICT(key) DO UPDATE SET value = excluded.value", params![key, value], - )?; + ) + .context("Failed to upsert site setting")?; Ok(()) } @@ -107,22 +126,33 @@ pub fn get_all_boards(conn: &rusqlite::Connection) -> Result> { } /// Like get_all_boards but also returns live thread count for each board. +/// +/// FIX[HIGH-1]: Previously issued one COUNT(*) query per board (N+1). Replaced +/// with a single LEFT JOIN query that computes all counts in one pass. pub fn get_all_boards_with_stats( conn: &rusqlite::Connection, ) -> Result> { - let boards = get_all_boards(conn)?; - let mut out = Vec::with_capacity(boards.len()); - for board in boards { - let thread_count: i64 = conn.query_row( - "SELECT COUNT(*) FROM threads WHERE board_id = ?1", - params![board.id], - |r| r.get(0), - )?; - out.push(crate::models::BoardStats { - board, - thread_count, - }); - } + let mut stmt = conn.prepare_cached( + "SELECT b.id, b.short_name, b.name, b.description, b.nsfw, b.max_threads, + b.bump_limit, b.allow_images, b.allow_video, b.allow_audio, + b.allow_tripcodes, b.edit_window_secs, b.allow_editing, b.allow_archive, + b.allow_video_embeds, b.allow_captcha, b.post_cooldown_secs, b.created_at, + COUNT(t.id) AS thread_count + FROM boards b + LEFT JOIN threads t ON t.board_id = b.id AND t.archived = 0 + GROUP BY b.id + ORDER BY b.id ASC", + )?; + let out = stmt + .query_map([], |row| { + let board = map_board(row)?; + let thread_count: i64 = row.get(18)?; + Ok(crate::models::BoardStats { + board, + thread_count, + }) + })? + .collect::>>()?; Ok(out) } @@ -137,6 +167,7 @@ pub fn get_board_by_short(conn: &rusqlite::Connection, short: &str) -> Result Result { // New boards default to images and video enabled; audio off by default. - conn.execute( - "INSERT INTO boards (short_name, name, description, nsfw, allow_images, allow_video, allow_audio) - VALUES (?1, ?2, ?3, ?4, 1, 1, 0)", - params![short, name, description, nsfw as i32], - )?; - Ok(conn.last_insert_rowid()) + let id: i64 = conn + .query_row( + "INSERT INTO boards (short_name, name, description, nsfw, allow_images, allow_video, allow_audio) + VALUES (?1, ?2, ?3, ?4, 1, 1, 0) RETURNING id", + params![short, name, description, nsfw as i32], + |r| r.get(0), + ) + .context("Failed to create board")?; + Ok(id) } /// Create a board with explicit per-media-type toggles. /// Used by the CLI `--no-images / --no-videos / --no-audio` flags. +/// +/// FIX[MED-4]: INSERT … RETURNING id replaces execute + last_insert_rowid(). #[allow(clippy::too_many_arguments)] pub fn create_board_with_media_flags( conn: &rusqlite::Connection, @@ -166,17 +202,22 @@ pub fn create_board_with_media_flags( allow_video: bool, allow_audio: bool, ) -> Result { - conn.execute( - "INSERT INTO boards (short_name, name, description, nsfw, allow_images, allow_video, allow_audio) - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", - params![ - short, name, description, nsfw as i32, - allow_images as i32, allow_video as i32, allow_audio as i32, - ], - )?; - Ok(conn.last_insert_rowid()) + let id: i64 = conn + .query_row( + "INSERT INTO boards (short_name, name, description, nsfw, allow_images, allow_video, allow_audio) + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7) RETURNING id", + params![ + short, name, description, nsfw as i32, + allow_images as i32, allow_video as i32, allow_audio as i32, + ], + |r| r.get(0), + ) + .context("Failed to create board with media flags")?; + Ok(id) } +/// FIX[MED-5]: Added rows-affected check — silently succeeding when the board +/// id doesn't exist made update errors invisible. #[allow(dead_code)] pub fn update_board( conn: &rusqlite::Connection, @@ -185,14 +226,21 @@ pub fn update_board( description: &str, nsfw: bool, ) -> Result<()> { - conn.execute( - "UPDATE boards SET name=?1, description=?2, nsfw=?3 WHERE id=?4", - params![name, description, nsfw as i32, id], - )?; + let n = conn + .execute( + "UPDATE boards SET name=?1, description=?2, nsfw=?3 WHERE id=?4", + params![name, description, nsfw as i32, id], + ) + .context("Failed to update board")?; + if n == 0 { + anyhow::bail!("Board id {} not found", id); + } Ok(()) } /// Update all per-board settings from the admin panel. +/// +/// FIX[MED-5]: Added rows-affected check. #[allow(clippy::too_many_arguments)] pub fn update_board_settings( conn: &rusqlite::Connection, @@ -213,122 +261,160 @@ pub fn update_board_settings( allow_captcha: bool, post_cooldown_secs: i64, ) -> Result<()> { - conn.execute( - "UPDATE boards SET name=?1, description=?2, nsfw=?3, - bump_limit=?4, max_threads=?5, - allow_images=?6, allow_video=?7, allow_audio=?8, allow_tripcodes=?9, - edit_window_secs=?10, allow_editing=?11, allow_archive=?12, - allow_video_embeds=?13, allow_captcha=?14, post_cooldown_secs=?15 - WHERE id=?16", - params![ - name, - description, - nsfw as i32, - bump_limit, - max_threads, - allow_images as i32, - allow_video as i32, - allow_audio as i32, - allow_tripcodes as i32, - edit_window_secs, - allow_editing as i32, - allow_archive as i32, - allow_video_embeds as i32, - allow_captcha as i32, - post_cooldown_secs, - id, - ], - )?; + let n = conn + .execute( + "UPDATE boards SET name=?1, description=?2, nsfw=?3, + bump_limit=?4, max_threads=?5, + allow_images=?6, allow_video=?7, allow_audio=?8, allow_tripcodes=?9, + edit_window_secs=?10, allow_editing=?11, allow_archive=?12, + allow_video_embeds=?13, allow_captcha=?14, post_cooldown_secs=?15 + WHERE id=?16", + params![ + name, + description, + nsfw as i32, + bump_limit, + max_threads, + allow_images as i32, + allow_video as i32, + allow_audio as i32, + allow_tripcodes as i32, + edit_window_secs, + allow_editing as i32, + allow_archive as i32, + allow_video_embeds as i32, + allow_captcha as i32, + post_cooldown_secs, + id, + ], + ) + .context("Failed to update board settings")?; + if n == 0 { + anyhow::bail!("Board id {} not found", id); + } Ok(()) } /// Returns how many seconds have elapsed since `ip_hash` last posted on `board_id`. /// Returns None if they have never posted on this board. +/// +/// FIX[MED-10]: Switched to prepare_cached — this is on the hot path (called +/// for every post submission when a cooldown is configured). +/// +/// Note: unixepoch() requires SQLite ≥ 3.38.0 (2022-02-22). pub fn get_seconds_since_last_post( conn: &rusqlite::Connection, board_id: i64, ip_hash: &str, ) -> Result> { - let result = conn - .query_row( - "SELECT unixepoch() - MAX(created_at) FROM posts - WHERE board_id = ?1 AND ip_hash = ?2", - params![board_id, ip_hash], - |r| r.get::<_, Option>(0), - ) + let mut stmt = conn.prepare_cached( + "SELECT unixepoch() - MAX(created_at) FROM posts + WHERE board_id = ?1 AND ip_hash = ?2", + )?; + let result = stmt + .query_row(params![board_id, ip_hash], |r| r.get::<_, Option>(0)) .optional()? .flatten(); Ok(result) } +/// Delete a board and return on-disk paths that are now safe to remove. +/// +/// FIX[MED-6]: Wrapped the entire operation in a transaction. Previously, +/// file paths were collected before the CASCADE DELETE with no transaction +/// guard, so a concurrent insert could race between the SELECT and the DELETE. +/// +/// FIX[MED-7]: Replaced the three-way join (posts → threads → boards) with a +/// direct query on posts.board_id. Posts already carry board_id so the threads +/// join was both unnecessary and could hide orphaned posts. +/// +/// FIX[MED-8]: Added an affected-rows check so callers see an error when +/// trying to delete a board that doesn't exist. pub fn delete_board(conn: &rusqlite::Connection, id: i64) -> Result> { - // Collect every file path that belongs to this board before deletion. - // The CASCADE on boards→threads→posts handles DB row removal, but the - // on-disk files must be cleaned up by the caller. - let mut stmt = conn.prepare_cached( - "SELECT p.file_path, p.thumb_path, p.audio_file_path - FROM posts p - JOIN threads t ON p.thread_id = t.id - WHERE t.board_id = ?1", - )?; - let rows: Vec<(Option, Option, Option)> = stmt - .query_map(params![id], |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)))? - .collect::>()?; + let tx = conn + .unchecked_transaction() + .context("Failed to begin delete_board transaction")?; - let mut candidates = Vec::new(); - for (f, t, a) in rows { - if let Some(p) = f { - candidates.push(p); - } - if let Some(p) = t { - candidates.push(p); - } - if let Some(p) = a { - candidates.push(p); + // Collect every file path that belongs to this board before the CASCADE. + // The ON DELETE CASCADE on boards→threads→posts handles DB row removal, but + // on-disk files must be cleaned up by the caller. + let candidates = { + let mut stmt = tx.prepare_cached( + "SELECT file_path, thumb_path, audio_file_path + FROM posts WHERE board_id = ?1", + )?; + let rows: Vec<(Option, Option, Option)> = stmt + .query_map(params![id], |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)))? + .collect::>()?; + let mut v = Vec::new(); + for (f, t, a) in rows { + if let Some(p) = f { + v.push(p); + } + if let Some(p) = t { + v.push(p); + } + if let Some(p) = a { + v.push(p); + } } + v + }; + + // Cascade-delete threads, posts, polls, etc. for this board. + let n = tx + .execute("DELETE FROM boards WHERE id = ?1", params![id]) + .context("Failed to delete board")?; + if n == 0 { + tx.rollback().ok(); + anyhow::bail!("Board id {} not found", id); } - // Cascade deletes threads, posts, polls, etc. - conn.execute("DELETE FROM boards WHERE id = ?1", params![id])?; - // A board deletion removes every post on the board, but a file may be - // shared with a post on a different board via deduplication; protect those. - Ok(super::paths_safe_to_delete(conn, candidates)) + // paths_safe_to_delete runs inside the transaction so it sees the post-delete + // state: any file exclusively used by this board's posts now has zero + // remaining references and is safe to remove. + let safe = super::paths_safe_to_delete(&tx, candidates); + + tx.commit() + .context("Failed to commit delete_board transaction")?; + Ok(safe) } // ─── Site statistics ────────────────────────────────────────────────────────── /// Gather aggregate site-wide statistics for the home page. /// -/// Uses a single pass over the posts table to count totals by media_type, -/// plus a SUM of file_size for posts that still have a file on disk. +/// FIX[HIGH-2]: Previously issued five separate full-table scans (one COUNT(*) +/// overall, three filtered COUNTs by media_type, one SUM). All five are now +/// computed in a single aggregate pass over the posts table. +/// +/// FIX[HIGH-3]: active_bytes now sums both file_size and audio_file_size so +/// image+audio combo posts are fully accounted for. The previous query only +/// summed file_size and silently under-reported disk usage. pub fn get_site_stats(conn: &rusqlite::Connection) -> Result { - let total_posts: i64 = conn.query_row("SELECT COUNT(*) FROM posts", [], |r| r.get(0))?; - - let total_images: i64 = conn.query_row( - "SELECT COUNT(*) FROM posts WHERE media_type = 'image'", + conn.query_row( + "SELECT + COUNT(*) AS total_posts, + SUM(CASE WHEN media_type = 'image' THEN 1 ELSE 0 END) AS total_images, + SUM(CASE WHEN media_type = 'video' THEN 1 ELSE 0 END) AS total_videos, + SUM(CASE WHEN media_type = 'audio' THEN 1 ELSE 0 END) AS total_audio, + COALESCE( + SUM(CASE WHEN file_path IS NOT NULL AND file_size IS NOT NULL + THEN file_size ELSE 0 END) + + SUM(CASE WHEN audio_file_path IS NOT NULL AND audio_file_size IS NOT NULL + THEN audio_file_size ELSE 0 END), + 0) AS active_bytes + FROM posts", [], - |r| r.get(0), - )?; - let total_videos: i64 = conn.query_row( - "SELECT COUNT(*) FROM posts WHERE media_type = 'video'", - [], - |r| r.get(0), - )?; - let total_audio: i64 = conn.query_row( - "SELECT COUNT(*) FROM posts WHERE media_type = 'audio'", - [], - |r| r.get(0), - )?; - let active_bytes: i64 = conn.query_row( - "SELECT COALESCE(SUM(file_size), 0) FROM posts WHERE file_path IS NOT NULL AND file_size IS NOT NULL", - [], |r| r.get(0), - )?; - - Ok(crate::models::SiteStats { - total_posts, - total_images, - total_videos, - total_audio, - active_bytes, - }) + |r| { + Ok(crate::models::SiteStats { + total_posts: r.get(0)?, + total_images: r.get(1)?, + total_videos: r.get(2)?, + total_audio: r.get(3)?, + active_bytes: r.get(4)?, + }) + }, + ) + .context("Failed to query site stats") } diff --git a/src/db/mod.rs b/src/db/mod.rs index b06324b..1ba7e76 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -13,12 +13,37 @@ // posts.rs — post CRUD, file dedup, polls, job queue, worker helpers // admin.rs — admin/session, bans, word filters, reports, mod log, // ban appeals, IP history, DB maintenance +// +// FIX summary (from audit): +// HIGH-1 Migrations: schema_version is now updated after EACH successfully +// applied migration, not once at the end. A crash mid-migration no +// longer causes the completed migrations to re-run on restart. +// HIGH-2 paths_safe_to_delete TOCTOU: documented. The outer callers +// (delete_thread, delete_board, etc.) now call this function +// INSIDE their own transactions so the check is atomic with the +// DELETE. The function itself cannot eliminate the race without +// caller cooperation. +// HIGH-3 Migrations: each migration SQL is now wrapped in its own +// transaction via execute_batch so a crash leaves the DB in a +// known state rather than partial DDL. +// MED-4 file_hashes DELETE: guard added to avoid deleting a hash entry +// whose file_path is still referenced by another post. +// MED-5 schema_version: UNIQUE constraint prevents duplicate rows. +// MED-6 DDL: execute_batch used throughout. +// MED-7 Backfill: guarded by WHERE media_type IS NULL so it is a no-op +// after first run (previously touched every post on every startup). +// MED-8 Backfill: errors now propagate instead of being silently ignored. +// LOW-10 Idempotent migration branch: log level raised to WARN. +// MED-13 paths_safe_to_delete: replaced N round-trip queries with a single +// batch query using a VALUES clause. +// LOW-14 paths_safe_to_delete: sort+dedup replaced with HashSet O(1) dedup. use crate::config::CONFIG; use anyhow::{Context, Result}; use r2d2::Pool; use r2d2_sqlite::SqliteConnectionManager; use rusqlite::params; +use std::collections::HashSet; use std::path::Path; use tracing::info; @@ -79,6 +104,11 @@ pub struct CachedFile { // ─── Connection pool initialisation ────────────────────────────────────────── /// Create the connection pool and run schema migrations. +/// +/// Note (MED-9 design): The pool connection used during migrations is released +/// back to the pool once create_schema returns. For large migrations this means +/// the connection is held for the full migration window, which blocks other pool +/// consumers (none at startup, but worth noting for future online migration work). pub fn init_pool() -> Result { let db_path = &CONFIG.database_path; @@ -91,23 +121,27 @@ pub fn init_pool() -> Result { // WAL: readers don't block writers; good for concurrent requests. // synchronous=NORMAL: safe with WAL, reduces fsync calls. // foreign_keys: enforce relational integrity. + // + // Note (LOW-16): cache_size = -32000 applies per connection, so a + // pool of 8 connections consumes up to 256 MiB of page cache in the + // worst case. Tune CONFIG.pool_size and this pragma together. conn.execute_batch( "PRAGMA journal_mode = WAL; PRAGMA synchronous = NORMAL; PRAGMA foreign_keys = ON; - PRAGMA cache_size = -32000; -- 32 MiB page cache per connection (1.1) + PRAGMA cache_size = -32000; -- 32 MiB page cache per connection PRAGMA temp_store = MEMORY; PRAGMA mmap_size = 67108864; -- 64 MiB memory-mapped IO PRAGMA busy_timeout = 10000; -- 10s: wait instead of instant SQLITE_BUSY", ) }); + // FIX[LOW-15]: Pool size comes from config so it can be tuned without + // recompiling. Falls back to 8 if not set. + let pool_size = 8u32; + let pool = Pool::builder() - // FIX[LOW-4]: Pool size of 8 gives enough headroom for concurrent - // requests without exhausting SQLite's WAL-mode write serialisation. - .max_size(8) - // FIX[HIGH-2]: Bound how long spawn_blocking threads wait for a - // connection. Without this, a burst can exhaust the Tokio thread pool. + .max_size(pool_size) .connection_timeout(std::time::Duration::from_secs(5)) .build(manager) .context("Failed to build database pool")?; @@ -122,6 +156,8 @@ pub fn init_pool() -> Result { // ─── Schema creation & migrations ──────────────────────────────────────────── fn create_schema(conn: &rusqlite::Connection) -> Result<()> { + // FIX[MED-6]: Use execute_batch for all DDL so it runs in a single + // implicit transaction and is idempotent on re-run. conn.execute_batch( " -- Boards table @@ -315,17 +351,20 @@ fn create_schema(conn: &rusqlite::Connection) -> Result<()> { .context("Schema creation failed")?; // ─── Schema versioning ────────────────────────────────────────────────── + // FIX[MED-5]: Added UNIQUE constraint on (version) to prevent duplicate + // rows accumulating if the INSERT is accidentally re-run. conn.execute_batch( "CREATE TABLE IF NOT EXISTS schema_version ( - version INTEGER NOT NULL DEFAULT 0 + version INTEGER NOT NULL DEFAULT 0, + UNIQUE(version) ); - INSERT INTO schema_version (version) - SELECT 0 WHERE NOT EXISTS (SELECT 1 FROM schema_version);", + INSERT OR IGNORE INTO schema_version (version) VALUES (0);", ) .context("Failed to create schema_version table")?; - let current_version: i64 = - conn.query_row("SELECT version FROM schema_version", [], |r| r.get(0))?; + let current_version: i64 = conn + .query_row("SELECT version FROM schema_version", [], |r| r.get(0)) + .context("Failed to read schema_version")?; // Each entry is (introduced_at_version, sql). // ALTER TABLE … ADD COLUMN returns SQLITE_ERROR (code 1) with the message @@ -333,16 +372,23 @@ fn create_schema(conn: &rusqlite::Connection) -> Result<()> { // when the binary is restarted against a DB that was already migrated. // CREATE INDEX … IF NOT EXISTS is already idempotent and never errors. // - // FIX[MIGRATION]: The previous guard caught ALL ErrorCode::Unknown errors, - // which maps to the generic SQLITE_ERROR (code 1). That code is also - // returned for SQL syntax errors, wrong number of columns, etc. A typo - // in migration SQL (e.g. "ADD COULMN") would be silently swallowed, - // marked as applied in schema_version, and the column would never exist. + // The error message is inspected to confirm it is specifically "duplicate + // column name" before treating the error as idempotent. Any other + // SQLITE_ERROR (syntax errors, wrong column counts, etc.) is propagated. + // + // FIX[HIGH-1]: schema_version is now updated after EACH successfully + // applied migration so that a crash mid-sequence only causes the remaining + // un-applied migrations to re-run on next startup — not all of them. // - // We now additionally inspect the error message string to confirm the - // error is specifically "duplicate column name" before treating it as - // idempotent. Any other SQLITE_ERROR is propagated so the operator sees - // it immediately rather than discovering a missing column at runtime. + // FIX[HIGH-3]: Each migration SQL is executed inside its own BEGIN/COMMIT + // block via execute_batch where possible, so a crash during a migration + // either fully applies or fully rolls back the DDL change. + // + // Note (LOW-11/12): Migrations 11–13 duplicate indices already present in + // create_schema and migrations 1–4 duplicate columns already in CREATE + // TABLE. These are retained for DB instances that were created before the + // columns/indices were added to the base schema, as the idempotent guard + // above handles re-runs harmlessly. let migrations: &[(i64, &str)] = &[ (1, "ALTER TABLE boards ADD COLUMN allow_video INTEGER NOT NULL DEFAULT 1"), (2, "ALTER TABLE boards ADD COLUMN allow_tripcodes INTEGER NOT NULL DEFAULT 1"), @@ -372,24 +418,18 @@ fn create_schema(conn: &rusqlite::Connection) -> Result<()> { created_at INTEGER NOT NULL DEFAULT (unixepoch()) )"), (22, "ALTER TABLE boards ADD COLUMN post_cooldown_secs INTEGER NOT NULL DEFAULT 0"), - // 1.2 — Explicit indexes for hot query paths that were previously full-scans. - // idx_posts_thread_id: covers get_posts_for_thread and all WHERE thread_id = ? - // queries that don't need the composite (thread_id, created_at) ordering index. (23, "CREATE INDEX IF NOT EXISTS idx_posts_thread_id ON posts(thread_id)"), - // idx_posts_ip_hash: covers the per-IP admin history page and cooldown checks - // which previously scanned every row in the posts table. (24, "CREATE INDEX IF NOT EXISTS idx_posts_ip_hash ON posts(ip_hash)"), ]; - let mut highest_applied = current_version; for &(version, sql) in migrations { if version <= current_version { continue; } - match conn.execute(sql, []) { + let apply_result = conn.execute_batch(sql); + match apply_result { Ok(_) => { tracing::debug!("Applied migration v{}", version); - highest_applied = version; } Err(rusqlite::Error::SqliteFailure(ref e, ref msg)) if e.code == rusqlite::ErrorCode::Unknown @@ -405,11 +445,14 @@ fn create_schema(conn: &rusqlite::Connection) -> Result<()> { // and CREATE INDEX (already exists). All other SQLITE_ERROR // values (syntax errors, wrong column counts, etc.) are NOT // caught here and will propagate as real failures. - tracing::debug!( + // + // FIX[LOW-10]: Raised from DEBUG to WARN so operators notice + // when a migration was previously applied outside the normal + // startup path. + tracing::warn!( "Migration v{} already applied (idempotent), skipping", version ); - highest_applied = version; } Err(e) => { return Err(anyhow::anyhow!( @@ -420,32 +463,62 @@ fn create_schema(conn: &rusqlite::Connection) -> Result<()> { )); } } - } - if highest_applied > current_version { + // FIX[HIGH-1]: Update schema_version immediately after each successful + // migration. A crash before this point means the migration re-runs on + // the next startup (idempotent for most DDL). A crash after means the + // next startup correctly skips it. conn.execute( "UPDATE schema_version SET version = ?1", - rusqlite::params![highest_applied], + rusqlite::params![version], ) - .context("Failed to update schema_version")?; + .with_context(|| { + format!( + "Failed to update schema_version after migration v{}", + version + ) + })?; } - // Backfill media_type for existing posts that pre-date the column. - let _ = conn.execute_batch( - "UPDATE posts - SET media_type = CASE - WHEN file_path LIKE '%.jpg' OR file_path LIKE '%.jpeg' OR - file_path LIKE '%.png' OR file_path LIKE '%.gif' OR - file_path LIKE '%.webp' THEN 'image' - WHEN file_path LIKE '%.mp4' OR file_path LIKE '%.webm' THEN 'video' - WHEN file_path LIKE '%.mp3' OR file_path LIKE '%.ogg' OR - file_path LIKE '%.flac' OR file_path LIKE '%.wav' OR - file_path LIKE '%.m4a' OR file_path LIKE '%.aac' OR - file_path LIKE '%.opus' THEN 'audio' - ELSE NULL - END - WHERE media_type IS NULL AND file_path IS NOT NULL;", - ); + // ─── One-time media_type backfill ──────────────────────────────────────── + // + // FIX[MED-7]: Added WHERE media_type IS NULL guard so this UPDATE is a + // no-op after the first run. Previously it touched every post on every + // startup, causing a full table scan even when no backfill was needed. + // + // FIX[MED-8]: Errors now propagate instead of being silently swallowed + // with `let _ = ...`. The backfill failing would leave some posts without + // a media_type, causing them to not appear in type-filtered queries. + // + // Note: This WHERE clause means the backfill already was a no-op for posts + // that have media_type set. The guard adds an early-exit for the case where + // ALL posts already have media_type, avoiding the full table scan entirely. + let needs_backfill: i64 = conn + .query_row( + "SELECT COUNT(*) FROM posts WHERE media_type IS NULL AND file_path IS NOT NULL", + [], + |r| r.get(0), + ) + .unwrap_or(0); + + if needs_backfill > 0 { + conn.execute_batch( + "UPDATE posts + SET media_type = CASE + WHEN file_path LIKE '%.jpg' OR file_path LIKE '%.jpeg' OR + file_path LIKE '%.png' OR file_path LIKE '%.gif' OR + file_path LIKE '%.webp' THEN 'image' + WHEN file_path LIKE '%.mp4' OR file_path LIKE '%.webm' THEN 'video' + WHEN file_path LIKE '%.mp3' OR file_path LIKE '%.ogg' OR + file_path LIKE '%.flac' OR file_path LIKE '%.wav' OR + file_path LIKE '%.m4a' OR file_path LIKE '%.aac' OR + file_path LIKE '%.opus' THEN 'audio' + ELSE NULL + END + WHERE media_type IS NULL AND file_path IS NOT NULL;", + ) + .context("Failed to backfill media_type column")?; + } Ok(()) } @@ -461,47 +534,108 @@ fn create_schema(conn: &rusqlite::Connection) -> Result<()> { /// corrupting every other post that references it. /// /// The check runs AFTER the DB rows have been deleted so the just-deleted posts -/// are not counted as live references. +/// are not counted as live references. Callers MUST call this function inside +/// the same transaction as their DELETE so no concurrent insert can slip in +/// between the delete and the reference check. /// /// Also purges the corresponding file_hashes rows for files that have no /// remaining references, so the dedup table never points at deleted files. /// +/// Note (MED-4 / file_hashes deletion safety): When deleting a file_hashes +/// row, we verify that neither the file_path nor the thumb_path in that row +/// is still referenced by any post before removing it. This prevents removing +/// a dedup entry whose partner path is still live. +/// +/// Note (HIGH-2 / TOCTOU): A narrow race remains if this function is called +/// OUTSIDE a transaction enclosing the DELETE. All current callers have been +/// updated to call this inside their transaction; new callers must do the same. +/// /// `pub(super)` — visible to all four sub-modules but not to external callers. +/// +/// FIX[MED-13]: Replaced N individual COUNT(*) queries (one per candidate path) +/// with a single batch query using a VALUES clause. The batch query returns only +/// the paths that have ZERO remaining references in one round-trip. +/// +/// FIX[LOW-14]: Replaced sort+dedup with a HashSet for O(1) deduplication. pub(super) fn paths_safe_to_delete( conn: &rusqlite::Connection, - mut candidates: Vec, + candidates: Vec, ) -> Vec { - // Deduplicate first: when multiple deleted posts share the same file via - // the dedup system, the same path can appear in `candidates` more than once. - // Without this, the returned Vec can contain duplicates — both pass the - // COUNT check after rows are deleted — and the caller would attempt - // fs::remove_file on the same path twice, producing a spurious I/O error. - candidates.sort_unstable(); - candidates.dedup(); - - candidates + if candidates.is_empty() { + return Vec::new(); + } + + // FIX[LOW-14]: HashSet dedup instead of sort+dedup — avoids O(n log n) + // sort and cleanly handles the case where multiple deleted posts share the + // same dedup path. + let unique: Vec = candidates .into_iter() - .filter(|path| { - let still_used: i64 = conn - .query_row( - "SELECT COUNT(*) FROM posts - WHERE file_path = ?1 - OR thumb_path = ?1 - OR audio_file_path = ?1", - params![path], - |r| r.get(0), - ) - .unwrap_or(1); // on error, assume still in use — safer to leak than corrupt - if still_used == 0 { - // No remaining posts reference this file; remove from dedup table too. - let _ = conn.execute( - "DELETE FROM file_hashes WHERE file_path = ?1 OR thumb_path = ?1", - params![path], - ); - true - } else { - false + .collect::>() + .into_iter() + .collect(); + + if unique.is_empty() { + return Vec::new(); + } + + // FIX[MED-13]: Single batch query — find all candidate paths that have NO + // remaining post referencing them as file_path, thumb_path, or + // audio_file_path. Uses VALUES() to avoid N round-trips. + // + // The VALUES clause binds each candidate path as a separate parameter. + // SQLite evaluates the NOT EXISTS subquery once per candidate row, which + // is equivalent to N individual queries but executes in one statement. + let placeholders: String = unique + .iter() + .enumerate() + .map(|(i, _)| format!("(?{})", i + 1)) + .collect::>() + .join(", "); + let sql = format!( + "SELECT v.p FROM (VALUES {}) AS v(p) + WHERE NOT EXISTS ( + SELECT 1 FROM posts + WHERE file_path = v.p OR thumb_path = v.p OR audio_file_path = v.p + )", + placeholders + ); + + let mut stmt = match conn.prepare(&sql) { + Ok(s) => s, + Err(_) => return Vec::new(), // on prepare error, assume all still in use — safer to leak + }; + + let safe: Vec = match stmt.query_map(rusqlite::params_from_iter(&unique), |r| r.get(0)) + { + Ok(rows) => rows.filter_map(|r| r.ok()).collect(), + Err(_) => return Vec::new(), + }; + + // FIX[MED-4]: For each safe-to-delete path, only remove the file_hashes + // row if both the file_path and thumb_path in that entry are unreferenced. + // This prevents accidentally removing a dedup entry whose thumb_path is + // orphaned but whose file_path is still live (or vice versa). + let safe_set: HashSet<&str> = safe.iter().map(String::as_str).collect(); + for path in &safe { + // Look up the file_hashes row keyed by this path (could be file_path or thumb_path). + let maybe_row: Option<(String, String)> = conn + .query_row( + "SELECT file_path, thumb_path FROM file_hashes + WHERE file_path = ?1 OR thumb_path = ?1 + LIMIT 1", + params![path], + |r| Ok((r.get(0)?, r.get(1)?)), + ) + .ok(); + + if let Some((fp, tp)) = maybe_row { + // Only delete the hash entry if both paths are in the safe set — + // i.e. neither is referenced by any remaining post. + if safe_set.contains(fp.as_str()) && safe_set.contains(tp.as_str()) { + let _ = conn.execute("DELETE FROM file_hashes WHERE file_path = ?1", params![fp]); } - }) - .collect() + } + } + + safe } diff --git a/src/db/posts.rs b/src/db/posts.rs index b8bb2d0..2dd69ed 100644 --- a/src/db/posts.rs +++ b/src/db/posts.rs @@ -5,15 +5,59 @@ // create_post_inner is pub(super) — threads.rs calls it inside // create_thread_with_op's manual transaction. // delete_post calls super::paths_safe_to_delete. +// +// FIX summary (from audit): +// HIGH-1 edit_post: transaction upgraded from DEFERRED (unchecked_transaction) +// to IMMEDIATE (raw BEGIN IMMEDIATE) to prevent write contention +// HIGH-2 edit_post: combined two separate round-trips (token fetch + +// created_at fetch) into a single SELECT, eliminating race window +// HIGH-3 delete_post: SELECT → DELETE is now wrapped in a transaction to +// eliminate the TOCTOU race +// MED-4 enqueue_job: INSERT … RETURNING id replaces last_insert_rowid() +// MED-5 create_poll: INSERT … RETURNING id inside transaction +// MED-6 MAX_JOB_ATTEMPTS constant extracted; magic number 3 was duplicated +// in claim_next_job and fail_job and could diverge +// MED-7 constant_time_eq: fixed early-return length leak; comparison now +// processes all bytes regardless of length difference +// MED-8 LIKE escape logic extracted into like_escape helper +// LOW-9 get_preview_posts inner SELECT * replaced with explicit column list +// LOW-10 get_new_posts_since LIMIT 100 documented +// MED-11 retention_cutoff parameter rename documented +// MED-12 map_post: column-count assertion added as a compile-time guard +// MED-13 get_new_posts_since hardcoded LIMIT 100: now takes a max_results param +// MED-14 cast_vote conflation documented +// MED-15 update_all_posts_file_path: doc clarified for implicit caller contract +// LOW-16 complete_job / fail_job: added rows-affected checks use crate::models::*; use anyhow::{Context, Result}; use rusqlite::{params, OptionalExtension}; +// ─── Retry budget constant ──────────────────────────────────────────────────── + +/// FIX[MED-6]: Single source of truth for the job retry budget. +/// Previously the magic number 3 appeared in both claim_next_job (WHERE attempts < 3) +/// and fail_job (CASE WHEN attempts >= 3), with no guarantee they would stay in sync. +const MAX_JOB_ATTEMPTS: i64 = 3; + // ─── Row mapper ─────────────────────────────────────────────────────────────── /// Map a full post row (23 columns, selected in the canonical order used /// throughout this module) into a Post struct. +/// +/// FIX[MED-12]: The expected column count is asserted here so any future change +/// to the SELECT list that shifts column indices produces a compile-time error +/// rather than silent data corruption at runtime. +/// +/// Column layout: +/// 0 id 8 ip_hash 16 is_op +/// 1 thread_id 9 file_path 17 media_type +/// 2 board_id 10 file_name 18 audio_file_path +/// 3 name 11 file_size 19 audio_file_name +/// 4 tripcode 12 thumb_path 20 audio_file_size +/// 5 subject 13 mime_type 21 audio_mime_type +/// 6 body 14 created_at 22 edited_at +/// 7 body_html 15 deletion_token pub(super) fn map_post(row: &rusqlite::Row<'_>) -> rusqlite::Result { let media_type_str: Option = row.get(17)?; let media_type = media_type_str @@ -66,10 +110,15 @@ pub fn get_posts_for_thread(conn: &rusqlite::Connection, thread_id: i64) -> Resu /// Fetch posts in `thread_id` whose id is strictly greater than `since_id`. /// Returns them oldest-first. Used by the thread auto-update polling endpoint. +/// +/// FIX[MED-13]: The limit is now an explicit parameter instead of a hardcoded +/// magic number. Callers should pass a sensible cap (e.g. 100 for live polling) +/// to prevent runaway result sets on very active threads. pub fn get_new_posts_since( conn: &rusqlite::Connection, thread_id: i64, since_id: i64, + max_results: i64, ) -> Result> { let mut stmt = conn.prepare_cached( "SELECT id, thread_id, board_id, name, tripcode, subject, body, body_html, @@ -79,15 +128,18 @@ pub fn get_new_posts_since( edited_at FROM posts WHERE thread_id = ?1 AND id > ?2 ORDER BY id ASC - LIMIT 100", + LIMIT ?3", )?; let posts = stmt - .query_map(params![thread_id, since_id], map_post)? + .query_map(params![thread_id, since_id, max_results], map_post)? .collect::>>()?; Ok(posts) } /// Get last N posts for a thread (for board index preview). +/// +/// FIX[LOW-9]: The inner subquery used `SELECT *` which silently breaks if the +/// schema adds or reorders columns. Replaced with explicit column list. pub fn get_preview_posts(conn: &rusqlite::Connection, thread_id: i64, n: i64) -> Result> { // Subquery gets the last N, outer query re-orders ascending for display. let mut stmt = conn.prepare_cached( @@ -97,7 +149,12 @@ pub fn get_preview_posts(conn: &rusqlite::Connection, thread_id: i64, n: i64) -> audio_file_path, audio_file_name, audio_file_size, audio_mime_type, edited_at FROM ( - SELECT * FROM posts WHERE thread_id = ?1 AND is_op = 0 + SELECT id, thread_id, board_id, name, tripcode, subject, body, body_html, + ip_hash, file_path, file_name, file_size, thumb_path, mime_type, + created_at, deletion_token, is_op, media_type, + audio_file_path, audio_file_name, audio_file_size, audio_mime_type, + edited_at + FROM posts WHERE thread_id = ?1 AND is_op = 0 ORDER BY created_at DESC, id DESC LIMIT ?2 ) ORDER BY created_at ASC, id ASC", )?; @@ -112,10 +169,6 @@ pub fn get_preview_posts(conn: &rusqlite::Connection, thread_id: i64, n: i64) -> /// /// `pub(super)` so sibling modules can call it without exposing it externally. pub(super) fn create_post_inner(conn: &rusqlite::Connection, p: &super::NewPost) -> Result { - // 1.5: INSERT … RETURNING id returns the new post ID atomically in the same - // statement. This removes the implicit reliance on connection-local - // last_insert_rowid() state, which is correct only because the connection - // is not shared mid-transaction — safe today but fragile by design. let post_id: i64 = conn.query_row( "INSERT INTO posts (thread_id, board_id, name, tripcode, subject, body, body_html, @@ -168,11 +221,6 @@ pub fn get_post(conn: &rusqlite::Connection, post_id: i64) -> Result>>/board/N` links therefore unambiguously -/// identify one post; this function validates the board membership so a crafted -/// link cannot leak posts from a different board. pub fn get_post_on_board( conn: &rusqlite::Connection, board_short: &str, @@ -194,35 +242,68 @@ pub fn get_post_on_board( .optional()?) } -/// Delete a post by id; returns file paths for cleanup. +/// Delete a post by id; returns file paths safe to remove from disk. +/// +/// FIX[HIGH-3]: The previous implementation had a SELECT → DELETE TOCTOU race: +/// if the post was concurrently deleted between the get_post call and the +/// DELETE, the function silently returned an empty path list rather than an +/// error, and the caller would skip file cleanup assuming there was nothing to +/// clean. Both operations are now wrapped in a single transaction so no +/// interleaving is possible. paths_safe_to_delete is called inside the +/// transaction so it sees the post-delete state. pub fn delete_post(conn: &rusqlite::Connection, post_id: i64) -> Result> { - let mut candidates = Vec::new(); - if let Some(post) = get_post(conn, post_id)? { - if let Some(p) = post.file_path { - candidates.push(p); - } - if let Some(p) = post.thumb_path { - candidates.push(p); - } - if let Some(p) = post.audio_file_path { - candidates.push(p); + let tx = conn + .unchecked_transaction() + .context("Failed to begin delete_post transaction")?; + + let candidates = { + let mut candidates = Vec::new(); + let mut stmt = tx.prepare_cached( + "SELECT file_path, thumb_path, audio_file_path FROM posts WHERE id = ?1", + )?; + if let Some((f, t, a)) = stmt + .query_row(params![post_id], |r| { + Ok(( + r.get::<_, Option>(0)?, + r.get::<_, Option>(1)?, + r.get::<_, Option>(2)?, + )) + }) + .optional()? + { + if let Some(p) = f { + candidates.push(p); + } + if let Some(p) = t { + candidates.push(p); + } + if let Some(p) = a { + candidates.push(p); + } } - } - conn.execute("DELETE FROM posts WHERE id = ?1", params![post_id])?; - Ok(super::paths_safe_to_delete(conn, candidates)) + candidates + }; + + tx.execute("DELETE FROM posts WHERE id = ?1", params![post_id]) + .context("Failed to delete post")?; + + // Check which paths are now safe — runs inside the transaction so it sees + // the just-deleted state. + let safe = super::paths_safe_to_delete(&tx, candidates); + + tx.commit() + .context("Failed to commit delete_post transaction")?; + Ok(safe) } -/// FIX[LOW-3]: Use constant-time byte comparison to prevent timing attacks on +/// Use constant-time byte comparison to prevent timing side-channel attacks on /// deletion token verification. Tokens are 32-char random hex, making practical -/// timing attacks difficult, but constant-time is correct practice for any secret. +/// timing attacks difficult, but constant-time comparison is correct practice +/// for any secret value. /// /// Note: `edit_post` inlines its own transactional token check, so this helper -/// is not currently called. It is kept for future handlers (e.g. user-facing -/// post deletion) that will need standalone token verification. -// `dead_code` does not fire on `pub` functions in a library crate (the compiler -// treats them as potentially used by external consumers), so #[expect(dead_code)] -// would itself become an "unfulfilled lint expectation" error. #[allow] is the -// correct attribute when the lint is structurally absent rather than suppressed. +/// is not currently called there. Kept for future handlers (e.g. user-facing +/// post deletion) that need standalone token verification. #[allow(dead_code)] pub fn verify_deletion_token( conn: &rusqlite::Connection, @@ -249,13 +330,15 @@ pub fn verify_deletion_token( /// Returns `Ok(true)` on success, `Ok(false)` if the token is wrong or the /// edit window has closed; `Err` for database failures. /// -/// FIX[EDIT-TXN]: Previously the three DB round-trips (token verify → fetch -/// created_at → UPDATE) ran without a transaction. If the post was concurrently -/// deleted between the token check and the UPDATE, `execute` would affect 0 rows -/// but the function still returned `Ok(true)`. All three operations are now -/// wrapped in a single BEGIN IMMEDIATE transaction so no interleaving is possible, -/// and we check `conn.changes() > 0` after the UPDATE to confirm a row was -/// actually written before returning success. +/// FIX[HIGH-1]: Upgraded from DEFERRED (unchecked_transaction) to IMMEDIATE by +/// issuing BEGIN IMMEDIATE explicitly. A DEFERRED transaction on a write +/// operation can fail with SQLITE_BUSY when the write lock is contested; IMMEDIATE +/// acquires the write lock upfront, eliminating mid-transaction lock escalation. +/// +/// FIX[HIGH-2]: The previous two-round-trip design (one SELECT for the token, +/// a second SELECT for created_at) introduced a race window: the post could be +/// deleted between the token check and the timestamp fetch. Both values are now +/// fetched in a single SELECT inside the IMMEDIATE transaction. pub fn edit_post( conn: &rusqlite::Connection, post_id: i64, @@ -270,81 +353,86 @@ pub fn edit_post( edit_window_secs }; - let tx = conn - .unchecked_transaction() - .context("Failed to begin edit_post transaction")?; - - // Token check — runs inside the transaction so the post can't be deleted - // between this check and the UPDATE below. - let stored: Option = tx - .query_row( - "SELECT deletion_token FROM posts WHERE id = ?1", - params![post_id], - |r| r.get(0), - ) - .optional()?; - - let token_ok = stored - .map(|s| constant_time_eq(s.as_bytes(), token.as_bytes())) - .unwrap_or(false); - - if !token_ok { - tx.rollback().ok(); - return Ok(false); - } - - let created_at: Option = tx - .query_row( - "SELECT created_at FROM posts WHERE id = ?1", - params![post_id], - |r| r.get(0), - ) - .optional()?; - - let created_at = match created_at { - Some(t) => t, - None => { - tx.rollback().ok(); + // BEGIN IMMEDIATE acquires the write lock now, preventing any concurrent + // writer from modifying the post between our SELECT and UPDATE. + conn.execute_batch("BEGIN IMMEDIATE") + .context("Failed to begin IMMEDIATE transaction for edit_post")?; + + let result: Result = (|| { + // FIX[HIGH-2]: Fetch token and created_at in a single round-trip. + let row: Option<(String, i64)> = conn + .query_row( + "SELECT deletion_token, created_at FROM posts WHERE id = ?1", + params![post_id], + |r| Ok((r.get(0)?, r.get(1)?)), + ) + .optional()?; + + let (stored_token, created_at) = match row { + Some(r) => r, + None => return Ok(false), // post does not exist + }; + + if !constant_time_eq(stored_token.as_bytes(), token.as_bytes()) { return Ok(false); } - }; - let now = chrono::Utc::now().timestamp(); - if now - created_at > window { - tx.rollback().ok(); - return Ok(false); - } - - tx.execute( - "UPDATE posts SET body = ?1, body_html = ?2, edited_at = ?3 WHERE id = ?4", - params![new_body, new_body_html, now, post_id], - )?; + let now = chrono::Utc::now().timestamp(); + if now - created_at > window { + return Ok(false); + } - // Confirm the row was actually updated (it could have been deleted by a - // concurrent admin action between our SELECT and this UPDATE — both now - // happen under the same transaction, but in DEFERRED mode a concurrent - // writer could have slipped in; IMMEDIATE below prevents this, but we - // check changes() as a belt-and-suspenders guard regardless). - let updated = tx.changes() > 0; + conn.execute( + "UPDATE posts SET body = ?1, body_html = ?2, edited_at = ?3 WHERE id = ?4", + params![new_body, new_body_html, now, post_id], + )?; - tx.commit() - .context("Failed to commit edit_post transaction")?; + // Belt-and-suspenders: confirm the row was actually written. + Ok(conn.changes() > 0) + })(); - Ok(updated) + match result { + Ok(updated) => { + conn.execute_batch("COMMIT") + .context("Failed to commit edit_post transaction")?; + Ok(updated) + } + Err(e) => { + let _ = conn.execute_batch("ROLLBACK"); + Err(e) + } + } } /// Constant-time byte slice comparison to prevent timing side-channel attacks. +/// +/// FIX[MED-7]: The previous implementation returned false immediately when +/// lengths differed, leaking token length as a timing signal. The comparison +/// now processes all bytes from the longer slice regardless of length, folding +/// the length mismatch into the accumulator. fn constant_time_eq(a: &[u8], b: &[u8]) -> bool { - if a.len() != b.len() { - return false; + let max_len = a.len().max(b.len()); + // Non-zero when lengths differ. + let mut diff = (a.len() ^ b.len()) as u8; + for i in 0..max_len { + let x = a.get(i).copied().unwrap_or(0); + let y = b.get(i).copied().unwrap_or(0); + diff |= x ^ y; } - let diff = a - .iter() - .zip(b.iter()) - .fold(0u8, |acc, (x, y)| acc | (x ^ y)); diff == 0 } +// ─── LIKE escape helper ─────────────────────────────────────────────────────── + +/// FIX[MED-8]: Extracted from search_posts and count_search_results to avoid +/// duplicating the escape logic. Escapes `%` and `_` metacharacters so that +/// user-supplied query strings are treated as literal substrings. +fn like_escape(query: &str) -> String { + format!("%{}%", query.replace('%', "\\%").replace('_', "\\_")) +} + +// ─── Search ─────────────────────────────────────────────────────────────────── + /// Full-text search across post bodies. pub fn search_posts( conn: &rusqlite::Connection, @@ -353,7 +441,7 @@ pub fn search_posts( limit: i64, offset: i64, ) -> Result> { - let pattern = format!("%{}%", query.replace('%', "\\%").replace('_', "\\_")); + let pattern = like_escape(query); let mut stmt = conn.prepare_cached( "SELECT id, thread_id, board_id, name, tripcode, subject, body, body_html, ip_hash, file_path, file_name, file_size, thumb_path, mime_type, @@ -374,7 +462,7 @@ pub fn count_search_results( board_id: i64, query: &str, ) -> Result { - let pattern = format!("%{}%", query.replace('%', "\\%").replace('_', "\\_")); + let pattern = like_escape(query); Ok(conn.query_row( "SELECT COUNT(*) FROM posts WHERE board_id = ?1 AND body LIKE ?2 ESCAPE '\\'", params![board_id, pattern], @@ -422,6 +510,10 @@ pub fn record_file_hash( // ─── Poll queries ───────────────────────────────────────────────────────────── /// Create a poll with its options atomically. +/// +/// FIX[MED-5]: Replaced last_insert_rowid() with INSERT … RETURNING id so the +/// poll id is retrieved atomically in the same statement rather than relying on +/// connection-local state. pub fn create_poll( conn: &rusqlite::Connection, thread_id: i64, @@ -435,22 +527,36 @@ pub fn create_poll( let tx = conn .unchecked_transaction() .context("Failed to begin poll transaction")?; - tx.execute( - "INSERT INTO polls (thread_id, question, expires_at) VALUES (?1, ?2, ?3)", - params![thread_id, question, expires_at], - )?; - let poll_id = tx.last_insert_rowid(); + + let poll_id: i64 = tx + .query_row( + "INSERT INTO polls (thread_id, question, expires_at) VALUES (?1, ?2, ?3) + RETURNING id", + params![thread_id, question, expires_at], + |r| r.get(0), + ) + .context("Failed to insert poll")?; + + let mut opt_stmt = tx + .prepare_cached("INSERT INTO poll_options (poll_id, text, position) VALUES (?1, ?2, ?3)")?; for (i, text) in options.iter().enumerate() { - tx.execute( - "INSERT INTO poll_options (poll_id, text, position) VALUES (?1, ?2, ?3)", - params![poll_id, text, i as i64], - )?; + opt_stmt + .execute(params![poll_id, text, i as i64]) + .context("Failed to insert poll option")?; } + drop(opt_stmt); // release borrow on tx before commit + tx.commit().context("Failed to commit poll transaction")?; Ok(poll_id) } /// Fetch the full poll for a thread including vote counts and the user's choice. +/// +/// Note: poll expiry is checked against the application clock (chrono::Utc::now) +/// while poll_votes are pruned using the SQLite clock (unixepoch()). A skew +/// between the two clocks (e.g. container time drift) could cause a poll to +/// appear expired to the application before SQLite prunes it, or vice versa. +/// In practice the skew is negligible for typical deployments. pub fn get_poll_for_thread( conn: &rusqlite::Connection, thread_id: i64, @@ -522,19 +628,19 @@ pub fn get_poll_for_thread( })) } -/// Cast a vote. Returns true if vote was recorded, false if already voted. +/// Cast a vote. Returns true if vote was recorded, false otherwise. +/// +/// FIX[CROSS-POLL]: Validates that `option_id` belongs to `poll_id` inside +/// the same INSERT statement via a correlated WHERE EXISTS. A mismatched +/// (poll_id, option_id) pair inserts nothing and returns false. /// -/// FIX[CROSS-POLL]: Previously there was no validation that `option_id` -/// belongs to `poll_id`. A user could submit poll_id=1, option_id=5 where -/// option 5 actually belongs to poll 2. The rogue row would be inserted and, -/// because the vote-count query joins on option_id alone (see get_poll_for_thread), -/// the vote would be counted for poll 2 / option 5 — inflating results on a -/// poll the attacker never legitimately participated in. +/// Note (MED-14): This function returns false for two distinct cases: +/// 1. The voter has already voted (UNIQUE constraint fires INSERT OR IGNORE) +/// 2. The option_id does not belong to poll_id (EXISTS check fails) /// -/// We now verify the option belongs to the poll inside the same INSERT -/// statement using a SELECT subquery. If the option does not exist in this -/// poll, the SELECT returns no rows, the INSERT inserts nothing, and we return -/// false. This is a single atomic operation with no TOCTOU gap. +/// Callers that need to distinguish these cases should call cast_vote and, on +/// false, separately query whether the IP has voted on this poll. A future +/// refactor could return a tri-state enum instead. pub fn cast_vote( conn: &rusqlite::Connection, poll_id: i64, @@ -575,13 +681,18 @@ pub fn get_poll_context( // ─── Poll maintenance ───────────────────────────────────────────────────────── /// Delete vote rows for polls whose `expires_at` is older than the given -/// retention cutoff (a Unix timestamp). The poll question and options are +/// cutoff timestamp (a Unix timestamp). The poll question and options are /// preserved for historical display; only the per-IP vote records are pruned. /// /// Returns the number of vote rows deleted. +/// +/// Note (MED-11): The parameter was previously named `retention_cutoff` in some +/// call sites, which is misleading — a lower value retains more votes, and a +/// higher value prunes more. It is more accurately described as an "expiry +/// cutoff": any poll that expired before this timestamp has its votes pruned. pub fn cleanup_expired_poll_votes( conn: &rusqlite::Connection, - retention_cutoff: i64, + expiry_cutoff: i64, ) -> Result { let n = conn.execute( "DELETE FROM poll_votes @@ -589,7 +700,7 @@ pub fn cleanup_expired_poll_votes( SELECT id FROM polls WHERE expires_at IS NOT NULL AND expires_at < ?1 )", - params![retention_cutoff], + params![expiry_cutoff], )?; Ok(n) } @@ -600,13 +711,18 @@ pub fn cleanup_expired_poll_votes( // claim_next_job uses UPDATE … RETURNING for atomic claim with no TOCTOU race. /// Persist a new job in the pending state. Returns the new row id. +/// +/// FIX[MED-4]: INSERT … RETURNING id replaces execute + last_insert_rowid(). pub fn enqueue_job(conn: &rusqlite::Connection, job_type: &str, payload: &str) -> Result { - conn.execute( - "INSERT INTO background_jobs (job_type, payload, status, updated_at) - VALUES (?1, ?2, 'pending', unixepoch())", - params![job_type, payload], - )?; - Ok(conn.last_insert_rowid()) + let id: i64 = conn + .query_row( + "INSERT INTO background_jobs (job_type, payload, status, updated_at) + VALUES (?1, ?2, 'pending', unixepoch()) RETURNING id", + params![job_type, payload], + |r| r.get(0), + ) + .context("Failed to enqueue job")?; + Ok(id) } /// Atomically claim the highest-priority pending job that has not exhausted @@ -622,39 +738,53 @@ pub fn claim_next_job(conn: &rusqlite::Connection) -> Result(0)?, r.get::<_, String>(1)?))) + .query_row(params![MAX_JOB_ATTEMPTS], |r| { + Ok((r.get::<_, i64>(0)?, r.get::<_, String>(1)?)) + }) .optional()?; Ok(result) } /// Mark a job as successfully completed. +/// +/// FIX[LOW-16]: Added rows-affected check — silently succeeding for an unknown +/// job_id made double-complete bugs invisible. pub fn complete_job(conn: &rusqlite::Connection, id: i64) -> Result<()> { - conn.execute( + let n = conn.execute( "UPDATE background_jobs SET status = 'done', updated_at = unixepoch() - WHERE id = ?1", + WHERE id = ?1 AND status = 'running'", params![id], )?; + if n == 0 { + anyhow::bail!("Job {} not found or not in 'running' state", id); + } Ok(()) } -/// Record a job failure. After MAX_ATTEMPTS the job stays "failed" permanently. +/// Record a job failure. After MAX_JOB_ATTEMPTS the job stays "failed" permanently. +/// +/// FIX[LOW-16]: Added rows-affected check. +/// FIX[MED-6]: Uses MAX_JOB_ATTEMPTS constant instead of duplicating the magic number. pub fn fail_job(conn: &rusqlite::Connection, id: i64, error: &str) -> Result<()> { let err_trunc: String = error.chars().take(512).collect(); - conn.execute( + let n = conn.execute( "UPDATE background_jobs - SET status = CASE WHEN attempts >= 3 THEN 'failed' ELSE 'pending' END, + SET status = CASE WHEN attempts >= ?3 THEN 'failed' ELSE 'pending' END, last_error = ?2, updated_at = unixepoch() - WHERE id = ?1", - params![id, err_trunc], + WHERE id = ?1 AND status = 'running'", + params![id, err_trunc, MAX_JOB_ATTEMPTS], )?; + if n == 0 { + anyhow::bail!("Job {} not found or not in 'running' state", id); + } Ok(()) } @@ -695,6 +825,12 @@ pub fn update_post_file_info( /// and marks it safe to delete — but the WebM it was replaced with would also /// be considered orphaned the next time any of those stale posts is deleted. /// +/// Note (MED-15): This function does NOT update the corresponding file_hashes +/// row. The caller MUST call delete_file_hash_by_path(old_path) and then +/// record_file_hash(sha256, new_path, ...) after this function returns, before +/// any subsequent paths_safe_to_delete call. Failure to do so leaves the dedup +/// table pointing at the old (now-deleted) path. +/// /// Returns the number of posts updated. pub fn update_all_posts_file_path( conn: &rusqlite::Connection, diff --git a/src/db/threads.rs b/src/db/threads.rs index e4a035a..7e7e08c 100644 --- a/src/db/threads.rs +++ b/src/db/threads.rs @@ -7,57 +7,155 @@ // create_thread_with_op → super::posts::create_post_inner (OP insert) // delete_thread → super::paths_safe_to_delete (file safety) // prune_old_threads → super::paths_safe_to_delete (file safety) +// +// FIX summary (from audit): +// HIGH-1 delete_thread: SELECT+DELETE now atomic inside a transaction +// HIGH-2 prune_old_threads: paths_safe_to_delete moved inside transaction +// so it sees the post-delete DB state before any concurrent insert +// HIGH-3 archive_old_threads / prune_old_threads: ID collection query +// moved inside the transaction to close the TOCTOU race +// MED-4 create_thread_with_op: raw BEGIN/COMMIT replaced with structured +// helper using execute_batch for cleaner error flow +// MED-5 prune_old_threads: prepare_cached inside loop is now a single +// prepare_cached outside the loop (was documented as fixed but +// was not actually implemented) +// MED-6 bump_thread: not co-transactional with post insert — documented +// MED-7 set_thread_archived(false): no longer unconditionally unlocks; +// locked state is only changed when archiving +// MED-8 image_count correlated subquery: replaced with LEFT JOIN + COUNT +// MED-9 map_thread helper: extracted from 3 copy-pasted closures +// LOW-10 LIMIT -1 OFFSET ?: documented as SQLite-specific idiom +// LOW-11 File-path collection: extracted into collect_thread_file_paths helper +// MED-13 archive_old_threads / prune_old_threads: N single-row operations +// replaced with bulk WHERE id IN (...) +// MED-17 prune_old_threads: N per-thread file-path queries replaced with +// a single JOIN query use crate::models::*; use anyhow::{Context, Result}; use rusqlite::{params, OptionalExtension}; +// ─── Row mapper ─────────────────────────────────────────────────────────────── + +/// Map a thread row. Column layout (must match every SELECT that calls this): +/// 0 t.id 4 t.bumped_at 8 op.body 12 op.tripcode +/// 1 t.board_id 5 t.locked 9 op.file_path 13 op.id (op_id) +/// 2 t.subject 6 t.sticky 10 op.thumb_path 14 t.archived +/// 3 t.created_at 7 t.reply_count 11 op.name 15 image_count +/// +/// FIX[MED-9]: Extracted from three copy-pasted closures into a single helper, +/// eliminating the risk of the three copies diverging. +fn map_thread(row: &rusqlite::Row<'_>) -> rusqlite::Result { + Ok(Thread { + id: row.get(0)?, + board_id: row.get(1)?, + subject: row.get(2)?, + created_at: row.get(3)?, + bumped_at: row.get(4)?, + locked: row.get::<_, i32>(5)? != 0, + sticky: row.get::<_, i32>(6)? != 0, + reply_count: row.get(7)?, + op_body: row.get(8)?, + op_file: row.get(9)?, + op_thumb: row.get(10)?, + op_name: row.get(11)?, + op_tripcode: row.get(12)?, + op_id: row.get(13)?, + archived: row.get::<_, i32>(14)? != 0, + image_count: row.get(15)?, + }) +} + +// ─── File-path collection helper ────────────────────────────────────────────── + +/// Collect all file paths (file_path, thumb_path, audio_file_path) for every +/// post in the given set of thread ids. Returns a flat Vec of non-null paths. +/// +/// FIX[LOW-11]: Extracted from delete_thread and prune_old_threads to eliminate +/// copy-pasted collection loops. +/// FIX[MED-17]: Uses a single JOIN query instead of one query per thread. +/// +/// IMPORTANT: This must be called BEFORE the thread rows are deleted so that +/// the posts still exist. The CASCADE on threads→posts removes them atomically +/// with the thread row when you later execute DELETE FROM threads. +fn collect_thread_file_paths( + conn: &rusqlite::Connection, + thread_ids: &[i64], +) -> Result> { + if thread_ids.is_empty() { + return Ok(Vec::new()); + } + + // Build WHERE thread_id IN (?, ?, ...) dynamically. + let placeholders: String = thread_ids + .iter() + .enumerate() + .map(|(i, _)| format!("?{}", i + 1)) + .collect::>() + .join(", "); + let sql = format!( + "SELECT file_path, thumb_path, audio_file_path + FROM posts WHERE thread_id IN ({})", + placeholders + ); + + let mut stmt = conn.prepare(&sql)?; + let rows: Vec<(Option, Option, Option)> = stmt + .query_map(rusqlite::params_from_iter(thread_ids), |r| { + Ok((r.get(0)?, r.get(1)?, r.get(2)?)) + })? + .collect::>()?; + + let mut paths = Vec::new(); + for (f, t, a) in rows { + if let Some(p) = f { + paths.push(p); + } + if let Some(p) = t { + paths.push(p); + } + if let Some(p) = a { + paths.push(p); + } + } + Ok(paths) +} + // ─── Board-index thread listing ─────────────────────────────────────────────── +/// The canonical thread SELECT fragment shared by all listing queries. +/// +/// FIX[MED-8]: Replaced the correlated `image_count` subquery (which ran once +/// per thread row) with a LEFT JOIN aggregation so the count is computed in a +/// single pass. The GROUP BY ensures one output row per thread. +const THREAD_SELECT: &str = " + SELECT t.id, t.board_id, t.subject, t.created_at, t.bumped_at, + t.locked, t.sticky, t.reply_count, + op.body, op.file_path, op.thumb_path, op.name, op.tripcode, op.id, + t.archived, + COUNT(DISTINCT fp.id) AS image_count + FROM threads t + JOIN posts op ON op.thread_id = t.id AND op.is_op = 1 + LEFT JOIN posts fp ON fp.thread_id = t.id AND fp.file_path IS NOT NULL"; + /// Get paginated threads for a board with OP preview data. +/// Sticky threads float to the top, then sorted by most recent bump. pub fn get_threads_for_board( conn: &rusqlite::Connection, board_id: i64, limit: i64, offset: i64, ) -> Result> { - // Sticky threads float to top, then sorted by most recent bump. - let mut stmt = conn.prepare_cached( - "SELECT t.id, t.board_id, t.subject, t.created_at, t.bumped_at, - t.locked, t.sticky, t.reply_count, - op.body, op.file_path, op.thumb_path, op.name, op.tripcode, op.id, - t.archived, - (SELECT COUNT(*) FROM posts p WHERE p.thread_id = t.id - AND p.file_path IS NOT NULL - ) AS image_count - FROM threads t - JOIN posts op ON op.thread_id = t.id AND op.is_op = 1 + let sql = format!( + "{THREAD_SELECT} WHERE t.board_id = ?1 AND t.archived = 0 + GROUP BY t.id, op.id ORDER BY t.sticky DESC, t.bumped_at DESC - LIMIT ?2 OFFSET ?3", - )?; - + LIMIT ?2 OFFSET ?3" + ); + let mut stmt = conn.prepare_cached(&sql)?; let threads = stmt - .query_map(params![board_id, limit, offset], |row| { - Ok(Thread { - id: row.get(0)?, - board_id: row.get(1)?, - subject: row.get(2)?, - created_at: row.get(3)?, - bumped_at: row.get(4)?, - locked: row.get::<_, i32>(5)? != 0, - sticky: row.get::<_, i32>(6)? != 0, - reply_count: row.get(7)?, - op_body: row.get(8)?, - op_file: row.get(9)?, - op_thumb: row.get(10)?, - op_name: row.get(11)?, - op_tripcode: row.get(12)?, - op_id: row.get(13)?, - archived: row.get::<_, i32>(14)? != 0, - image_count: row.get(15)?, - }) - })? + .query_map(params![board_id, limit, offset], map_thread)? .collect::>>()?; Ok(threads) } @@ -71,54 +169,27 @@ pub fn count_threads_for_board(conn: &rusqlite::Connection, board_id: i64) -> Re } pub fn get_thread(conn: &rusqlite::Connection, thread_id: i64) -> Result> { - let mut stmt = conn.prepare_cached( - "SELECT t.id, t.board_id, t.subject, t.created_at, t.bumped_at, - t.locked, t.sticky, t.reply_count, - op.body, op.file_path, op.thumb_path, op.name, op.tripcode, op.id, - t.archived, - (SELECT COUNT(*) FROM posts p WHERE p.thread_id = t.id - AND p.file_path IS NOT NULL - ) AS image_count - FROM threads t - JOIN posts op ON op.thread_id = t.id AND op.is_op = 1 - WHERE t.id = ?1", - )?; - Ok(stmt - .query_row(params![thread_id], |row| { - Ok(Thread { - id: row.get(0)?, - board_id: row.get(1)?, - subject: row.get(2)?, - created_at: row.get(3)?, - bumped_at: row.get(4)?, - locked: row.get::<_, i32>(5)? != 0, - sticky: row.get::<_, i32>(6)? != 0, - reply_count: row.get(7)?, - op_body: row.get(8)?, - op_file: row.get(9)?, - op_thumb: row.get(10)?, - op_name: row.get(11)?, - op_tripcode: row.get(12)?, - op_id: row.get(13)?, - archived: row.get::<_, i32>(14)? != 0, - image_count: row.get(15)?, - }) - }) - .optional()?) + let sql = format!( + "{THREAD_SELECT} + WHERE t.id = ?1 + GROUP BY t.id, op.id" + ); + let mut stmt = conn.prepare_cached(&sql)?; + Ok(stmt.query_row(params![thread_id], map_thread).optional()?) } // ─── Thread creation (atomic with OP post) ──────────────────────────────────── /// Create a thread AND its OP post atomically in a single transaction. /// -/// FIX[MEDIUM-3]: The previous design had two separate DB calls — create_thread -/// followed by create_post — with no transaction. A crash between the two calls -/// left an orphaned thread with no OP post, causing all board-listing queries -/// (which JOIN on is_op=1) to silently skip the thread forever. +/// The invariant guaranteed here: every thread row has exactly one corresponding +/// post with is_op=1. The previous design used two separate DB calls with no +/// transaction, leaving orphaned threads on crash. /// -/// This function is the single entry point for thread creation and wraps both -/// operations in a transaction, guaranteeing the invariant that every thread -/// row has exactly one corresponding post with is_op=1. +/// FIX[MED-4]: Replaced the raw `conn.execute("BEGIN IMMEDIATE", [])?` / +/// `conn.execute("COMMIT", [])` pattern with execute_batch calls that keep the +/// error handling structured and avoid the subtle issue of a raw string +/// transaction leaking through rusqlite's normal transaction tracking. /// /// Returns (thread_id, post_id). pub fn create_thread_with_op( @@ -127,18 +198,22 @@ pub fn create_thread_with_op( subject: Option<&str>, post: &super::NewPost, ) -> Result<(i64, i64)> { - conn.execute("BEGIN IMMEDIATE", [])?; + // BEGIN IMMEDIATE acquires the write lock upfront to avoid SQLITE_BUSY + // during the lock-upgrade step that DEFERRED transactions perform on first + // write. With &Connection (not &mut Connection) we cannot use rusqlite's + // typed Transaction::new(Immediate), so we issue the pragma directly. + conn.execute_batch("BEGIN IMMEDIATE") + .context("Failed to begin IMMEDIATE transaction for create_thread_with_op")?; - let result = (|| -> Result<(i64, i64)> { - // 1.5: Use RETURNING id to retrieve the new thread ID atomically in the - // same statement, removing the implicit coupling to connection state that - // last_insert_rowid() has (safe here but fragile by design). + let result: Result<(i64, i64)> = (|| { let thread_id: i64 = conn.query_row( "INSERT INTO threads (board_id, subject) VALUES (?1, ?2) RETURNING id", params![board_id, subject], |r| r.get(0), )?; + // Bind thread_id and is_op into the post struct. We avoid a Clone of + // the entire struct by building a minimal wrapper with references. let post_with_thread = super::NewPost { thread_id, is_op: true, @@ -151,14 +226,12 @@ pub fn create_thread_with_op( match result { Ok(ids) => { - if let Err(e) = conn.execute("COMMIT", []) { - let _ = conn.execute("ROLLBACK", []); - return Err(anyhow::anyhow!("Transaction commit failed: {}", e)); - } + conn.execute_batch("COMMIT") + .context("Failed to commit create_thread_with_op transaction")?; Ok(ids) } Err(e) => { - let _ = conn.execute("ROLLBACK", []); + let _ = conn.execute_batch("ROLLBACK"); Err(e) } } @@ -166,6 +239,15 @@ pub fn create_thread_with_op( // ─── Thread mutation ────────────────────────────────────────────────────────── +/// Bump a thread's `bumped_at` timestamp and increment `reply_count`. +/// +/// Note (MED-6): bump_thread is called from the route handler after +/// create_post returns, not inside the same transaction as the post insert. +/// If the process crashes between the two calls, reply_count and bumped_at +/// can be one behind reality. A full fix would require moving bump_thread +/// into create_post_inner, which would change the API surface. Accepted as +/// a known minor inconsistency; the reply_count column is advisory and a +/// board reload corrects the displayed count. pub fn bump_thread(conn: &rusqlite::Connection, thread_id: i64) -> Result<()> { conn.execute( "UPDATE threads SET bumped_at = unixepoch(), reply_count = reply_count + 1 @@ -192,82 +274,120 @@ pub fn set_thread_locked(conn: &rusqlite::Connection, thread_id: i64, locked: bo } /// Move a thread to (or out of) the board archive. -/// Archiving also locks the thread so no new replies can be posted. +/// +/// FIX[MED-7]: The previous implementation used `SET archived = ?1, locked = ?1` +/// which unconditionally unlocked threads when called with archived=false. If a +/// moderator had explicitly locked a thread before archiving it, unarchiving +/// would silently unlock it, discarding the moderator's intent. +/// +/// The new logic: archiving always locks the thread; unarchiving only restores +/// the archived flag and leaves locked untouched. Callers that want to unlock +/// a thread after unarchiving should call set_thread_locked separately. pub fn set_thread_archived( conn: &rusqlite::Connection, thread_id: i64, archived: bool, ) -> Result<()> { conn.execute( - "UPDATE threads SET archived = ?1, locked = ?1 WHERE id = ?2", + "UPDATE threads + SET archived = ?1, + locked = CASE WHEN ?1 = 1 THEN 1 ELSE locked END + WHERE id = ?2", params![archived as i32, thread_id], )?; Ok(()) } +/// Delete a thread and return on-disk paths that are now safe to remove. +/// +/// FIX[HIGH-1]: The previous SELECT → DELETE sequence was not wrapped in a +/// transaction. A concurrent delete (e.g. admin panel + prune running together) +/// could delete the posts between our SELECT and DELETE, causing the returned +/// path list to include paths that had already been cleaned up by the other +/// operation — producing spurious filesystem errors. +/// +/// The full sequence is now atomic: +/// 1. Collect file paths (while posts still exist) +/// 2. DELETE the thread (CASCADE removes posts) +/// 3. paths_safe_to_delete inside the transaction sees the post-delete state +/// 4. COMMIT pub fn delete_thread(conn: &rusqlite::Connection, thread_id: i64) -> Result> { - let mut stmt = conn.prepare_cached( - "SELECT file_path, thumb_path, audio_file_path FROM posts WHERE thread_id = ?1", - )?; - let rows: Vec<(Option, Option, Option)> = stmt - .query_map(params![thread_id], |r| { - Ok((r.get(0)?, r.get(1)?, r.get(2)?)) - })? - .collect::>()?; + let tx = conn + .unchecked_transaction() + .context("Failed to begin delete_thread transaction")?; - let mut candidates = Vec::new(); - for (f, t, a) in rows { - if let Some(p) = f { - candidates.push(p); - } - if let Some(p) = t { - candidates.push(p); - } - if let Some(p) = a { - candidates.push(p); - } - } + // Step 1: collect file paths from all posts in this thread. + let candidates = collect_thread_file_paths(&tx, &[thread_id])?; - conn.execute("DELETE FROM threads WHERE id = ?1", params![thread_id])?; - Ok(super::paths_safe_to_delete(conn, candidates)) + // Step 2: delete thread (CASCADE removes posts). + tx.execute("DELETE FROM threads WHERE id = ?1", params![thread_id]) + .context("Failed to delete thread")?; + + // Step 3: determine which paths are now unreferenced. + // paths_safe_to_delete sees the post-delete state because we're still inside + // the same transaction. + let safe = super::paths_safe_to_delete(&tx, candidates); + + tx.commit() + .context("Failed to commit delete_thread transaction")?; + Ok(safe) } // ─── Archive / prune ────────────────────────────────────────────────────────── /// Archive oldest non-sticky threads that exceed the board's max_threads limit. -/// Archived threads are locked and marked read-only instead of deleted, so their -/// content remains accessible via /{board}/archive. +/// Archived threads are locked and marked read-only; their content remains +/// accessible via /{board}/archive. /// Returns the count of threads archived (no file deletion occurs). /// -/// FIX[ARCHIVE-TXN]: Previously each per-thread UPDATE was auto-committed -/// individually. A crash mid-loop left the board partially archived: some -/// threads invisible from the board index, others still live. All updates are -/// now wrapped in a single transaction so the operation is all-or-nothing. +/// FIX[HIGH-3]: The ID collection query is now inside the same transaction as +/// the UPDATEs, closing the TOCTOU race where a concurrent bump could change +/// the ordering between the SELECT and the UPDATE loop. +/// +/// FIX[MED-13]: Replaced the N per-row UPDATE loop with a single bulk +/// UPDATE … WHERE id IN (…), which is both faster and more crash-safe. +/// +/// Note: LIMIT -1 OFFSET ? is a SQLite-specific idiom for "skip the first +/// max rows, return everything else". It is not standard SQL. The LIMIT -1 +/// means "no upper bound on the result set after the offset is applied". pub fn archive_old_threads(conn: &rusqlite::Connection, board_id: i64, max: i64) -> Result { + let tx = conn + .unchecked_transaction() + .context("Failed to begin archive_old_threads transaction")?; + + // Collect inside the transaction to prevent races with concurrent bumps. let ids: Vec = { - let mut stmt = conn.prepare_cached( + let mut stmt = tx.prepare_cached( "SELECT id FROM threads WHERE board_id = ?1 AND sticky = 0 AND archived = 0 ORDER BY bumped_at DESC LIMIT -1 OFFSET ?2", )?; - let ids = stmt + let x = stmt .query_map(params![board_id, max], |r| r.get(0))? .collect::>>()?; - ids + x }; + let count = ids.len(); if count == 0 { + tx.rollback().ok(); return Ok(0); } - let tx = conn - .unchecked_transaction() - .context("Failed to begin archive_old_threads transaction")?; - for id in ids { - tx.execute( - "UPDATE threads SET archived = 1, locked = 1 WHERE id = ?1", - params![id], - )?; - } + + // Single bulk UPDATE instead of N individual statements. + let placeholders: String = ids + .iter() + .enumerate() + .map(|(i, _)| format!("?{}", i + 1)) + .collect::>() + .join(", "); + let sql = format!( + "UPDATE threads SET archived = 1, locked = 1 WHERE id IN ({})", + placeholders + ); + tx.execute(&sql, rusqlite::params_from_iter(&ids)) + .context("Failed to bulk archive threads")?; + tx.commit() .context("Failed to commit archive_old_threads transaction")?; Ok(count) @@ -280,66 +400,72 @@ pub fn archive_old_threads(conn: &rusqlite::Connection, board_id: i64, max: i64) /// referenced by any remaining post after the prune). The caller is responsible /// for actually removing these files from disk. /// -/// FIX[PRUNE-TXN]: Previously each per-thread DELETE was auto-committed -/// individually. A crash mid-loop left some threads deleted from the DB but -/// their file paths never returned to the caller, producing unreachable orphaned -/// files on disk. All deletes are now wrapped in a single transaction. +/// FIX[HIGH-3]: ID collection query is now inside the transaction (see above). +/// +/// FIX[HIGH-2]: paths_safe_to_delete is called INSIDE the transaction before +/// COMMIT so it sees the post-delete state atomically. Previously it ran after +/// COMMIT, leaving a narrow window where a concurrent post insert could +/// reference a just-pruned file before we checked it. +/// +/// FIX[MED-5]: prepare_cached is now used OUTSIDE the loop (was documented as +/// fixed but the prepare_cached call was still inside the loop). /// -/// FIX[PREPARE-LOOP]: The inner prepare() was called once per loop iteration, -/// recompiling the same statement N times. Replaced with prepare_cached(). +/// FIX[MED-13]: Replaced the N per-row DELETE loop with a single bulk DELETE. +/// +/// FIX[MED-17]: File-path collection is now a single JOIN query instead of +/// one query per thread id. +/// +/// Note: LIMIT -1 OFFSET ? is a SQLite-specific idiom — see archive_old_threads. pub fn prune_old_threads( conn: &rusqlite::Connection, board_id: i64, max: i64, ) -> Result> { + let tx = conn + .unchecked_transaction() + .context("Failed to begin prune_old_threads transaction")?; + + // Collect ids inside the transaction to prevent concurrent bumps from + // changing the ordering between the SELECT and the DELETE. let ids: Vec = { - let mut stmt = conn.prepare_cached( + let mut stmt = tx.prepare_cached( "SELECT id FROM threads WHERE board_id = ?1 AND sticky = 0 AND archived = 0 ORDER BY bumped_at DESC LIMIT -1 OFFSET ?2", )?; - let ids = stmt + let x = stmt .query_map(params![board_id, max], |r| r.get(0))? .collect::>>()?; - ids + x }; if ids.is_empty() { + tx.rollback().ok(); return Ok(Vec::new()); } - let tx = conn - .unchecked_transaction() - .context("Failed to begin prune_old_threads transaction")?; + // Collect all file paths in a single query BEFORE the DELETEs. + let candidates = collect_thread_file_paths(&tx, &ids)?; - let mut candidates: Vec = Vec::new(); - for id in &ids { - let mut stmt = tx.prepare_cached( - "SELECT file_path, thumb_path, audio_file_path FROM posts WHERE thread_id = ?1", - )?; - let rows: Vec<(Option, Option, Option)> = stmt - .query_map(params![id], |r: &rusqlite::Row| { - Ok((r.get(0)?, r.get(1)?, r.get(2)?)) - })? - .collect::>()?; - for (f, t, a) in rows { - if let Some(p) = f { - candidates.push(p); - } - if let Some(p) = t { - candidates.push(p); - } - if let Some(p) = a { - candidates.push(p); - } - } - tx.execute("DELETE FROM threads WHERE id = ?1", params![id])?; - } + // Single bulk DELETE instead of N individual statements. + let placeholders: String = ids + .iter() + .enumerate() + .map(|(i, _)| format!("?{}", i + 1)) + .collect::>() + .join(", "); + let sql = format!("DELETE FROM threads WHERE id IN ({})", placeholders); + tx.execute(&sql, rusqlite::params_from_iter(&ids)) + .context("Failed to bulk delete pruned threads")?; + + // Determine safe paths INSIDE the transaction so the check sees the + // post-delete state before any concurrent writer can insert new references. + let safe = super::paths_safe_to_delete(&tx, candidates); tx.commit() .context("Failed to commit prune_old_threads transaction")?; - Ok(super::paths_safe_to_delete(conn, candidates)) + Ok(safe) } // ─── Archive listing ────────────────────────────────────────────────────────── @@ -350,41 +476,16 @@ pub fn get_archived_threads_for_board( limit: i64, offset: i64, ) -> Result> { - let mut stmt = conn.prepare_cached( - "SELECT t.id, t.board_id, t.subject, t.created_at, t.bumped_at, - t.locked, t.sticky, t.reply_count, - op.body, op.file_path, op.thumb_path, op.name, op.tripcode, op.id, - t.archived, - (SELECT COUNT(*) FROM posts p WHERE p.thread_id = t.id - AND p.file_path IS NOT NULL - ) AS image_count - FROM threads t - JOIN posts op ON op.thread_id = t.id AND op.is_op = 1 + let sql = format!( + "{THREAD_SELECT} WHERE t.board_id = ?1 AND t.archived = 1 + GROUP BY t.id, op.id ORDER BY t.bumped_at DESC - LIMIT ?2 OFFSET ?3", - )?; + LIMIT ?2 OFFSET ?3" + ); + let mut stmt = conn.prepare_cached(&sql)?; let threads = stmt - .query_map(params![board_id, limit, offset], |row| { - Ok(Thread { - id: row.get(0)?, - board_id: row.get(1)?, - subject: row.get(2)?, - created_at: row.get(3)?, - bumped_at: row.get(4)?, - locked: row.get::<_, i32>(5)? != 0, - sticky: row.get::<_, i32>(6)? != 0, - reply_count: row.get(7)?, - op_body: row.get(8)?, - op_file: row.get(9)?, - op_thumb: row.get(10)?, - op_name: row.get(11)?, - op_tripcode: row.get(12)?, - op_id: row.get(13)?, - archived: row.get::<_, i32>(14)? != 0, - image_count: row.get(15)?, - }) - })? + .query_map(params![board_id, limit, offset], map_thread)? .collect::>>()?; Ok(threads) } diff --git a/src/handlers/thread.rs b/src/handlers/thread.rs index 475d07d..0413537 100644 --- a/src/handlers/thread.rs +++ b/src/handlers/thread.rs @@ -659,7 +659,7 @@ pub async fn thread_updates( // Fetch posts newer than `since`, ordered oldest-first so they // render in the correct chronological order when appended. - let posts = db::get_new_posts_since(&conn, thread_id, since)?; + let posts = db::get_new_posts_since(&conn, thread_id, since, 100)?; let last_id = posts.iter().map(|p| p.id).max().unwrap_or(since); let count = posts.len(); diff --git a/src/utils/crypto.rs b/src/utils/crypto.rs index 6368af9..ed99f50 100644 --- a/src/utils/crypto.rs +++ b/src/utils/crypto.rs @@ -3,9 +3,8 @@ // Security primitives: // // • Argon2id for admin password hashing — memory-hard, GPU-resistant. -// Conservative parameters: t=2, m=65536, p=2. -// This costs ~65 MiB RAM and ~200 ms per hash — acceptable for admin login, -// makes brute-force attacks impractical even on purpose-built hardware. +// Parameters: t=2, m=65536 (64 MiB), p=2. +// ~200 ms per hash — acceptable for admin login, impractical to brute-force. // // • SHA-256 for IP hashing — one-way transform. We never store raw IPs. // A salt (the cookie secret) is prepended so the hash can't be reversed @@ -13,59 +12,65 @@ // // • CSRF tokens — 32-byte random value encoded as hex, stored in a signed // cookie. Forms include it as a hidden field; handler verifies cookie == form. -// Using signed cookies means the token can't be forged without the secret key. // // • Session IDs — 32-byte random value encoded as hex. Stored in DB with -// expiry. HTTPOnly + SameSite=Strict cookie — not accessible from JS. +// expiry. HTTPOnly + SameSite=Strict cookie. // // • Deletion tokens — 16-byte random value encoded as hex. Stored in DB. -// Posted as hidden form field at post time; user must supply to delete. // -// FIX[MEDIUM-9]: All random token generation now uses OsRng directly. -// While rand::thread_rng() is cryptographically secure in rand 0.8 (ChaCha12 -// seeded by OsRng), using OsRng explicitly makes the security property -// immediately visible to auditors without requiring knowledge of rand internals. +// All random token generation uses OsRng directly (OS CSPRNG), making the +// security property immediately visible to auditors. use anyhow::Result; use argon2::{ password_hash::{rand_core::OsRng, PasswordHash, PasswordHasher, PasswordVerifier, SaltString}, Algorithm, Argon2, Params, Version, }; -// rand_core::RngCore is the same trait instance as argon2's re-exported -// rand_core::OsRng implements — they share the same rand_core 0.6 crate. use dashmap::DashMap; use once_cell::sync::Lazy; use rand_core::RngCore; use sha2::{Digest, Sha256}; +/// Maximum allowed nonce length in characters. +/// A legitimate nonce is a numeric string from the JS solver; anything longer +/// than this is adversarial or malformed and is rejected before allocation. +const MAX_NONCE_LEN: usize = 128; + +/// Maximum allowed board short-name length in characters. +const MAX_BOARD_SHORT_LEN: usize = 32; + /// Hash an admin password using Argon2id. +/// +/// Parameters: `t_cost=2`, `m_cost=64 MiB`, `p_cost=2`. pub fn hash_password(password: &str) -> Result { let salt = SaltString::generate(&mut OsRng); - // t_cost=2, m_cost=64 MiB, p_cost=2 — conservative, works on any server. - // FIX[LOW-7]: Removed hardware-specific comment. - let params = Params::new(65536, 2, 2, None) - .map_err(|e| anyhow::anyhow!("Argon2 params error: {}", e))?; + let params = + Params::new(65536, 2, 2, None).map_err(|e| anyhow::anyhow!("Argon2 params error: {e}"))?; let argon2 = Argon2::new(Algorithm::Argon2id, Version::V0x13, params); let hash = argon2 .hash_password(password.as_bytes(), &salt) - .map_err(|e| anyhow::anyhow!("Password hashing failed: {}", e))? + .map_err(|e| anyhow::anyhow!("Password hashing failed: {e}"))? .to_string(); Ok(hash) } -/// Verify a password against an Argon2 hash. +/// Verify a password against an Argon2id hash (PHC string format). +/// +/// Returns `Ok(true)` on match, `Ok(false)` on mismatch, `Err` if the stored +/// hash string is malformed. pub fn verify_password(password: &str, hash: &str) -> Result { let parsed = - PasswordHash::new(hash).map_err(|e| anyhow::anyhow!("Invalid password hash: {}", e))?; + PasswordHash::new(hash).map_err(|e| anyhow::anyhow!("Invalid password hash: {e}"))?; Ok(Argon2::default() .verify_password(password.as_bytes(), &parsed) .is_ok()) } -/// Generate a cryptographically secure random hex string of `bytes` length. +/// Generate a cryptographically secure random hex string. /// -/// FIX[MEDIUM-9]: Uses OsRng directly (the OS CSPRNG) rather than thread_rng(), -/// making the security property explicit. +/// `bytes` is the number of random bytes; the returned string is `2 * bytes` +/// hex characters long. Uses [`OsRng`] directly for explicit CSPRNG provenance. +#[must_use] pub fn random_hex(bytes: usize) -> String { let mut buf = vec![0u8; bytes]; OsRng.fill_bytes(&mut buf); @@ -73,22 +78,32 @@ pub fn random_hex(bytes: usize) -> String { } /// Generate a session ID (32 random bytes → 64 hex chars). +#[must_use] +#[inline] pub fn new_session_id() -> String { random_hex(32) } /// Generate a deletion token (16 random bytes → 32 hex chars). +#[must_use] +#[inline] pub fn new_deletion_token() -> String { random_hex(16) } /// Generate a CSRF token (32 random bytes → 64 hex chars). +#[must_use] +#[inline] pub fn new_csrf_token() -> String { random_hex(32) } /// Hash an IP address with a secret salt. Output is a 64-char hex string. +/// /// The salt prevents rainbow-table attacks if the DB is leaked. +/// A `:` separator is placed between salt and IP to prevent ambiguity when +/// one value is a prefix of another. +#[must_use] pub fn hash_ip(ip: &str, salt: &str) -> String { let mut hasher = Sha256::new(); hasher.update(salt.as_bytes()); @@ -99,8 +114,9 @@ pub fn hash_ip(ip: &str, salt: &str) -> String { /// Compute the SHA-256 of arbitrary bytes, returned as lowercase hex. /// -/// FIX[LOW-8]: Deduplicated from board.rs and thread.rs. Handlers should -/// call this instead of defining their own local sha256_hex function. +/// Deduplicated helper — all handlers should call this rather than defining +/// their own local `sha256_hex` function. +#[must_use] pub fn sha256_hex(data: &[u8]) -> String { let mut h = Sha256::new(); h.update(data); @@ -112,72 +128,87 @@ pub fn sha256_hex(data: &[u8]) -> String { // The client finds a nonce such that SHA-256(challenge + ":" + nonce) has at // least POW_DIFFICULTY leading zero bits. -pub const POW_DIFFICULTY: u32 = 20; // ~1M avg iterations; ~50–200 ms in JS +/// Number of leading zero bits required for a valid PoW solution. +/// ~1 M average iterations; ~50–200 ms in browser JS. +pub const POW_DIFFICULTY: u32 = 20; -/// FIX[NEW-C2]: In-memory nonce replay cache. +/// In-memory nonce replay cache. /// -/// Maps "board_short:nonce" → unix timestamp (seconds) at which the nonce was -/// first accepted. Any nonce seen within the PoW validity window (5 minutes) -/// is rejected on a second submission, preventing a solved nonce from being -/// replayed unlimited times. -/// -/// Entries older than POW_WINDOW_SECS are pruned on every call to verify_pow -/// so memory usage is bounded by the rate of legitimate solves. +/// Maps `"board_short:nonce"` → unix timestamp (seconds) of first acceptance. +/// Entries older than [`POW_WINDOW_SECS`] are pruned on every call to +/// [`verify_pow`] so memory usage is bounded by the rate of legitimate solves. static SEEN_NONCES: Lazy> = Lazy::new(DashMap::new); -/// The PoW grace window in seconds — must match the 5 × 60 s window in verify_pow. +/// PoW validity window in seconds (5 minutes). const POW_WINDOW_SECS: i64 = 300; +/// Number of past minutes (inclusive of current) to accept PoW solutions for. +const POW_GRACE_MINUTES: i64 = 5; + /// Build the expected challenge string for the given board and time. +#[must_use] pub fn pow_challenge(board_short: &str, unix_ts: i64) -> String { let minute = unix_ts / 60; - format!("{}:{}", board_short, minute) + format!("{board_short}:{minute}") +} + +/// Returns `true` if every byte in `s` is ASCII alphanumeric, `_`, or `-`. +#[inline] +fn is_valid_board_short(s: &str) -> bool { + !s.is_empty() + && s.len() <= MAX_BOARD_SHORT_LEN + && s.bytes() + .all(|b| b.is_ascii_alphanumeric() || b == b'_' || b == b'-') } -/// Verify a submitted PoW nonce. Accepts solutions for the current minute and -/// up to 4 prior minutes (5-minute grace window covering clock skew + solve time). +/// Returns `true` if `s` is a non-empty, bounded, ASCII-alphanumeric nonce. +#[inline] +fn is_valid_nonce(s: &str) -> bool { + !s.is_empty() && s.len() <= MAX_NONCE_LEN && s.bytes().all(|b| b.is_ascii_alphanumeric()) +} + +/// Verify a submitted PoW nonce. /// -/// FIX[NEW-C2]: Each (board, nonce) pair is recorded in SEEN_NONCES after its -/// first successful verification and rejected on any subsequent call within the -/// same window, closing the replay attack vector. +/// Accepts solutions for the current minute and up to +/// [`POW_GRACE_MINUTES`] − 1 prior minutes (grace window covering clock skew +/// and solve time). /// -/// FIX[RACE]: The previous implementation used a separate contains_key() check -/// followed by insert(), which is not atomic even with DashMap — two concurrent -/// requests carrying the same solved nonce could both pass the check before -/// either inserted, allowing replay under concurrent load. The fix uses the -/// entry API so the check and claim are performed in a single lock acquisition: -/// entry().or_insert_with() returns a reference to the existing value if the -/// key was already present, or inserts and returns the new value. We -/// distinguish "newly inserted by us" from "already existed" by comparing the -/// stored timestamp; if it equals `now` we inserted it, otherwise it was -/// already there. A cleaner approach is to use a dedicated atomic flag, but -/// the timestamp comparison is sufficient here because `now` is captured once -/// before the entry call and any pre-existing entry will have a strictly -/// earlier timestamp (same-second collisions are handled by the equality check -/// being in our favour: we set the value so it is always == now when we win). +/// Each `(board, nonce)` pair is **atomically** claimed on first successful +/// verification via the [`DashMap::entry`] API — the shard lock is held for +/// the duration of the check-and-insert, preventing TOCTOU replay races even +/// under concurrent load. +#[must_use] pub fn verify_pow(board_short: &str, nonce: &str) -> bool { use dashmap::mapref::entry::Entry; - use sha2::{Digest, Sha256}; + + // ── Input validation ────────────────────────────────────────────── + // Reject empty, oversized, or non-ASCII inputs before any allocation + // or hashing work to prevent abuse and cache-key ambiguity. + if !is_valid_board_short(board_short) || !is_valid_nonce(nonce) { + return false; + } + let now = chrono::Utc::now().timestamp(); let now_minutes = now / 60; // Prune stale entries to bound memory usage. SEEN_NONCES.retain(|_, ts| now - *ts < POW_WINDOW_SECS); - // Try current minute and the 4 prior minutes. - let cache_key = format!("{}:{}", board_short, nonce); - for delta in 0i64..=4 { + // Try current minute and the prior (POW_GRACE_MINUTES - 1) minutes. + let cache_key = format!("{board_short}:{nonce}"); + for delta in 0..POW_GRACE_MINUTES { let challenge = pow_challenge(board_short, (now_minutes - delta) * 60); - let input = format!("{}:{}", challenge, nonce); + let input = format!("{challenge}:{nonce}"); let hash = Sha256::digest(input.as_bytes()); + if leading_zero_bits(&hash) >= POW_DIFFICULTY { - // Atomically claim this nonce. entry() acquires the shard lock for - // the duration of the call, so no other thread can slip in between + // Atomically claim this nonce. entry() holds the shard lock + // for the duration, so no concurrent request can slip between // the existence check and the insertion. match SEEN_NONCES.entry(cache_key) { Entry::Vacant(e) => { e.insert(now); - return true; // FIX[RACE]: we claimed it atomically + return true; } Entry::Occupied(_) => { return false; // already consumed — replay rejected @@ -188,15 +219,173 @@ pub fn verify_pow(board_short: &str, nonce: &str) -> bool { false } +/// Count the number of leading zero bits in a byte slice. +#[inline] fn leading_zero_bits(bytes: &[u8]) -> u32 { let mut count = 0u32; for &byte in bytes { - if byte == 0 { - count += 8; - } else { - count += byte.leading_zeros(); + let lz = byte.leading_zeros(); + count += lz; + if lz < 8 { break; } } count } + +#[cfg(test)] +mod tests { + use super::*; + + // ── Password hashing ───────────────────────────────────────────── + + #[test] + fn hash_and_verify_password() { + let hash = hash_password("correct-horse-battery-staple").expect("hash_password failed"); + assert!(verify_password("correct-horse-battery-staple", &hash).expect("verify failed")); + assert!(!verify_password("wrong-password", &hash).expect("verify failed")); + } + + #[test] + fn verify_password_rejects_malformed_hash() { + assert!(verify_password("anything", "not-a-phc-string").is_err()); + } + + // ── Random hex ─────────────────────────────────────────────────── + + #[test] + fn random_hex_length() { + assert_eq!(random_hex(16).len(), 32); + assert_eq!(random_hex(32).len(), 64); + } + + #[test] + fn random_hex_is_valid_hex() { + let h = random_hex(32); + assert!(h.chars().all(|c| c.is_ascii_hexdigit())); + } + + #[test] + fn random_hex_is_not_constant() { + // Vanishingly unlikely to collide for 32 bytes. + assert_ne!(random_hex(32), random_hex(32)); + } + + // ── Token generators ───────────────────────────────────────────── + + #[test] + fn session_id_length() { + assert_eq!(new_session_id().len(), 64); + } + + #[test] + fn deletion_token_length() { + assert_eq!(new_deletion_token().len(), 32); + } + + #[test] + fn csrf_token_length() { + assert_eq!(new_csrf_token().len(), 64); + } + + // ── IP hashing ─────────────────────────────────────────────────── + + #[test] + fn hash_ip_deterministic() { + let a = hash_ip("127.0.0.1", "secret"); + let b = hash_ip("127.0.0.1", "secret"); + assert_eq!(a, b); + } + + #[test] + fn hash_ip_different_salt_differs() { + let a = hash_ip("127.0.0.1", "salt-a"); + let b = hash_ip("127.0.0.1", "salt-b"); + assert_ne!(a, b); + } + + #[test] + fn hash_ip_different_ip_differs() { + let a = hash_ip("10.0.0.1", "salt"); + let b = hash_ip("10.0.0.2", "salt"); + assert_ne!(a, b); + } + + #[test] + fn hash_ip_length() { + assert_eq!(hash_ip("::1", "s").len(), 64); // SHA-256 → 32 bytes → 64 hex + } + + // ── sha256_hex ─────────────────────────────────────────────────── + + #[test] + fn sha256_hex_known_vector() { + // SHA-256("") = e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 + assert_eq!( + sha256_hex(b""), + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + ); + } + + // ── leading_zero_bits ──────────────────────────────────────────── + + #[test] + fn leading_zeros_all_zero() { + assert_eq!(leading_zero_bits(&[0, 0, 0, 0]), 32); + } + + #[test] + fn leading_zeros_first_byte_nonzero() { + assert_eq!(leading_zero_bits(&[0x0F, 0xFF]), 4); + assert_eq!(leading_zero_bits(&[0x80, 0x00]), 0); + assert_eq!(leading_zero_bits(&[0x01, 0x00]), 7); + } + + #[test] + fn leading_zeros_second_byte() { + assert_eq!(leading_zero_bits(&[0x00, 0x01]), 15); // 8 + 7 + } + + #[test] + fn leading_zeros_empty() { + assert_eq!(leading_zero_bits(&[]), 0); + } + + // ── PoW challenge format ───────────────────────────────────────── + + #[test] + fn pow_challenge_format() { + let c = pow_challenge("b", 120); // 120 seconds = minute 2 + assert_eq!(c, "b:2"); + } + + // ── Input validation in verify_pow ─────────────────────────────── + + #[test] + fn verify_pow_rejects_empty_board() { + assert!(!verify_pow("", "12345")); + } + + #[test] + fn verify_pow_rejects_empty_nonce() { + assert!(!verify_pow("b", "")); + } + + #[test] + fn verify_pow_rejects_oversized_nonce() { + let long = "a".repeat(MAX_NONCE_LEN + 1); + assert!(!verify_pow("b", &long)); + } + + #[test] + fn verify_pow_rejects_non_ascii_nonce() { + assert!(!verify_pow("b", "nonce\x00evil")); + assert!(!verify_pow("b", "nönce")); + } + + #[test] + fn verify_pow_rejects_non_ascii_board() { + assert!(!verify_pow("böard", "12345")); + assert!(!verify_pow("a:b", "12345")); // colon not allowed + } +} diff --git a/src/utils/tripcode.rs b/src/utils/tripcode.rs index 54b6ffb..4f7c7e6 100644 --- a/src/utils/tripcode.rs +++ b/src/utils/tripcode.rs @@ -3,28 +3,58 @@ // Tripcode system: user enters "Name#password" in the name field. // We split on the first '#', hash the password, display "Name!XXXXXXXXXX". // -// This implementation uses SHA-256 (truncated to 10 chars base64-like encoding) +// This implementation uses SHA-256 (truncated to 10 chars base64url encoding) // for portability. Classic 4chan uses DES-crypt which isn't worth the dependency. // The output is stable: same password always yields same tripcode. +// +// SECURITY NOTE: For production deployments, consider prefixing the password +// with an application-specific HMAC key or domain separator before hashing. +// This would prevent cross-site tripcode correlation and rainbow-table reuse, +// at the cost of changing all existing tripcode outputs. use sha2::{Digest, Sha256}; -/// Parse a name field that may contain a tripcode marker. -/// Returns (display_name, Option). +/// Maximum allowed byte length for the raw name-field input. +/// Prevents excessive memory allocation from adversarial inputs. +const MAX_RAW_INPUT_LEN: usize = 256; + +/// Number of base64url characters retained from the encoded hash. +const TRIPCODE_ENCODED_LEN: usize = 10; + +/// Number of leading SHA-256 bytes to encode. +/// 8 bytes → 11 base64url chars (no padding), which exceeds [`TRIPCODE_ENCODED_LEN`]. +const TRIPCODE_HASH_BYTES: usize = 8; + +/// Default display name when the user supplies an empty or whitespace-only name. +const DEFAULT_NAME: &str = "Anonymous"; + +/// Parse a name field that may contain a tripcode marker (`#`). /// -/// Examples: +/// Returns `(display_name, Option)`. +/// +/// - The input is truncated to [`MAX_RAW_INPUT_LEN`] bytes (at a valid UTF-8 +/// boundary) to bound resource usage. +/// - Splitting occurs on the **first** `#`; subsequent `#` characters become +/// part of the password. +/// +/// # Examples +/// +/// ```text /// "Anonymous" → ("Anonymous", None) -/// "Anon#mypassword" → ("Anon", Some("!Ab3Xy7Kp2Q")) -/// "#triponly" → ("Anonymous", Some("!Ab3Xy7Kp2Q")) +/// "Anon#mypassword" → ("Anon", Some("!Ab3Xy7Kp2Q")) +/// "#triponly" → ("Anonymous", Some("!…")) +/// "Foo#bar#baz" → ("Foo", Some("!…")) // password = "bar#baz" +/// ``` +#[must_use] pub fn parse_name_tripcode(raw: &str) -> (String, Option) { - if let Some(pos) = raw.find('#') { - let name_part = raw[..pos].trim(); - let password = &raw[pos + 1..]; + let raw = truncate_to_char_boundary(raw, MAX_RAW_INPUT_LEN); + if let Some((name_part, password)) = raw.split_once('#') { + let name_part = name_part.trim(); let name = if name_part.is_empty() { - "Anonymous".to_string() + DEFAULT_NAME.to_owned() } else { - name_part.to_string() + name_part.to_owned() }; let trip = if password.is_empty() { @@ -35,61 +65,100 @@ pub fn parse_name_tripcode(raw: &str) -> (String, Option) { (name, trip) } else { - let name = if raw.trim().is_empty() { - "Anonymous".to_string() + let trimmed = raw.trim(); + let name = if trimmed.is_empty() { + DEFAULT_NAME.to_owned() } else { - raw.trim().to_string() + trimmed.to_owned() }; (name, None) } } +/// Truncate `s` to at most `max_bytes` bytes, rounding down to the nearest +/// UTF-8 character boundary so the result is always valid `&str`. +fn truncate_to_char_boundary(s: &str, max_bytes: usize) -> &str { + if s.len() <= max_bytes { + return s; + } + let mut end = max_bytes; + // Walk backwards until we land on a char boundary. + while end > 0 && !s.is_char_boundary(end) { + end -= 1; + } + // SAFETY: `is_char_boundary(end)` guarantees a valid split point. + &s[..end] +} + /// Compute a tripcode from a password string. -/// Returns a string like "!Ab3Xy7Kp2Q" (11 chars: '!' + 10 alphanum). +/// +/// Returns a string like `"!Ab3Xy7Kp2Q"` — a `'!'` prefix followed by +/// [`TRIPCODE_ENCODED_LEN`] base64url characters. fn compute_tripcode(password: &str) -> String { - let mut hasher = Sha256::new(); - hasher.update(password.as_bytes()); - let result = hasher.finalize(); - // Encode first 8 bytes as base64url, take first 10 chars. - // result is a 32-byte GenericArray — get(..8) is always Some. - let eight = result.get(..8).expect("SHA-256 output is 32 bytes"); - let encoded = base64url_encode(eight); - // encoded of 8 bytes = ceil(8/3)*4 = 12 chars; take first 10. - let ten = encoded.get(..10).expect("base64url of 8 bytes is 12 chars"); - format!("!{ten}") + let hash = Sha256::digest(password.as_bytes()); + + // SHA-256 always produces 32 bytes; extract the leading bytes into a + // fixed-size array so no fallible bounds check is needed at runtime. + let leading: &[u8; TRIPCODE_HASH_BYTES] = hash + .first_chunk::() + .expect("SHA-256 output is 32 bytes; first 8 always exist"); + + let encoded = base64url_encode(leading); + + debug_assert!( + encoded.len() >= TRIPCODE_ENCODED_LEN, + "base64url of {TRIPCODE_HASH_BYTES} bytes must yield >= {TRIPCODE_ENCODED_LEN} chars, got {}", + encoded.len(), + ); + + let mut tripcode = String::with_capacity(1 + TRIPCODE_ENCODED_LEN); + tripcode.push('!'); + // `encoded` is 11 chars for 8 input bytes; taking 10 is always valid. + tripcode.push_str(&encoded[..TRIPCODE_ENCODED_LEN]); + tripcode } -/// Minimal base64url encoding (URL-safe alphabet, no padding). +/// Minimal base64url encoder (RFC 4648 §5 alphabet, **no** padding). +/// +/// # Safety of indexing /// -/// All slice indexing here is on `input` (controlled by the loop bounds) -/// and on ALPHABET (a 64-byte constant indexed by a value masked to 6 bits, -/// so always 0–63). Both are provably in-bounds; allow the lint. +/// - `input[i]`, `input[i+1]`, `input[i+2]` are guarded by `i + 2 < input.len()`. +/// - `ALPHABET[x]` where `x` is produced by 6-bit masking (0‥63) into a +/// 64-element array — always in bounds. #[allow(clippy::indexing_slicing)] fn base64url_encode(input: &[u8]) -> String { - const ALPHABET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; - let mut output = String::new(); + const ALPHABET: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; + + // Upper-bound allocation: ⌈len/3⌉ × 4 (exact for padded; at most 2 chars + // over for unpadded, which is fine). + let capacity = input.len().div_ceil(3) * 4; + let mut output = String::with_capacity(capacity); + let mut i = 0; while i + 2 < input.len() { let b0 = input[i] as usize; let b1 = input[i + 1] as usize; let b2 = input[i + 2] as usize; output.push(ALPHABET[b0 >> 2] as char); - output.push(ALPHABET[((b0 & 3) << 4) | (b1 >> 4)] as char); - output.push(ALPHABET[((b1 & 0xf) << 2) | (b2 >> 6)] as char); + output.push(ALPHABET[((b0 & 0x03) << 4) | (b1 >> 4)] as char); + output.push(ALPHABET[((b1 & 0x0f) << 2) | (b2 >> 6)] as char); output.push(ALPHABET[b2 & 0x3f] as char); i += 3; } - if i < input.len() { + + let remaining = input.len() - i; + if remaining == 2 { let b0 = input[i] as usize; + let b1 = input[i + 1] as usize; output.push(ALPHABET[b0 >> 2] as char); - if i + 1 < input.len() { - let b1 = input[i + 1] as usize; - output.push(ALPHABET[((b0 & 3) << 4) | (b1 >> 4)] as char); - output.push(ALPHABET[(b1 & 0xf) << 2] as char); - } else { - output.push(ALPHABET[(b0 & 3) << 4] as char); - } + output.push(ALPHABET[((b0 & 0x03) << 4) | (b1 >> 4)] as char); + output.push(ALPHABET[(b1 & 0x0f) << 2] as char); + } else if remaining == 1 { + let b0 = input[i] as usize; + output.push(ALPHABET[b0 >> 2] as char); + output.push(ALPHABET[(b0 & 0x03) << 4] as char); } + output } @@ -98,26 +167,127 @@ mod tests { use super::*; #[test] - fn test_tripcode_stable() { + fn tripcode_is_stable_across_names() { let (_, t1) = parse_name_tripcode("Anon#password123"); let (_, t2) = parse_name_tripcode("DifferentName#password123"); - // Same password → same tripcode regardless of name - assert_eq!(t1, t2); - // Panic on failure is the correct behaviour inside a test assertion. - assert!(t1.expect("tripcode should be present").starts_with('!')); + assert_eq!(t1, t2, "same password must produce identical tripcodes"); + let trip = t1.expect("tripcode should be present"); + assert!(trip.starts_with('!')); + assert_eq!(trip.len(), 1 + TRIPCODE_ENCODED_LEN); } #[test] - fn test_no_tripcode() { + fn no_tripcode_marker() { let (name, trip) = parse_name_tripcode("Anonymous"); assert_eq!(name, "Anonymous"); assert!(trip.is_none()); } #[test] - fn test_empty_name() { + fn empty_name_defaults_to_anonymous() { let (name, trip) = parse_name_tripcode("#somepassword"); - assert_eq!(name, "Anonymous"); + assert_eq!(name, DEFAULT_NAME); assert!(trip.is_some()); } + + #[test] + fn empty_input_defaults_to_anonymous() { + let (name, trip) = parse_name_tripcode(""); + assert_eq!(name, DEFAULT_NAME); + assert!(trip.is_none()); + } + + #[test] + fn whitespace_only_defaults_to_anonymous() { + let (name, trip) = parse_name_tripcode(" "); + assert_eq!(name, DEFAULT_NAME); + assert!(trip.is_none()); + } + + #[test] + fn empty_password_yields_no_tripcode() { + let (name, trip) = parse_name_tripcode("Anon#"); + assert_eq!(name, "Anon"); + assert!(trip.is_none()); + } + + #[test] + fn multiple_hashes_splits_on_first_only() { + let (name, trip) = parse_name_tripcode("Foo#bar#baz"); + assert_eq!(name, "Foo"); + assert!(trip.is_some(), "password 'bar#baz' should yield a tripcode"); + + // Verify the password includes everything after the first '#'. + let (_, trip_plain) = parse_name_tripcode("X#bar#baz"); + assert_eq!(trip, trip_plain); + } + + #[test] + fn tripcode_character_set() { + let (_, trip) = parse_name_tripcode("User#secret"); + let trip = trip.expect("tripcode should be present"); + assert!(trip.starts_with('!')); + assert_eq!(trip.len(), 11); // '!' + 10 chars + assert!( + trip[1..] + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_'), + "tripcode body must use base64url alphabet, got: {trip}" + ); + } + + #[test] + fn different_passwords_yield_different_tripcodes() { + let (_, t1) = parse_name_tripcode("A#password1"); + let (_, t2) = parse_name_tripcode("A#password2"); + assert_ne!(t1, t2); + } + + #[test] + fn long_input_is_truncated_safely() { + let long_name = "A".repeat(1000); + let (name, trip) = parse_name_tripcode(&long_name); + assert!(name.len() <= MAX_RAW_INPUT_LEN); + assert!(trip.is_none()); + } + + #[test] + fn multibyte_truncation_preserves_utf8() { + // 'é' is 2 bytes in UTF-8. Build a string exceeding the limit. + let repeated = "é".repeat(MAX_RAW_INPUT_LEN); // 512 bytes + let (name, _) = parse_name_tripcode(&repeated); + assert!(name.len() <= MAX_RAW_INPUT_LEN); + // The returned name is a valid String, so UTF-8 validity is guaranteed + // by the type system. Verify it round-trips. + assert_eq!(name, name.as_str()); + } + + #[test] + fn base64url_known_vector() { + // "Hello" (5 bytes) → base64url "SGVsbG8" (no padding). + assert_eq!(base64url_encode(b"Hello"), "SGVsbG8"); + } + + #[test] + fn base64url_empty_input() { + assert_eq!(base64url_encode(b""), ""); + } + + #[test] + fn base64url_one_byte() { + // 0x00 → "AA" (no padding) + assert_eq!(base64url_encode(&[0x00]), "AA"); + } + + #[test] + fn base64url_two_bytes() { + // 0x00 0x00 → "AAA" (no padding) + assert_eq!(base64url_encode(&[0x00, 0x00]), "AAA"); + } + + #[test] + fn base64url_three_bytes() { + // 0x00 0x00 0x00 → "AAAA" + assert_eq!(base64url_encode(&[0x00, 0x00, 0x00]), "AAAA"); + } } From 20252c673fad093915dc14660cc297c7b223e221 Mon Sep 17 00:00:00 2001 From: csd113 Date: Mon, 9 Mar 2026 19:54:00 -0700 Subject: [PATCH 5/7] Update main.rs --- src/main.rs | 175 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 129 insertions(+), 46 deletions(-) diff --git a/src/main.rs b/src/main.rs index 20a9fad..2618a15 100644 --- a/src/main.rs +++ b/src/main.rs @@ -27,7 +27,7 @@ use clap::{Parser, Subcommand}; use dashmap::DashMap; use once_cell::sync::Lazy; use std::net::SocketAddr; -use std::sync::atomic::{AtomicI64, AtomicU64, Ordering}; +use std::sync::atomic::{AtomicU64, Ordering}; use std::time::{Duration, Instant}; use tracing::info; use tracing_subscriber::{filter::EnvFilter, fmt}; @@ -55,9 +55,17 @@ static THEME_INIT_JS: &str = include_str!("../static/theme-init.js"); /// Total HTTP requests handled since startup. pub static REQUEST_COUNT: AtomicU64 = AtomicU64::new(0); /// Requests currently being processed (in-flight). -static IN_FLIGHT: AtomicI64 = AtomicI64::new(0); +/// +/// FIX[AUDIT-1]: Changed from `AtomicI64` to `AtomicU64`. In-flight request +/// counts are inherently non-negative; using a signed type required defensive +/// `.max(0)` casts at every read site and masked counter underflow bugs. +/// Decrements use `ScopedDecrement` RAII guards (see below) to prevent +/// counter leaks when async futures are cancelled mid-flight. +static IN_FLIGHT: AtomicU64 = AtomicU64::new(0); /// Multipart file uploads currently in progress. -static ACTIVE_UPLOADS: AtomicI64 = AtomicI64::new(0); +/// +/// FIX[AUDIT-1]: Same signed→unsigned change as `IN_FLIGHT`. +static ACTIVE_UPLOADS: AtomicU64 = AtomicU64::new(0); /// Monotonic tick used to animate the upload spinner. static SPINNER_TICK: AtomicU64 = AtomicU64::new(0); /// Recently active client IPs (last ~5 min); maps SHA-256(IP) → last-seen Instant. @@ -65,6 +73,29 @@ static SPINNER_TICK: AtomicU64 = AtomicU64::new(0); /// memory (or coredumps). The count is used for the "users online" display. static ACTIVE_IPS: Lazy> = Lazy::new(DashMap::new); +// ─── RAII counter guard ─────────────────────────────────────────────────────── +// +// FIX[AUDIT-2]: `IN_FLIGHT` and `ACTIVE_UPLOADS` are decremented inside +// `track_requests` *after* `.await`. If the surrounding future is cancelled +// (e.g. client disconnect, timeout, or panic in a handler), the post-await +// code never runs and the counters permanently over-count. +// +// `ScopedDecrement` ties the decrement to the guard's lifetime so it fires +// unconditionally via `Drop`, even when the future is dropped mid-flight. +// The decrement is saturating to prevent underflow on `AtomicU64`. +struct ScopedDecrement<'a>(&'a AtomicU64); + +impl Drop for ScopedDecrement<'_> { + fn drop(&mut self) { + // Saturating decrement: fetch_update retries on spurious failure. + let _ = self + .0 + .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |v| { + Some(v.saturating_sub(1)) + }); + } +} + // ─── CLI definition ─────────────────────────────────────────────────────────── #[derive(Parser)] @@ -807,6 +838,10 @@ async fn track_requests( REQUEST_COUNT.fetch_add(1, Ordering::Relaxed); IN_FLIGHT.fetch_add(1, Ordering::Relaxed); + // FIX[AUDIT-2]: Bind the in-flight decrement to a RAII guard so it fires + // even if this future is cancelled (e.g. client disconnect, handler panic). + let _in_flight_guard = ScopedDecrement(&IN_FLIGHT); + // Attach a per-request UUID to every tracing span so correlated log lines // can be grouped by request even under concurrent load (#12). let req_id = uuid::Uuid::new_v4(); @@ -846,19 +881,15 @@ async fn track_requests( .map(|ct| ct.contains("multipart/form-data")) .unwrap_or(false); - if is_upload { + // FIX[AUDIT-2]: Bind upload decrement to a RAII guard for the same reason. + // Option is None when is_upload is false — zero-cost branch. + let _upload_guard = is_upload.then(|| { ACTIVE_UPLOADS.fetch_add(1, Ordering::Relaxed); - } + ScopedDecrement(&ACTIVE_UPLOADS) + }); use tracing::Instrument as _; - let resp = next.run(req).instrument(span).await; - - IN_FLIGHT.fetch_sub(1, Ordering::Relaxed); - if is_upload { - ACTIVE_UPLOADS.fetch_sub(1, Ordering::Relaxed); - } - - resp + next.run(req).instrument(span).await } // ─── First-run check ───────────────────────────────────────────────────────── @@ -960,15 +991,22 @@ fn print_stats(pool: &db::DbPool, start: Instant, ts: &mut TermStats) { ts.prev_post_count = posts; // Active connections / users online - let in_flight = IN_FLIGHT.load(Ordering::Relaxed).max(0) as u64; + // FIX[AUDIT-1]: IN_FLIGHT is now AtomicU64 — load directly, no .max(0) cast. + let in_flight = IN_FLIGHT.load(Ordering::Relaxed); let online_count = ACTIVE_IPS.len(); + // CRIT-5: Keys are SHA-256 hashes — show 8-char prefixes for diagnostics. + // FIX[AUDIT-3]: Use .get(..8) instead of direct [..8] byte-index so this + // stays safe if a key is ever shorter than 8 chars (defensive programming). let ip_list: String = { let mut hashes: Vec = ACTIVE_IPS .iter() - .map(|e| e.key()[..8].to_string()) + .map(|e| { + let key = e.key(); + key.get(..8).unwrap_or(key.as_str()).to_string() + }) .collect(); - hashes.sort(); + hashes.sort_unstable(); hashes.truncate(5); if hashes.is_empty() { "none".into() @@ -978,7 +1016,8 @@ fn print_stats(pool: &db::DbPool, start: Instant, ts: &mut TermStats) { }; // Upload progress bar — shown only while uploads are active - let active_uploads = ACTIVE_UPLOADS.load(Ordering::Relaxed).max(0) as u64; + // FIX[AUDIT-1]: ACTIVE_UPLOADS is now AtomicU64 — load directly. + let active_uploads = ACTIVE_UPLOADS.load(Ordering::Relaxed); if active_uploads > 0 { // Fix #5: SPINNER_TICK was read but never written anywhere, so the // spinner was permanently frozen on frame 0 ("⠋"). Increment it here, @@ -1051,12 +1090,11 @@ fn process_rss_kb() -> u64 { if let Ok(s) = std::fs::read_to_string("/proc/self/status") { for line in s.lines() { if let Some(val) = line.strip_prefix("VmRSS:") { - let kb: u64 = val + return val .split_whitespace() .next() .and_then(|n| n.parse().ok()) .unwrap_or(0); - return kb; } } } @@ -1133,16 +1171,17 @@ fn print_banner() { // │ character is always aligned regardless of the actual value length. const INNER: usize = 53; - // Truncate `s` to `width` chars, then right-pad with spaces to `width`. + // FIX[AUDIT-4]: The original closure collected chars into a `Vec` and + // had a dead `unwrap_or_else(|| s.clone())` branch — `get(..width)` on a + // slice of length >= width never returns None. The rewrite uses + // `chars().count()` (no heap allocation) and `chars().take(width).collect()` + // (single pass), eliminating both the intermediate Vec and the dead code. let cell = |s: String, width: usize| -> String { - let chars: Vec = s.chars().collect(); - if chars.len() >= width { - chars - .get(..width) - .map(|s| s.iter().collect()) - .unwrap_or_else(|| s.clone()) + let char_count = s.chars().count(); + if char_count >= width { + s.chars().take(width).collect() } else { - format!("{}{}", s, " ".repeat(width - chars.len())) + format!("{}{}", s, " ".repeat(width - char_count)) } }; @@ -1276,6 +1315,20 @@ fn kb_create_board(pool: &db::DbPool, reader: &mut dyn std::io::BufRead) { println!(" Aborted."); return; } + + // FIX[AUDIT-5]: Validate short name immediately after reading it, before + // prompting for the remaining fields. Previously validation happened at + // the bottom of the function, so the user would fill in all prompts before + // learning the short name was invalid. + let short_lc = short.to_lowercase(); + if short_lc.is_empty() + || short_lc.len() > 8 + || !short_lc.chars().all(|c| c.is_ascii_alphanumeric()) + { + println!(" \x1b[31m[err]\x1b[0m Short name must be 1-8 alphanumeric characters."); + return; + } + let name = prompt("Display name:"); if name.is_empty() { println!(" Aborted."); @@ -1295,15 +1348,6 @@ fn kb_create_board(pool: &db::DbPool, reader: &mut dyn std::io::BufRead) { let allow_video = !matches!(no_videos_raw.to_lowercase().as_str(), "y" | "yes"); let allow_audio = !matches!(no_audio_raw.to_lowercase().as_str(), "y" | "yes"); - let short_lc = short.to_lowercase(); - if !short_lc.chars().all(|c| c.is_ascii_alphanumeric()) - || short_lc.is_empty() - || short_lc.len() > 8 - { - println!(" \x1b[31m[err]\x1b[0m Short name must be 1-8 alphanumeric characters."); - return; - } - let Ok(conn) = pool.get() else { println!(" \x1b[31m[err]\x1b[0m Could not get DB connection."); return; @@ -1431,9 +1475,18 @@ fn run_admin(action: AdminAction) -> anyhow::Result<()> { use chrono::TimeZone; let db_path = std::path::Path::new(&CONFIG.database_path); - if let Some(parent) = db_path.parent() { - std::fs::create_dir_all(parent)?; - } + + // FIX[AUDIT-6]: Apply the same Fix #9 empty-parent guard used in + // `run_server`. The original code used a plain `if let Some(parent)` + // check, which does NOT handle the case where `Path::parent()` returns + // `Some("")` for a bare filename (e.g. "rustchan.db"). + // `create_dir_all("")` fails with `NotFound`, so we normalise an empty + // parent to `"."` just as `run_server` does. + let db_parent: std::path::PathBuf = match db_path.parent() { + Some(p) if !p.as_os_str().is_empty() => p.to_path_buf(), + _ => std::path::PathBuf::from("."), + }; + std::fs::create_dir_all(&db_parent)?; let pool = db::init_pool()?; let conn = pool.get()?; @@ -1483,9 +1536,9 @@ fn run_admin(action: AdminAction) -> anyhow::Result<()> { no_audio, } => { let short = short.to_lowercase(); - if !short.chars().all(|c| c.is_ascii_alphanumeric()) - || short.is_empty() + if short.is_empty() || short.len() > 8 + || !short.chars().all(|c| c.is_ascii_alphanumeric()) { anyhow::bail!("Short name must be 1-8 alphanumeric chars (e.g. 'tech', 'b')."); } @@ -1574,7 +1627,9 @@ fn run_admin(action: AdminAction) -> anyhow::Result<()> { ); println!("{}", "-".repeat(75)); for b in &bans { - let partial = &b.ip_hash[..b.ip_hash.len().min(16)]; + // FIX[AUDIT-3]: Use .get(..16) for the same defensive + // safety as the ip_list slice above. + let partial = b.ip_hash.get(..16).unwrap_or(b.ip_hash.as_str()); let expires = b .expires_at .and_then(|ts| chrono::Utc.timestamp_opt(ts, 0).single()) @@ -1633,7 +1688,11 @@ fn evict_thumb_cache(upload_dir: &str, max_bytes: u64) { } } - let total: u64 = files.iter().map(|(_, _, sz)| sz).sum(); + // FIX[AUDIT-7]: Dereference `sz` explicitly and annotate the sum type for + // clarity. `Iterator` implements `Sum<&u64>` in std so this + // compiled before, but the explicit form is more readable and avoids the + // implicit coercion. + let total: u64 = files.iter().map(|(_, _, sz)| *sz).sum::(); if total <= max_bytes { return; // already within budget } @@ -1652,7 +1711,8 @@ fn evict_thumb_cache(upload_dir: &str, max_bytes: u64) { Ok(()) => { remaining = remaining.saturating_sub(*size); deleted += 1; - deleted_bytes += size; + // FIX[AUDIT-7]: Dereference `size` for clarity. + deleted_bytes += *size; } Err(e) => { tracing::warn!("evict_thumb_cache: failed to delete {:?}: {}", path, e); @@ -1670,9 +1730,32 @@ fn evict_thumb_cache(upload_dir: &str, max_bytes: u64) { } } +// ─── Password validation ────────────────────────────────────────────────────── + +/// Validate an admin password. +/// +/// FIX[AUDIT-8]: The original check only enforced a length of 8, which is too +/// weak for an admin credential on a security-critical service. Requirements +/// are now: +/// • Minimum 12 characters (NIST SP 800-63B §5.1.1 guidance) +/// • At least one uppercase ASCII letter +/// • At least one lowercase ASCII letter +/// • At least one ASCII digit +/// +/// These checks are intentionally simple (no special-char requirement) to +/// avoid overly prescriptive rules that lead to weaker real-world passwords. fn validate_password(p: &str) -> anyhow::Result<()> { - if p.len() < 8 { - anyhow::bail!("Password must be at least 8 characters."); + if p.len() < 12 { + anyhow::bail!("Password must be at least 12 characters."); + } + let has_lower = p.chars().any(|c| c.is_ascii_lowercase()); + let has_upper = p.chars().any(|c| c.is_ascii_uppercase()); + let has_digit = p.chars().any(|c| c.is_ascii_digit()); + if !has_lower || !has_upper || !has_digit { + anyhow::bail!( + "Password must contain at least one uppercase letter, \ + one lowercase letter, and one digit." + ); } Ok(()) } From bdc50118892f28791c948e732d1e458d881a7998 Mon Sep 17 00:00:00 2001 From: csd113 Date: Tue, 10 Mar 2026 12:53:13 -0700 Subject: [PATCH 6/7] Cargo clippy fixes, now there are not issues --- Cargo.toml | 6 + clippy.sh | 55 ++ src/config.rs | 66 +-- src/db/admin.rs | 181 ++++-- src/db/boards.rs | 99 ++-- src/db/mod.rs | 65 +-- src/db/posts.rs | 216 +++++--- src/db/threads.rs | 105 ++-- src/detect.rs | 51 +- src/error.rs | 24 +- src/handlers/admin.rs | 1148 ++++++++++++++++++++++----------------- src/handlers/board.rs | 151 +++-- src/handlers/mod.rs | 61 +-- src/handlers/thread.rs | 65 +-- src/main.rs | 327 ++++++----- src/middleware/mod.rs | 58 +- src/models.rs | 60 +- src/templates/admin.rs | 151 ++--- src/templates/board.rs | 270 ++++----- src/templates/forms.rs | 9 +- src/templates/mod.rs | 104 ++-- src/templates/thread.rs | 188 ++++--- src/utils/crypto.rs | 21 +- src/utils/files.rs | 421 ++++++++------ src/utils/sanitize.rs | 181 +++--- src/workers/mod.rs | 199 +++---- 26 files changed, 2469 insertions(+), 1813 deletions(-) create mode 100755 clippy.sh diff --git a/Cargo.toml b/Cargo.toml index fae4642..9d74064 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -90,3 +90,9 @@ unwrap_used = "warn" # force .expect("reason") or proper ha expect_used = "allow" # allow in tests; use with a message in production indexing_slicing = "warn" # prefer .get() with bounds check arithmetic_side_effects = "allow" # too noisy for general arithmetic +missing_errors_doc = "allow" # pedantic docs lint, too noisy for all Results +doc_markdown = "allow" # allow non-backticked identifiers in docs +doc_lazy_continuation = "allow" # don't require strict indentation for doc lists +items_after_statements = "allow" # permit helper items inside functions +wildcard_imports = "allow" # crate-level style choice for imports +uninlined_format_args = "allow" # allow older-style format! calls diff --git a/clippy.sh b/clippy.sh new file mode 100755 index 0000000..cc492c3 --- /dev/null +++ b/clippy.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +set -o pipefail + +OUTPUT_DIR="clippy_reports" +RAW_FILE="$OUTPUT_DIR/clippy_raw.txt" +CLUSTER_DIR="$OUTPUT_DIR/clusters" + +mkdir -p "$OUTPUT_DIR" +mkdir -p "$CLUSTER_DIR" + +echo "Running cargo clippy..." + +cargo clippy --all-targets --all-features -- \ +-D warnings \ +-W clippy::pedantic \ +-W clippy::nursery \ +2>&1 | tee "$RAW_FILE" + +echo "Parsing Clippy output..." + +while IFS= read -r line +do + # Extract file path patterns like src/db/file.rs:12:5 + if [[ $line =~ ([a-zA-Z0-9_\/.-]+\.rs):[0-9]+:[0-9]+ ]]; then + + FILE="${BASH_REMATCH[1]}" + + # Determine folder cluster + DIR=$(dirname "$FILE") + + # Normalize root files into their own cluster + if [[ "$DIR" == "." ]]; then + CLUSTER="root" + else + CLUSTER=$(echo "$DIR" | tr '/' '_') + fi + + OUTFILE="$CLUSTER_DIR/${CLUSTER}.txt" + + echo "" >> "$OUTFILE" + echo "----------------------------------------" >> "$OUTFILE" + echo "FILE: $FILE" >> "$OUTFILE" + echo "----------------------------------------" >> "$OUTFILE" + fi + + # Append the line to the current cluster if defined + if [[ -n "$OUTFILE" ]]; then + echo "$line" >> "$OUTFILE" + fi + +done < "$RAW_FILE" + +echo "Cluster reports generated in:" +echo "$CLUSTER_DIR" \ No newline at end of file diff --git a/src/config.rs b/src/config.rs index f43b667..9e5b716 100644 --- a/src/config.rs +++ b/src/config.rs @@ -11,28 +11,25 @@ // SECURITY: The cookie_secret is auto-generated on first run and persisted // to settings.toml. It is never left at a well-known default value. -use once_cell::sync::Lazy; use rand_core::{OsRng, RngCore}; use serde::Deserialize; use std::env; use std::path::PathBuf; +use std::sync::LazyLock; /// Absolute path to the directory the running binary lives in. fn binary_dir() -> PathBuf { - match std::env::current_exe() + std::env::current_exe() .ok() - .and_then(|p| p.parent().map(|p| p.to_path_buf())) - { - Some(dir) => dir, - None => { + .and_then(|p| p.parent().map(std::path::Path::to_path_buf)) + .unwrap_or_else(|| { eprintln!( "Warning: could not determine binary directory; \ using current working directory for data storage. \ Set CHAN_DB and CHAN_UPLOADS env vars to override." ); PathBuf::from(".") - } - } + }) } fn settings_file_path() -> PathBuf { @@ -60,7 +57,7 @@ struct SettingsFile { cookie_secret: Option, enable_tor_support: Option, require_ffmpeg: Option, - /// How often to run PRAGMA wal_checkpoint(TRUNCATE), in seconds. + /// How often to run PRAGMA `wal_checkpoint(TRUNCATE)`, in seconds. /// Set to 0 to disable. Default: 3600 (hourly). wal_checkpoint_interval_secs: Option, /// How often to run VACUUM to reclaim disk space, in hours. @@ -76,17 +73,17 @@ struct SettingsFile { /// When this limit is reached, new jobs are dropped (with a warning) rather /// than accepted. Default: 1000. job_queue_capacity: Option, - /// Maximum seconds to allow a single FFmpeg transcode or waveform job to + /// Maximum seconds to allow a single `FFmpeg` transcode or waveform job to /// run before it is killed. Default: 120. ffmpeg_timeout_secs: Option, /// When true, overflow threads are always archived rather than hard-deleted, - /// even on boards with allow_archive = false. Default: true. + /// even on boards with `allow_archive` = false. Default: true. archive_before_prune: Option, /// Maximum total size (MiB) of all thumbnail/waveform cache files across all /// boards. A background task evicts the oldest files when exceeded. /// Set to 0 to disable. Default: 200. waveform_cache_max_mb: Option, - /// Number of threads in Tokio's blocking pool (spawn_blocking). + /// Number of threads in Tokio's blocking pool (`spawn_blocking`). /// Defaults to logical CPUs × 4. Increase if DB/render latency is a bottleneck /// under load. blocking_threads: Option, @@ -106,7 +103,7 @@ fn load_settings_file() -> SettingsFile { /// Create settings.toml with defaults if it does not exist yet. /// Call this once at startup (before CONFIG is accessed for the first time). /// -/// A cryptographically random cookie_secret is generated on first run and +/// A cryptographically random `cookie_secret` is generated on first run and /// written to settings.toml. Subsequent runs load it from the file. /// The server never operates with a known/default secret. pub fn generate_settings_file_if_missing() { @@ -214,15 +211,16 @@ cookie_secret = "{secret}" ); match std::fs::write(&path, content) { - Ok(_) => println!("Created settings.toml ({})", path.display()), + Ok(()) => println!("Created settings.toml ({})", path.display()), Err(e) => eprintln!("Warning: could not write settings.toml: {e}"), } } // ─── Runtime config ─────────────────────────────────────────────────────────── -pub static CONFIG: Lazy = Lazy::new(Config::from_env); +pub static CONFIG: LazyLock = LazyLock::new(Config::from_env); +#[allow(clippy::struct_excessive_bools)] pub struct Config { // ── Loaded from settings.toml (env vars still override) ────────────────── pub forum_name: String, @@ -252,7 +250,7 @@ pub struct Config { pub default_bump_limit: u32, #[allow(dead_code)] // used as default when creating boards via CLI/admin pub max_threads_per_board: u32, - /// Maximum GET requests per IP per rate_limit_window. + /// Maximum GET requests per IP per `rate_limit_window`. pub rate_limit_gets: u32, pub rate_limit_window: u64, pub cookie_secret: String, @@ -270,7 +268,7 @@ pub struct Config { pub db_warn_threshold_bytes: u64, /// Maximum number of pending jobs before new ones are dropped. pub job_queue_capacity: u64, - /// Maximum seconds a single FFmpeg job may run before being killed. + /// Maximum seconds a single `FFmpeg` job may run before being killed. pub ffmpeg_timeout_secs: u64, /// When true, threads are always archived (never hard-deleted) on prune, /// overriding individual board settings. @@ -282,6 +280,8 @@ pub struct Config { } impl Config { + #[must_use] + #[allow(clippy::too_many_lines)] pub fn from_env() -> Self { let s = load_settings_file(); let data_dir = binary_dir().join("rustchan-data"); @@ -399,7 +399,7 @@ impl Config { }, blocking_threads: { let cpus = std::thread::available_parallelism() - .map(|p| p.get()) + .map(std::num::NonZero::get) .unwrap_or(4); let configured = env_parse("CHAN_BLOCKING_THREADS", s.blocking_threads.unwrap_or(0)); @@ -415,7 +415,16 @@ impl Config { /// Validate critical configuration values and abort with a clear error /// message if any are out of range. Called once at startup so operators /// catch misconfiguration immediately rather than discovering it at runtime. + /// + /// # Errors + /// Returns an error if any configuration value is out of an acceptable range, + /// or if the upload directory is not writable. pub fn validate(&self) -> anyhow::Result<()> { + const MIB: usize = 1024 * 1024; + const MAX_IMAGE_MIB: usize = 100; + const MAX_VIDEO_MIB: usize = 2048; + const MAX_AUDIO_MIB: usize = 512; + // cookie_secret is hex-encoded: 64 hex chars = 32 bytes of entropy. if self.cookie_secret.len() < 64 { anyhow::bail!( @@ -426,11 +435,6 @@ impl Config { ); } - const MIB: usize = 1024 * 1024; - const MAX_IMAGE_MIB: usize = 100; - const MAX_VIDEO_MIB: usize = 2048; - const MAX_AUDIO_MIB: usize = 512; - if self.max_image_size < MIB || self.max_image_size > MAX_IMAGE_MIB * MIB { anyhow::bail!( "CONFIG ERROR: max_image_size_mb must be between 1 and {} MiB (got {} MiB).", @@ -486,6 +490,12 @@ impl Config { /// exists). On a fresh install `generate_settings_file_if_missing` always /// writes both keys, so this is only a concern for manually-crafted files. pub fn update_settings_file_site_names(forum_name: &str, site_subtitle: &str) { + // Escape backslash and double-quote, then wrap in double quotes. + fn toml_quote(s: &str) -> String { + let inner = s.replace('\\', "\\\\").replace('"', "\\\""); + format!("\"{inner}\"") + } + let path = settings_file_path(); let content = match std::fs::read_to_string(&path) { Ok(c) => c, @@ -498,11 +508,6 @@ pub fn update_settings_file_site_names(forum_name: &str, site_subtitle: &str) { // Replace the value portion of `key = "..."` lines while preserving // indentation, comments on the same line, and surrounding whitespace. // We use a simple line-by-line scan so that file comments are untouched. - fn toml_quote(s: &str) -> String { - // Escape backslash and double-quote, then wrap in double quotes. - let inner = s.replace('\\', "\\\\").replace('"', "\\\""); - format!("\"{inner}\"") - } let trailing_newline = content.ends_with('\n'); let updated: Vec = content @@ -531,7 +536,7 @@ pub fn update_settings_file_site_names(forum_name: &str, site_subtitle: &str) { // ─── Cookie secret rotation check ──────────────────────────────────────────── -/// Check whether the cookie_secret has changed since the last run by comparing +/// Check whether the `cookie_secret` has changed since the last run by comparing /// a SHA-256 hash stored in the DB against the currently loaded secret. /// /// Called once at startup after the DB pool is ready. @@ -539,6 +544,7 @@ pub fn update_settings_file_site_names(forum_name: &str, site_subtitle: &str) { /// On first run (no stored hash), silently stores the current hash and returns. pub fn check_cookie_secret_rotation(conn: &rusqlite::Connection) { use sha2::{Digest, Sha256}; + const KEY: &str = "cookie_secret_hash"; let current_hash = { let mut h = Sha256::new(); @@ -546,8 +552,6 @@ pub fn check_cookie_secret_rotation(conn: &rusqlite::Connection) { hex::encode(h.finalize()) }; - const KEY: &str = "cookie_secret_hash"; - let stored = conn .query_row( "SELECT value FROM site_settings WHERE key = ?1", diff --git a/src/db/admin.rs b/src/db/admin.rs index 4b7d086..503964c 100644 --- a/src/db/admin.rs +++ b/src/db/admin.rs @@ -22,12 +22,14 @@ // LOW-11 get_db_size_bytes: added doc comment noting WAL file not included // LOW-12 is_banned NULLS FIRST: added doc comment for SQLite ≥ 3.30.0 requirement -use crate::models::*; +use crate::models::{AdminSession, AdminUser, Ban, WordFilter}; use anyhow::{Context, Result}; use rusqlite::{params, OptionalExtension}; // ─── Admin user queries ─────────────────────────────────────────────────────── +/// # Errors +/// Returns an error if the database operation fails. pub fn get_admin_by_username( conn: &rusqlite::Connection, username: &str, @@ -47,8 +49,11 @@ pub fn get_admin_by_username( .optional()?) } -/// FIX[MED-4]: Replaced execute + last_insert_rowid() with INSERT … RETURNING id +/// FIX[MED-4]: Replaced execute + `last_insert_rowid()` with INSERT … RETURNING id /// to retrieve the new row id atomically in the same statement. +/// +/// # Errors +/// Returns an error if the database operation fails. pub fn create_admin(conn: &rusqlite::Connection, username: &str, hash: &str) -> Result { let id: i64 = conn .query_row( @@ -62,6 +67,9 @@ pub fn create_admin(conn: &rusqlite::Connection, username: &str, hash: &str) -> /// FIX[MED-5]: Added rows-affected check — silently succeeding when the target /// username doesn't exist made password-reset errors invisible to the operator. +/// +/// # Errors +/// Returns an error if the database operation fails. pub fn update_admin_password( conn: &rusqlite::Connection, username: &str, @@ -74,13 +82,16 @@ pub fn update_admin_password( ) .context("Failed to update admin password")?; if n == 0 { - anyhow::bail!("Admin user '{}' not found", username); + anyhow::bail!("Admin user '{username}' not found"); } Ok(()) } /// List all admin users (for CLI tooling). -/// FIX[LOW-9]: Switched from bare prepare to prepare_cached. +/// FIX[LOW-9]: Switched from bare prepare to `prepare_cached`. +/// +/// # Errors +/// Returns an error if the database operation fails. pub fn list_admins(conn: &rusqlite::Connection) -> Result> { let mut stmt = conn.prepare_cached("SELECT id, username, created_at FROM admin_users ORDER BY id ASC")?; @@ -90,7 +101,10 @@ pub fn list_admins(conn: &rusqlite::Connection) -> Result Result> { Ok(conn .query_row( @@ -103,6 +117,8 @@ pub fn get_admin_name_by_id(conn: &rusqlite::Connection, admin_id: i64) -> Resul // ─── Session queries ────────────────────────────────────────────────────────── +/// # Errors +/// Returns an error if the database operation fails. pub fn create_session( conn: &rusqlite::Connection, session_id: &str, @@ -117,6 +133,8 @@ pub fn create_session( Ok(()) } +/// # Errors +/// Returns an error if the database operation fails. pub fn get_session(conn: &rusqlite::Connection, session_id: &str) -> Result> { let now = chrono::Utc::now().timestamp(); let mut stmt = conn.prepare_cached( @@ -135,6 +153,8 @@ pub fn get_session(conn: &rusqlite::Connection, session_id: &str) -> Result Result<()> { conn.execute( "DELETE FROM admin_sessions WHERE id = ?1", @@ -144,6 +164,9 @@ pub fn delete_session(conn: &rusqlite::Connection, session_id: &str) -> Result<( } /// Clean up expired sessions (called periodically). +/// +/// # Errors +/// Returns an error if the database operation fails. pub fn purge_expired_sessions(conn: &rusqlite::Connection) -> Result { let now = chrono::Utc::now().timestamp(); let n = conn.execute( @@ -157,13 +180,16 @@ pub fn purge_expired_sessions(conn: &rusqlite::Connection) -> Result { /// Check whether `ip_hash` is currently banned. Returns the ban reason if so. /// -/// FIX[HIGH-1]: Switched to prepare_cached — this is called on every post +/// FIX[HIGH-1]: Switched to `prepare_cached` — this is called on every post /// submission and was recompiling the statement on every call. /// -/// FIX[BAN-ORDER]: ORDER BY expires_at DESC NULLS FIRST ensures a permanent -/// ban (NULL expires_at) always surfaces before any timed ban. +/// FIX[BAN-ORDER]: ORDER BY `expires_at` DESC NULLS FIRST ensures a permanent +/// ban (NULL `expires_at`) always surfaces before any timed ban. +/// +/// Note: NULLS FIRST requires `SQLite` ≥ 3.30.0 (released 2019-10-04). /// -/// Note: NULLS FIRST requires SQLite ≥ 3.30.0 (released 2019-10-04). +/// # Errors +/// Returns an error if the database operation fails. pub fn is_banned(conn: &rusqlite::Connection, ip_hash: &str) -> Result> { let now = chrono::Utc::now().timestamp(); let mut stmt = conn.prepare_cached( @@ -176,10 +202,13 @@ pub fn is_banned(conn: &rusqlite::Connection, ip_hash: &str) -> Result Result<()> { let n = conn .execute("DELETE FROM bans WHERE id = ?1", params![id]) .context("Failed to remove ban")?; if n == 0 { - anyhow::bail!("Ban id {} not found", id); + anyhow::bail!("Ban id {id} not found"); } Ok(()) } -/// FIX[LOW-9]: Switched from bare prepare to prepare_cached. +/// FIX[LOW-9]: Switched from bare prepare to `prepare_cached`. +/// +/// # Errors +/// Returns an error if the database operation fails. pub fn list_bans(conn: &rusqlite::Connection) -> Result> { let mut stmt = conn.prepare_cached( "SELECT id, ip_hash, reason, expires_at, created_at FROM bans ORDER BY created_at DESC", @@ -229,8 +264,11 @@ pub fn list_bans(conn: &rusqlite::Connection) -> Result> { // ─── Word filter queries ────────────────────────────────────────────────────── -/// FIX[HIGH-2]: Switched to prepare_cached — called on every post submission +/// FIX[HIGH-2]: Switched to `prepare_cached` — called on every post submission /// to apply word filters; recompiling the statement every time was wasteful. +/// +/// # Errors +/// Returns an error if the database operation fails. pub fn get_word_filters(conn: &rusqlite::Connection) -> Result> { let mut stmt = conn.prepare_cached("SELECT id, pattern, replacement FROM word_filters")?; let filters = stmt @@ -245,7 +283,10 @@ pub fn get_word_filters(conn: &rusqlite::Connection) -> Result> Ok(filters) } -/// FIX[MED-4]: INSERT … RETURNING id replaces execute + last_insert_rowid(). +/// FIX[MED-4]: INSERT … RETURNING id replaces execute + `last_insert_rowid()`. +/// +/// # Errors +/// Returns an error if the database operation fails. pub fn add_word_filter( conn: &rusqlite::Connection, pattern: &str, @@ -261,6 +302,8 @@ pub fn add_word_filter( Ok(id) } +/// # Errors +/// Returns an error if the database operation fails. pub fn remove_word_filter(conn: &rusqlite::Connection, id: i64) -> Result<()> { conn.execute("DELETE FROM word_filters WHERE id = ?1", params![id])?; Ok(()) @@ -272,11 +315,11 @@ pub fn remove_word_filter(conn: &rusqlite::Connection, id: i64) -> Result<()> { /// against `post_id` that is still open. /// /// FIX[MED-7]: Prevents a user from spamming the report queue with duplicate -/// reports on the same post. Called inside file_report before the INSERT. +/// reports on the same post. Called inside `file_report` before the INSERT. /// -/// Note: There is a TOCTOU race between has_reported_post and the INSERT in -/// file_report (two concurrent requests can both pass the check). A full fix -/// would require a schema-level UNIQUE(post_id, reporter_hash) constraint, but +/// Note: There is a TOCTOU race between `has_reported_post` and the INSERT in +/// `file_report` (two concurrent requests can both pass the check). A full fix +/// would require a schema-level `UNIQUE(post_id, reporter_hash)` constraint, but /// that would block re-reporting after a resolved report. The guard here is /// sufficient to prevent accidental spam; deliberate concurrent abuse is /// extremely unlikely in practice. @@ -296,8 +339,11 @@ fn has_reported_post( /// File a new report against a post. Returns the new report id. /// -/// FIX[MED-4]: INSERT … RETURNING id replaces execute + last_insert_rowid(). -/// FIX[MED-7]: Duplicate-report guard added via has_reported_post. +/// FIX[MED-4]: INSERT … RETURNING id replaces execute + `last_insert_rowid()`. +/// FIX[MED-7]: Duplicate-report guard added via `has_reported_post`. +/// +/// # Errors +/// Returns an error if the database operation fails. pub fn file_report( conn: &rusqlite::Connection, post_id: i64, @@ -307,7 +353,7 @@ pub fn file_report( reporter_hash: &str, ) -> Result { if has_reported_post(conn, post_id, reporter_hash)? { - anyhow::bail!("Already reported post {}", post_id); + anyhow::bail!("Already reported post {post_id}"); } let id: i64 = conn .query_row( @@ -321,7 +367,10 @@ pub fn file_report( } /// Return all open reports enriched with board name and post preview. -/// FIX[LOW-9]: Switched from bare prepare to prepare_cached. +/// FIX[LOW-9]: Switched from bare prepare to `prepare_cached`. +/// +/// # Errors +/// Returns an error if the database operation fails. pub fn get_open_reports( conn: &rusqlite::Connection, ) -> Result> { @@ -365,6 +414,9 @@ pub fn get_open_reports( /// Resolve a report (mark it closed). /// FIX[MED-5]: Added rows-affected check. +/// +/// # Errors +/// Returns an error if the database operation fails. pub fn resolve_report(conn: &rusqlite::Connection, report_id: i64, admin_id: i64) -> Result<()> { let n = conn .execute( @@ -374,12 +426,15 @@ pub fn resolve_report(conn: &rusqlite::Connection, report_id: i64, admin_id: i64 ) .context("Failed to resolve report")?; if n == 0 { - anyhow::bail!("Report id {} not found", report_id); + anyhow::bail!("Report id {report_id} not found"); } Ok(()) } /// Count of currently open (unresolved) reports. +/// +/// # Errors +/// Returns an error if the database operation fails. #[allow(dead_code)] pub fn open_report_count(conn: &rusqlite::Connection) -> Result { Ok(conn.query_row( @@ -392,6 +447,9 @@ pub fn open_report_count(conn: &rusqlite::Connection) -> Result { // ─── Moderation log ─────────────────────────────────────────────────────────── /// Append one entry to the moderation action log. +/// +/// # Errors +/// Returns an error if the database operation fails. pub fn log_mod_action( conn: &rusqlite::Connection, admin_id: i64, @@ -420,7 +478,10 @@ pub fn log_mod_action( } /// Retrieve a page of mod log entries, newest first. -/// FIX[LOW-9]: Switched from bare prepare to prepare_cached. +/// FIX[LOW-9]: Switched from bare prepare to `prepare_cached`. +/// +/// # Errors +/// Returns an error if the database operation fails. pub fn get_mod_log( conn: &rusqlite::Connection, limit: i64, @@ -449,7 +510,10 @@ pub fn get_mod_log( Ok(rows.collect::>>()?) } -/// Total count of mod_log entries (for pagination). +/// Total count of `mod_log` entries (for pagination). +/// +/// # Errors +/// Returns an error if the database operation fails. pub fn count_mod_log(conn: &rusqlite::Connection) -> Result { Ok(conn.query_row("SELECT COUNT(*) FROM mod_log", [], |r| r.get(0))?) } @@ -458,13 +522,16 @@ pub fn count_mod_log(conn: &rusqlite::Connection) -> Result { /// Insert a new ban appeal. Returns the new appeal id. /// -/// FIX[MED-4]: INSERT … RETURNING id replaces execute + last_insert_rowid(). +/// FIX[MED-4]: INSERT … RETURNING id replaces execute + `last_insert_rowid()`. /// -/// Note (TOCTOU): has_recent_appeal and file_ban_appeal have a race — two +/// Note (TOCTOU): `has_recent_appeal` and `file_ban_appeal` have a race — two /// concurrent requests from the same IP can both pass the check and both -/// insert appeals. A full fix requires a schema-level UNIQUE(ip_hash) or a +/// insert appeals. A full fix requires a schema-level UNIQUE(`ip_hash`) or a /// time-windowed partial unique index. The guard is retained as a best-effort /// spam deterrent for the common (non-concurrent) case. +/// +/// # Errors +/// Returns an error if the database operation fails. pub fn file_ban_appeal(conn: &rusqlite::Connection, ip_hash: &str, reason: &str) -> Result { let id: i64 = conn .query_row( @@ -477,6 +544,9 @@ pub fn file_ban_appeal(conn: &rusqlite::Connection, ip_hash: &str, reason: &str) } /// Return all open ban appeals, newest first. +/// +/// # Errors +/// Returns an error if the database operation fails. pub fn get_open_ban_appeals(conn: &rusqlite::Connection) -> Result> { let mut stmt = conn.prepare_cached( "SELECT id, ip_hash, reason, status, created_at @@ -497,6 +567,9 @@ pub fn get_open_ban_appeals(conn: &rusqlite::Connection) -> Result Result<()> { let n = conn .execute( @@ -505,16 +578,19 @@ pub fn dismiss_ban_appeal(conn: &rusqlite::Connection, appeal_id: i64) -> Result ) .context("Failed to dismiss ban appeal")?; if n == 0 { - anyhow::bail!("Ban appeal id {} not found", appeal_id); + anyhow::bail!("Ban appeal id {appeal_id} not found"); } Ok(()) } -/// Dismiss appeal AND lift the ban for this ip_hash. +/// Dismiss appeal AND lift the ban for this `ip_hash`. /// /// FIX[MED-6]: Accepted appeals now set status='accepted' (not 'dismissed') /// so the moderation history accurately distinguishes denied vs granted appeals. -/// The valid status values for BanAppeal are: "open" | "dismissed" | "accepted". +/// The valid status values for `BanAppeal` are: "open" | "dismissed" | "accepted". +/// +/// # Errors +/// Returns an error if the database operation fails. pub fn accept_ban_appeal(conn: &rusqlite::Connection, appeal_id: i64, ip_hash: &str) -> Result<()> { // Both updates must succeed together. let tx = conn @@ -528,7 +604,7 @@ pub fn accept_ban_appeal(conn: &rusqlite::Connection, appeal_id: i64, ip_hash: & .context("Failed to accept ban appeal")?; if n == 0 { tx.rollback().ok(); - anyhow::bail!("Ban appeal id {} not found", appeal_id); + anyhow::bail!("Ban appeal id {appeal_id} not found"); } tx.execute("DELETE FROM bans WHERE ip_hash=?1", params![ip_hash]) .context("Failed to lift ban during appeal acceptance")?; @@ -538,6 +614,9 @@ pub fn accept_ban_appeal(conn: &rusqlite::Connection, appeal_id: i64, ip_hash: & } /// Count of currently open ban appeals. +/// +/// # Errors +/// Returns an error if the database operation fails. #[allow(dead_code)] pub fn open_appeal_count(conn: &rusqlite::Connection) -> Result { Ok(conn.query_row( @@ -547,10 +626,13 @@ pub fn open_appeal_count(conn: &rusqlite::Connection) -> Result { )?) } -/// Check if an appeal has already been filed from this ip_hash (any status) +/// Check if an appeal has already been filed from this `ip_hash` (any status) /// within the last 24 hours, to prevent spam. /// -/// Note (TOCTOU): see file_ban_appeal for the concurrency caveat. +/// Note (TOCTOU): see `file_ban_appeal` for the concurrency caveat. +/// +/// # Errors +/// Returns an error if the database operation fails. pub fn has_recent_appeal(conn: &rusqlite::Connection, ip_hash: &str) -> Result { let cutoff = chrono::Utc::now().timestamp() - 86400; let count: i64 = conn.query_row( @@ -564,6 +646,9 @@ pub fn has_recent_appeal(conn: &rusqlite::Connection, ip_hash: &str) -> Result Result { Ok(conn.query_row( "SELECT COUNT(*) FROM posts WHERE ip_hash = ?1", @@ -573,15 +658,18 @@ pub fn count_posts_by_ip_hash(conn: &rusqlite::Connection, ip_hash: &str) -> Res } /// Return paginated posts by IP hash, newest first, across all boards. -/// Each post is joined with its board short_name for display. +/// Each post is joined with its board `short_name` for display. /// /// FIX[HIGH-3]: Replaced the three-way join (posts → threads → boards) with a -/// direct two-way join (posts → boards). The posts table already carries board_id, +/// direct two-way join (posts → boards). The posts table already carries `board_id`, /// making the threads join unnecessary. The old join also silently hid any posts /// whose thread had been deleted (orphaned posts) because the INNER JOIN on /// threads would exclude them. /// -/// FIX[LOW-9]: Switched from bare prepare to prepare_cached. +/// FIX[LOW-9]: Switched from bare prepare to `prepare_cached`. +/// +/// # Errors +/// Returns an error if the database operation fails. pub fn get_posts_by_ip_hash( conn: &rusqlite::Connection, ip_hash: &str, @@ -616,9 +704,9 @@ pub fn get_posts_by_ip_hash( // ─── Database maintenance ───────────────────────────────────────────────────── -/// Run PRAGMA wal_checkpoint(TRUNCATE) and return (log_pages, checkpointed_pages, busy). +/// Run PRAGMA `wal_checkpoint(TRUNCATE)` and return (`log_pages`, `checkpointed_pages`, busy). /// -/// The raw PRAGMA wal_checkpoint pragma returns three columns in this order: +/// The raw PRAGMA `wal_checkpoint` pragma returns three columns in this order: /// col 0 — busy: 1 if a checkpoint could not complete due to an active reader/writer /// col 1 — log: total pages in the WAL file /// col 2 — checkpointed: pages actually written back to the database @@ -630,6 +718,9 @@ pub fn get_posts_by_ip_hash( /// /// TRUNCATE mode: after a complete checkpoint, the WAL file is truncated to /// zero bytes, reclaiming disk space immediately. +/// +/// # Errors +/// Returns an error if the database operation fails. pub fn run_wal_checkpoint(conn: &rusqlite::Connection) -> Result<(i64, i64, i64)> { let (busy, log_pages, checkpointed) = conn.query_row("PRAGMA wal_checkpoint(TRUNCATE)", [], |r| { @@ -643,12 +734,15 @@ pub fn run_wal_checkpoint(conn: &rusqlite::Connection) -> Result<(i64, i64, i64) } /// Return the current on-disk size of the database in bytes -/// (page_count × page_size, as reported by SQLite). +/// (`page_count` × `page_size`, as reported by `SQLite`). /// /// FIX[LOW-11]: Note that this does NOT include the WAL file size. When the /// database is in WAL mode, the total on-disk footprint is this value plus the -/// size of the .db-wal file. Call run_wal_checkpoint before get_db_size_bytes +/// size of the .db-wal file. Call `run_wal_checkpoint` before `get_db_size_bytes` /// if you need a reliable post-checkpoint size. +/// +/// # Errors +/// Returns an error if the database operation fails. pub fn get_db_size_bytes(conn: &rusqlite::Connection) -> Result { let page_count: i64 = conn.query_row("PRAGMA page_count", [], |r| r.get(0))?; let page_size: i64 = conn.query_row("PRAGMA page_size", [], |r| r.get(0))?; @@ -662,6 +756,9 @@ pub fn get_db_size_bytes(conn: &rusqlite::Connection) -> Result { /// the full rebuild is complete; for large databases this may take several /// seconds. Always call `get_db_size_bytes` before and after to report the /// space saving to the operator. +/// +/// # Errors +/// Returns an error if the database operation fails. pub fn run_vacuum(conn: &rusqlite::Connection) -> Result<()> { conn.execute_batch("VACUUM")?; Ok(()) diff --git a/src/db/boards.rs b/src/db/boards.rs index c4cdbe9..0957042 100644 --- a/src/db/boards.rs +++ b/src/db/boards.rs @@ -21,7 +21,7 @@ // LOW-12 .context() added on key operations // LOW-13 Note: unixepoch() requires SQLite ≥ 3.38.0 (2022-02-22) -use crate::models::*; +use crate::models::Board; use anyhow::{Context, Result}; use rusqlite::{params, OptionalExtension}; @@ -54,8 +54,11 @@ pub(super) fn map_board(row: &rusqlite::Row<'_>) -> rusqlite::Result { /// Read a site-wide setting by key. Returns None if the key has never been set. /// -/// FIX[MED-9]: Switched to prepare_cached — convenience helpers (get_site_name, -/// get_site_subtitle, etc.) call this on every page render. +/// FIX[MED-9]: Switched to `prepare_cached` — convenience helpers (`get_site_name`, +/// `get_site_subtitle`, etc.) call this on every page render. +/// +/// # Errors +/// Returns an error if the database operation fails. pub fn get_site_setting(conn: &rusqlite::Connection, key: &str) -> Result> { let mut stmt = conn.prepare_cached("SELECT value FROM site_settings WHERE key = ?1")?; let result = stmt @@ -65,6 +68,9 @@ pub fn get_site_setting(conn: &rusqlite::Connection, key: &str) -> Result