From 697fc19594db67af6fb87452190de3ceee42948a Mon Sep 17 00:00:00 2001 From: Joy Barot Date: Sat, 7 Mar 2026 18:18:13 -0500 Subject: [PATCH] feat: new interactive Web UI with offline support - Added a brand new, visually interactive `codecat web` interface. - Implemented fully offline functionality by bundling local CSS/JS assets (no external CDNs). - Minor fixes in the core codebase for improved performance and readability - Expanded README to document the new `codecat web` command and features. --- .flake8 | 3 +- README.md | 10 + codecat.spec | 19 +- pyproject.toml | 4 +- src/codecat/__init__.py | 2 +- src/codecat/__main__.py | 11 +- src/codecat/cli_app.py | 54 +- src/codecat/config.py | 12 +- src/codecat/file_processor.py | 4 +- src/codecat/file_scanner.py | 46 +- src/codecat/markdown_generator.py | 4 +- src/codecat/web_ui.py | 945 ++++++++++++++++++++++++++++++ tests/test_cli_app.py | 2 +- tests/test_config.py | 12 +- tests/test_file_scanner.py | 14 +- tests/test_web_ui.py | 426 ++++++++++++++ 16 files changed, 1493 insertions(+), 75 deletions(-) create mode 100644 src/codecat/web_ui.py create mode 100644 tests/test_web_ui.py diff --git a/.flake8 b/.flake8 index 171648c..0045200 100644 --- a/.flake8 +++ b/.flake8 @@ -9,7 +9,8 @@ max-line-length = 88 # W503: Line break before binary operator # W504: line break after binary operator # E203: Whitespace before ':' -ignore = E501, W503, W504, E203 +# W291/W293: trailing/blank-line whitespace +ignore = E501, W503, W504, E203, W291, W293 # A list of files and directories to exclude from linting. exclude = diff --git a/README.md b/README.md index 642c1e8..1ad703f 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,12 @@ Codecat is a lightning-fast, Python-powered CLI tool that **aggregates your enti - **Dynamic fence handling** for code blocks containing backticks - **Glob pattern support** for flexible file inclusion/exclusion +### 🌐 **Interactive Web UI** + +- **Visual Dashboard** provides a browser-based interface to manage everything +- **Fully Offline** operation with bundled assets—no internet required +- **Seamless Integration** perfectly reflects all CLI capabilities + ### ⚙️ **Highly Configurable** - **JSON configuration** with sensible defaults @@ -123,6 +129,7 @@ codecat stats . | Command | Description | Example | | ------------------------- | ----------------------------------------- | -------------------------- | | `codecat run ` | Scan directory and create Markdown output | `codecat run ./my-project` | +| `codecat web` | Launch the interactive Web UI | `codecat web` | | `codecat stats ` | Show project statistics without output | `codecat stats .` | | `codecat generate-config` | Create configuration template | `codecat generate-config` | @@ -155,6 +162,9 @@ codecat run . # Simple scan of current directory codecat run . +# Launch the interactive web interface +codecat web + # Scan specific directory with custom output codecat run ./my-project --output-file "project-complete.md" diff --git a/codecat.spec b/codecat.spec index d09f821..49b31a9 100644 --- a/codecat.spec +++ b/codecat.spec @@ -1,21 +1,20 @@ # -*- mode: python ; coding: utf-8 -*- -block_cipher = None a = Analysis( ['src/codecat/__main__.py'], - pathex=['src'], + pathex=[], binaries=[], - datas=[('assets/favicon.ico', '.')], + datas=[], hiddenimports=[], hookspath=[], + hooksconfig={}, runtime_hooks=[], excludes=[], noarchive=False, optimize=0, ) - -pyz = PYZ(a.pure, cipher=block_cipher) +pyz = PYZ(a.pure) exe = EXE( pyz, @@ -31,7 +30,9 @@ exe = EXE( upx_exclude=[], runtime_tmpdir=None, console=True, - icon='assets/favicon.ico', - include_binaries=True, - version='file_version_info.txt', -) \ No newline at end of file + disable_windowed_traceback=False, + argv_emulation=False, + target_arch=None, + codesign_identity=None, + entitlements_file=None, +) diff --git a/pyproject.toml b/pyproject.toml index eb4f8c9..b54f11b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta" # --- Project Metadata --- [project] name = "codecat" -version = "1.0.2" +version = "1.1.0" description = "A powerful, feature-rich command-line tool to aggregate source code into a single Markdown file." readme = "README.md" requires-python = ">=3.10" @@ -19,7 +19,7 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", - "License :: OSI Approved :: GPL-3.0-only", + "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", "Operating System :: OS Independent", "Topic :: Software Development :: Documentation", "Topic :: Text Processing", diff --git a/src/codecat/__init__.py b/src/codecat/__init__.py index 4dc25d2..af69535 100644 --- a/src/codecat/__init__.py +++ b/src/codecat/__init__.py @@ -7,4 +7,4 @@ This package contains the core logic and metadata for the Codecat application. """ -__version__ = "1.0.2" +__version__ = "1.1.0" diff --git a/src/codecat/__main__.py b/src/codecat/__main__.py index 1e8e258..3797c7e 100644 --- a/src/codecat/__main__.py +++ b/src/codecat/__main__.py @@ -9,6 +9,7 @@ and also serves as the entry for PyInstaller-built executables. """ +import logging import os import sys @@ -39,7 +40,7 @@ def main(): "Example Usage:\n" " [cyan]codecat --help[/cyan]\n\n" "For more information, visit the GitHub README:\n" - " [cyan][link=https://github.com/exonymos/codecat?tab=readme-ov-file#-codecat]https://github.com/exonymos/codecat[/link][/cyan]", + " [cyan][link=https://github.com/exonymos/codecat]https://github.com/exonymos/codecat[/link][/cyan]", title="Usage Error", border_style="red", padding=(1, 2), @@ -50,8 +51,14 @@ def main(): os.system("pause") sys.exit(1) + logging.basicConfig( + level=logging.WARNING, + stream=sys.stderr, + format="%(message)s", + ) + # If it's a valid terminal session or has arguments, run the main Typer app. - app() + app(prog_name="codecat") if __name__ == "__main__": diff --git a/src/codecat/cli_app.py b/src/codecat/cli_app.py index e7bf889..318cae6 100644 --- a/src/codecat/cli_app.py +++ b/src/codecat/cli_app.py @@ -14,7 +14,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from contextlib import nullcontext from pathlib import Path -from typing import List, Optional +from typing import Optional import typer from rich.console import Console @@ -30,6 +30,7 @@ from codecat.file_processor import ProcessedFileData, process_file from codecat.file_scanner import scan_project from codecat.markdown_generator import generate_markdown +from codecat.web_ui import start_web_app # --- Initialize Rich Console for output --- console = Console(stderr=True, highlight=False) @@ -78,7 +79,7 @@ def version_callback(value: bool): ] IncludePatterns = Annotated[ - Optional[List[str]], + Optional[list[str]], typer.Option( "--include", "-i", @@ -87,7 +88,7 @@ def version_callback(value: bool): ] ExcludePatterns = Annotated[ - Optional[List[str]], + Optional[list[str]], typer.Option( "--exclude", "-e", @@ -98,7 +99,7 @@ def version_callback(value: bool): # --- Helper Functions for Rich UI and Output --- def _create_summary_table( - processed_results: List[ProcessedFileData], project_path: Path + processed_results: list[ProcessedFileData], project_path: Path ) -> Table: """Creates a Rich Table summarizing the results of a scan.""" summary = Table( @@ -156,7 +157,7 @@ def _log_initial_info( def _scan_project_files( project_path: Path, effective_config: dict, show_ui: bool -) -> List[Path]: +) -> list[Path]: """Scans the project for files to process, handling UI and errors.""" scan_status_text = f"Scanning files in [cyan]'{project_path.name}'[/cyan]..." scan_context = ( @@ -189,17 +190,17 @@ def _scan_project_files( def _process_files_parallel( - files_to_scan: List[Path], + files_to_scan: list[Path], project_path: Path, effective_config: dict, show_ui: bool, max_workers: Optional[int], -) -> List[ProcessedFileData]: +) -> list[ProcessedFileData]: """ Processes a list of files in parallel, showing a static message and handling errors. Returns a sorted list of ProcessedFileData objects. """ - processed_results: List[ProcessedFileData] = [] + processed_results: list[ProcessedFileData] = [] is_verbose = effective_config.get("verbose", False) stop_on_error = effective_config.get("stop_on_error", False) @@ -263,7 +264,7 @@ def _orchestrate_scan( effective_config: dict, show_ui: bool, max_workers: Optional[int], -) -> List[ProcessedFileData]: +) -> list[ProcessedFileData]: """ Handles the shared logic of scanning and processing files for any command. @@ -340,6 +341,8 @@ def run( Scans a project, aggregates files, and compiles them into a single Markdown file. """ is_verbose = verbose and not silent + # NOTE: Checking for pytest in sys.modules avoids the Rich Status spinner conflicting + # with the test runner's output capture. This is a deliberate practical trade-off; is_testing = "pytest" in sys.modules show_ui = not is_verbose and not silent and not is_testing @@ -431,7 +434,8 @@ def stats( ] for file_data in text_files: - assert file_data.content is not None + if file_data.content is None: + continue lang = lang_map.get(file_data.path.suffix.lower(), "text") lang_counts[lang] += 1 num_lines = len(file_data.content.splitlines()) @@ -515,8 +519,9 @@ def generate_config( raise typer.Exit(code=1) try: - with open(config_file_path, "w", encoding="utf-8") as f: - json.dump(DEFAULT_CONFIG, f, indent=4) + config_file_path.write_text( + json.dumps(DEFAULT_CONFIG, indent=4), encoding="utf-8" + ) console.print( f"Successfully generated config file: [green]{config_file_path.resolve()}[/green]" ) @@ -527,6 +532,31 @@ def generate_config( raise typer.Exit(code=1) +@app.command(name="web") +def web( + project_path: ProjectPath = Path("."), + port: Annotated[ + int, + typer.Option( + "--port", + "-p", + help="The port to bind the web server to. Defaults to 8080. If in use, it will find the next available port.", + ), + ] = 8080, +): + """ + Launch the optional Codecat Web Interface. + """ + console.print( + Panel( + f"🌐 [bold]Starting Codecat Web Interface[/bold]\n" + f"Target Directory: [cyan]{project_path.resolve()}[/cyan]", + border_style="magenta", + ) + ) + start_web_app(port=port, project_path=project_path) + + @app.callback() def main_callback( version: Annotated[ diff --git a/src/codecat/config.py b/src/codecat/config.py index 8e7036e..413e417 100644 --- a/src/codecat/config.py +++ b/src/codecat/config.py @@ -10,12 +10,10 @@ import copy import json +import logging from pathlib import Path from typing import Any, Optional -import typer -from typer import colors as typer_colors - # Import constants for default config file names. from codecat.constants import DEFAULT_CONFIG_FILENAME, DEFAULT_OUTPUT_FILENAME @@ -354,10 +352,10 @@ def _load_user_config_from_file( } return user_config, True except (json.JSONDecodeError, IOError) as e: - typer.secho( - f"Notice: Could not load or parse config '{config_path.resolve()}'. Error: {e}.", - fg=typer_colors.YELLOW, - err=True, + logging.warning( + "Notice: Could not load or parse config '%s'. Error: %s.", + config_path.resolve(), + e, ) return None, False return None, False diff --git a/src/codecat/file_processor.py b/src/codecat/file_processor.py index 2bff3fa..113c2aa 100644 --- a/src/codecat/file_processor.py +++ b/src/codecat/file_processor.py @@ -46,9 +46,7 @@ def _is_likely_binary_by_nulls(chunk: bytes) -> bool: if not chunk: return False null_bytes = chunk.count(b"\x00") - return ( - len(chunk) > 0 and (null_bytes / len(chunk)) * 100 > NULL_BYTE_THRESHOLD_PERCENT - ) + return (null_bytes / len(chunk)) * 100 > NULL_BYTE_THRESHOLD_PERCENT def _try_decode_bytes( diff --git a/src/codecat/file_scanner.py b/src/codecat/file_scanner.py index ecc131e..8c1b53e 100644 --- a/src/codecat/file_scanner.py +++ b/src/codecat/file_scanner.py @@ -9,13 +9,16 @@ """ import fnmatch +import logging import os from pathlib import Path from typing import Any, Dict, List, Optional, Set -import typer from rich.status import Status -from typer import colors as typer_colors + +# --- Module-level constants --- + +CASE_SENSITIVE_MATCHING: bool = False # --- Internal Helper Functions for Pattern Matching --- @@ -65,10 +68,9 @@ def _passes_file_specific_checks( """Performs checks specific to files: explicit exclusion by name and max size.""" if abs_item_path in exclude_files_abs: if is_verbose: - typer.secho( - f"Skipping explicitly excluded file: {abs_item_path.relative_to(project_root_path)}", - fg=typer_colors.YELLOW, - err=True, + logging.debug( + "Skipping explicitly excluded file: %s", + abs_item_path.relative_to(project_root_path), ) return False @@ -76,18 +78,19 @@ def _passes_file_specific_checks( file_size = abs_item_path.stat().st_size if file_size > max_size_bytes: if is_verbose: - typer.secho( - f"Skipping large file: {abs_item_path.relative_to(project_root_path)} ({file_size / 1024:.2f}KB > {max_size_bytes / 1024:.0f}KB)", - fg=typer_colors.YELLOW, - err=True, + logging.debug( + "Skipping large file: %s (%.2fKB > %.0fKB)", + abs_item_path.relative_to(project_root_path), + file_size / 1024, + max_size_bytes / 1024, ) return False - except (FileNotFoundError, Exception) as e: + except OSError as e: if is_verbose: - typer.secho( - f"Warning: Could not get size for file {abs_item_path}: {e}", - fg=typer_colors.RED, - err=True, + logging.warning( + "Warning: Could not get size for file %s: %s", + abs_item_path, + e, ) return False return True @@ -106,7 +109,6 @@ def scan_project( Scans the project directory using os.walk for efficiency and returns a list of files. """ included_files_set: Set[Path] = set() - case_sensitive_matching = False exclude_dirs_set: Set[str] = set(config.get("exclude_dirs", [])) exclude_files_abs: Set[Path] = { @@ -137,7 +139,7 @@ def scan_project( continue if _is_path_excluded_by_pattern( - dir_rel_path_str, exclude_patterns, case_sensitive_matching + dir_rel_path_str, exclude_patterns, CASE_SENSITIVE_MATCHING ): continue @@ -155,12 +157,12 @@ def scan_project( ) if _is_path_excluded_by_pattern( - relative_path_str, exclude_patterns, case_sensitive_matching + relative_path_str, exclude_patterns, CASE_SENSITIVE_MATCHING ): continue if not _is_path_included_by_pattern( - relative_path_str, include_patterns, case_sensitive_matching + relative_path_str, include_patterns, CASE_SENSITIVE_MATCHING ): continue @@ -175,10 +177,6 @@ def scan_project( included_files_set.add(abs_file_path) if is_verbose: - typer.secho( - f"Including file: {relative_path_str}", - fg=typer_colors.GREEN, - err=True, - ) + logging.debug("Including file: %s", relative_path_str) return sorted(list(included_files_set)) diff --git a/src/codecat/markdown_generator.py b/src/codecat/markdown_generator.py index b8d5523..6203db9 100644 --- a/src/codecat/markdown_generator.py +++ b/src/codecat/markdown_generator.py @@ -56,7 +56,7 @@ def generate_markdown( lang_map = config.get("language_hints", {}) if config.get("generate_header", True): - project_path_str = str(project_root_path).replace("\\", "/") + project_path_str = project_root_path.as_posix() main_parts.append(f"# Codecat: Aggregated Code for '{project_root_path.name}'") main_parts.append( f"Generated from `{len(processed_files)}` files found in `{project_path_str}`.\n" @@ -65,7 +65,7 @@ def generate_markdown( file_blocks: List[str] = [] for file_data in processed_files: block_parts: List[str] = [] - relative_path_str = str(file_data.relative_path).replace("\\", "/") + relative_path_str = file_data.relative_path.as_posix() block_parts.append(f"## File: `{relative_path_str}`\n") if file_data.status == "text_content" and file_data.content is not None: diff --git a/src/codecat/web_ui.py b/src/codecat/web_ui.py new file mode 100644 index 0000000..d5e8e61 --- /dev/null +++ b/src/codecat/web_ui.py @@ -0,0 +1,945 @@ +# src/codecat/web_ui.py + +""" +Provides an optional web-based GUI for Codecat. + +Uses Python's built-in http.server to serve a single-page HTML/CSS/JS +application. Receives configuration from the frontend and executes the +Codecat CLI via subprocess, streaming logs back in real time. + +Architecture notes +------------------ +- The handler class is created inside ``_make_handler`` so that + ``project_path`` is captured by closure, avoiding shared class-level + mutable state between requests. +- Pattern normalisation lives in one place (``_normalize_patterns``) and + is reused by both POST handlers. +- The project path is injected into the HTML as a JSON-encoded `` + + +""" + + +def _build_page(project_path: Path) -> bytes: + """ + Assemble the full HTML page, injecting *project_path* as a safe + JSON-encoded ``\n" + return (_HTML_HEAD + inject + _HTML_TAIL).encode("utf-8") + + +def _normalize_patterns(raw: list[Any]) -> list[str]: + """ + Normalise a heterogeneous list of raw pattern values into a flat + list of clean, non-empty strings. + + Each element is stringified, then split on commas and newlines. + Whitespace is stripped and empty entries are dropped. + + This single implementation is shared by both POST handlers so that + the normalisation logic never drifts out of sync. + """ + result: list[str] = [] + for item in raw: + for part in str(item).replace(",", "\n").split("\n"): + cleaned = part.strip() + if cleaned: + result.append(cleaned) + return result + + +# --------------------------------------------------------------------------- +# Module-level constant +# --------------------------------------------------------------------------- + +_MAX_POST_BYTES: int = 64 * 1024 # 64 KB hard limit per request + +# --------------------------------------------------------------------------- +# Handler helpers +# +# Every function that handles HTTP I/O accepts a plain +# ``http.server.BaseHTTPRequestHandler`` instance as its first argument. +# --------------------------------------------------------------------------- + +_Handler = http.server.BaseHTTPRequestHandler # local alias for type hints + + +def _send_head( + h: _Handler, + status: int, + content_type: str, + extra: dict[str, str] | None = None, +) -> None: + """Send status line and common headers. Always adds ``Connection: close``.""" + h.send_response(status) + h.send_header("Content-Type", content_type) + h.send_header("Connection", "close") + if extra: + for key, val in extra.items(): + h.send_header(key, val) + h.end_headers() + + +def _send_json( + h: _Handler, + payload: dict[str, Any], + status: int = 200, +) -> None: + """Serialise *payload* as JSON and write it to the response.""" + body = json.dumps(payload).encode("utf-8") + _send_head( + h, + status, + "application/json; charset=utf-8", + {"Content-Length": str(len(body))}, + ) + h.wfile.write(body) + + +def _read_post_body(h: _Handler) -> bytes | None: + """ + Read the POST body up to ``_MAX_POST_BYTES``. + + Sends a 400 and returns ``None`` if the body is too large, allowing + callers to return immediately without further processing. + """ + try: + length = int(h.headers.get("Content-Length", 0)) + except ValueError: + length = 0 + if length > _MAX_POST_BYTES: + _send_json( + h, + {"success": False, "error": "Request body exceeds 64 KB limit."}, + 400, + ) + return None + return h.rfile.read(length) + + +def _handle_get(h: _Handler, project_path: Path) -> None: + """Route GET requests to the appropriate sub-handler.""" + clean = h.path.split("?")[0] + match clean: + case "/" | "/index.html": + page = _build_page(project_path) + _send_head( + h, + 200, + "text/html; charset=utf-8", + {"Content-Length": str(len(page))}, + ) + h.wfile.write(page) + case "/api/config": + _handle_get_config(h, project_path) + case _: + body = b"Not Found" + _send_head( + h, + 404, + "text/plain; charset=utf-8", + {"Content-Length": str(len(body))}, + ) + h.wfile.write(body) + + +def _handle_get_config(h: _Handler, project_path: Path) -> None: + """Return the contents of ``.codecat_config.json`` as JSON.""" + config_file = project_path / ".codecat_config.json" + if not config_file.is_file(): + _send_json(h, {"success": False, "error": "Config file not found."}) + return + try: + data = json.loads(config_file.read_text(encoding="utf-8")) + _send_json(h, {"success": True, "data": data}) + except (json.JSONDecodeError, OSError) as exc: + _send_json(h, {"success": False, "error": str(exc)}) + + +def _handle_post(h: _Handler, project_path: Path) -> None: + """Route POST requests to the appropriate sub-handler.""" + clean = h.path.split("?")[0] + match clean: + case "/api/config": + _handle_post_config(h, project_path) + case "/api/run": + _handle_post_run(h, project_path) + case _: + body = b"Not Found" + _send_head( + h, + 404, + "text/plain; charset=utf-8", + {"Content-Length": str(len(body))}, + ) + h.wfile.write(body) + + +def _build_run_config(data: dict[str, Any]) -> dict[str, Any]: + """ + Translate the frontend payload into a Codecat config dictionary. + + Extracted so that ``_handle_post_config`` and ``_handle_post_run`` + share the same mapping logic without duplicating it. + """ + cfg: dict[str, Any] = {} + + output_file = str(data.get("outputFile", "")).strip() + if output_file: + cfg["output_file"] = output_file + + includes = _normalize_patterns(data.get("includes", [])) + excludes = _normalize_patterns(data.get("excludes", [])) + if includes: + cfg["include_patterns"] = includes + if excludes: + cfg["exclude_patterns"] = excludes + + no_header = data.get("noHeader") + if no_header is not None: + cfg["generate_header"] = not bool(no_header) + + return cfg + + +def _handle_post_config(h: _Handler, project_path: Path) -> None: + """Write frontend config payload to ``.codecat_config.json``.""" + raw = _read_post_body(h) + if raw is None: + return + + try: + data: dict[str, Any] = json.loads(raw.decode("utf-8")) + except (json.JSONDecodeError, UnicodeDecodeError) as exc: + _send_json(h, {"success": False, "error": f"Invalid JSON: {exc}"}, 400) + return + + config = _build_run_config(data) + try: + config_path = project_path / ".codecat_config.json" + config_path.write_text(json.dumps(config, indent=4), encoding="utf-8") + _send_json(h, {"success": True}) + except OSError as exc: + _send_json(h, {"success": False, "error": str(exc)}) + + +def _build_subprocess_cmd( + project_path: Path, tmp_config_path: Path, dry_run: bool +) -> list[str]: + """ + Build the ``codecat run`` command list. + + Patterns are passed via a temp config file (``--config``) rather than + as individual ``--include``/``--exclude`` arguments. + """ + cmd = [ + sys.executable, + "-m", + "codecat", + "run", + str(project_path), + "--config", + str(tmp_config_path), + ] + if dry_run: + cmd.append("--dry-run") + return cmd + + +def _stream_subprocess(h: _Handler, cmd: list[str], project_path: Path) -> None: + """ + Spawn *cmd* and stream its combined stdout/stderr to the response body. + + Handles ``BrokenPipeError`` / ``ConnectionResetError`` gracefully so + that closing the browser tab mid-run does not produce a traceback. + """ + env: dict[str, str] = { + **os.environ, + "NO_COLOR": "1", + "PYTHONIOENCODING": "utf-8", + "PYTHONUNBUFFERED": "1", + } + try: + proc = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + encoding="utf-8", + errors="replace", + env=env, + cwd=str(project_path), + bufsize=1, + shell=False, + ) + if proc.stdout is not None: + while True: + line = proc.stdout.readline() + if not line: + break + try: + h.wfile.write(line.encode("utf-8")) + h.wfile.flush() + except (BrokenPipeError, ConnectionResetError): + break # user closed the browser tab mid-run + proc.stdout.close() + proc.wait() + except Exception as exc: + try: + h.wfile.write( + f"\n[SERVER ERROR] Could not run Codecat: {exc}\n".encode("utf-8") + ) + except (BrokenPipeError, ConnectionResetError): + pass + + +def _handle_post_run(h: _Handler, project_path: Path) -> None: + """Execute ``codecat run`` and stream its output back to the client.""" + import tempfile + + if not project_path.is_dir(): + _send_head(h, 400, "text/plain; charset=utf-8") + h.wfile.write( + f"Error: project path is not a valid directory: {project_path}\n".encode( + "utf-8" + ) + ) + return + + raw = _read_post_body(h) + if raw is None: + return + + try: + data: dict[str, Any] = json.loads(raw.decode("utf-8")) + except (json.JSONDecodeError, UnicodeDecodeError) as exc: + _send_head(h, 400, "text/plain; charset=utf-8") + h.wfile.write(f"Error: invalid JSON body: {exc}\n".encode("utf-8")) + return + + temp_cfg = _build_run_config(data) + + tmp_fd, tmp_path_str = tempfile.mkstemp( + suffix=".json", + prefix=".codecat_run_", + dir=project_path, + ) + tmp_config_path = Path(tmp_path_str) + try: + os.write(tmp_fd, json.dumps(temp_cfg).encode("utf-8")) + finally: + os.close(tmp_fd) + + cmd = _build_subprocess_cmd(project_path, tmp_config_path, bool(data.get("dryRun"))) + + _send_head(h, 200, "text/plain; charset=utf-8", {"Cache-Control": "no-cache"}) + + try: + _stream_subprocess(h, cmd, project_path) + finally: + try: + tmp_config_path.unlink(missing_ok=True) + except OSError: + pass + + +# --------------------------------------------------------------------------- +# Request handler factory +# --------------------------------------------------------------------------- + + +def _make_handler( + project_path: Path, +) -> type[http.server.BaseHTTPRequestHandler]: + """ + Return a ``BaseHTTPRequestHandler`` subclass bound to *project_path*. + + All logic lives in the module-level ``_handle_*`` functions; this class + is intentionally a thin delegation layer. Using a factory keeps + ``project_path`` out of shared class state so concurrent requests from + different server instances cannot interfere with each other. + """ + + class _RequestHandler(http.server.BaseHTTPRequestHandler): + def do_GET(self) -> None: + _handle_get(self, project_path) + + def do_POST(self) -> None: + _handle_post(self, project_path) + + def log_message(self, fmt: str, *args: Any) -> None: + pass # suppress per-request log lines + + return _RequestHandler + + +# --------------------------------------------------------------------------- +# Port discovery +# --------------------------------------------------------------------------- + + +def _find_free_port(start: int, max_tries: int = 20) -> int: + """ + Return the first TCP port in ``[start, start + max_tries)`` that is + available on the loopback interface. + + Raises ``OSError`` if no free port is found within the range. + """ + for candidate in range(start, start + max_tries): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: + try: + sock.bind(("127.0.0.1", candidate)) + return candidate + except OSError: + continue + raise OSError( + f"Could not find a free port in range" f" {start}\u2013{start + max_tries - 1}." + ) + + +# --------------------------------------------------------------------------- +# Public entry point +# --------------------------------------------------------------------------- + + +class _ReuseAddrServer(socketserver.ThreadingTCPServer): + """ThreadingTCPServer with address reuse enabled and daemon threads.""" + + allow_reuse_address: bool = True + daemon_threads: bool = True + + +def start_web_app(port: int = 8080, project_path: Path = Path(".")) -> None: + """ + Start the Codecat web UI server and open it in the default browser. + + The server binds to ``127.0.0.1`` only and is never exposed to the + network. The function blocks until the user presses Ctrl+C. + """ + resolved = project_path.resolve() + actual_port = _find_free_port(port) + + server = _ReuseAddrServer( + ("127.0.0.1", actual_port), + _make_handler(resolved), + ) + + url = f"http://127.0.0.1:{actual_port}" + print(f"Codecat Web UI \u2192 {url}") + print(f"Target directory: {resolved}") + print("Press Ctrl+C to stop.\n") + + server_thread = threading.Thread(target=server.serve_forever, daemon=True) + server_thread.start() + webbrowser.open_new(url) + + try: + while True: + server_thread.join(1) + except KeyboardInterrupt: + print("\nShutting down\u2026") + server.shutdown() + server.server_close() + sys.exit(0) diff --git a/tests/test_cli_app.py b/tests/test_cli_app.py index 752feb3..2203ffc 100644 --- a/tests/test_cli_app.py +++ b/tests/test_cli_app.py @@ -53,7 +53,7 @@ def test_generate_config_aborts_if_user_says_no_to_overwrite( def test_generate_config_handles_io_error(tmp_path: Path, mocker, strip_ansi_codes): """Ensures `generate-config` handles I/O errors during file writing.""" - mocker.patch("builtins.open", side_effect=IOError("Disk full")) + mocker.patch("pathlib.Path.write_text", side_effect=IOError("Disk full")) result = runner.invoke( app, ["generate-config", "--output-dir", str(tmp_path)], input="y\n" ) diff --git a/tests/test_config.py b/tests/test_config.py index cd1e45e..5d877da 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -68,17 +68,19 @@ def test_cli_overrides_for_include_and_exclude_patterns(tmp_path: Path): assert config["exclude_patterns"] == ["*.map"] -def test_handling_a_malformed_json_config(tmp_path: Path, capsys): +def test_handling_a_malformed_json_config(tmp_path: Path, caplog): """Ensures a corrupt config file is handled gracefully without crashing.""" user_config_path = tmp_path / ".codecat_config.json" user_config_path.write_text("{ 'malformed': json, }") # Invalid JSON - config, loaded, _ = load_config(tmp_path) + import logging + + with caplog.at_level(logging.WARNING): + config, loaded, _ = load_config(tmp_path) + assert not loaded assert config["output_file"] == "codecat_output.md" # Falls back to default - - captured = capsys.readouterr() - assert "Notice: Could not load or parse config" in captured.err + assert "Notice: Could not load or parse config" in caplog.text def test_merging_language_hints(tmp_path: Path): diff --git a/tests/test_file_scanner.py b/tests/test_file_scanner.py index 17a9d66..1ce21e7 100644 --- a/tests/test_file_scanner.py +++ b/tests/test_file_scanner.py @@ -237,7 +237,7 @@ def test_scanning_a_subdirectory(tmp_path: Path): assert files == expected, "Scanning a subdirectory with relative excludes failed" -def test_verbose_output_for_skipped_items(tmp_path: Path, capsys, strip_ansi_codes): +def test_verbose_output_for_skipped_items(tmp_path: Path, caplog, strip_ansi_codes): """Ensures that verbose mode correctly logs the reasons for skipping files and dirs.""" structure = { "large_file.txt": "a" * 2048, @@ -251,11 +251,13 @@ def test_verbose_output_for_skipped_items(tmp_path: Path, capsys, strip_ansi_cod "exclude_dirs": ["docs"], "include_patterns": ["*.txt", "*.md"], } - run_scan_with_config(tmp_path, structure, config_overrides) - captured = capsys.readouterr() - stderr = strip_ansi_codes(captured.err) - assert "Skipping large file: large_file.txt" in stderr - assert "Skipping explicitly excluded file: explicitly_excluded.txt" in stderr + import logging + + with caplog.at_level(logging.DEBUG): + run_scan_with_config(tmp_path, structure, config_overrides) + + assert "Skipping large file: large_file.txt" in caplog.text + assert "Skipping explicitly excluded file: explicitly_excluded.txt" in caplog.text def test_scanner_handles_stat_error_gracefully(tmp_path: Path, mocker): diff --git a/tests/test_web_ui.py b/tests/test_web_ui.py new file mode 100644 index 0000000..37e24e1 --- /dev/null +++ b/tests/test_web_ui.py @@ -0,0 +1,426 @@ +# tests/test_web_ui.py + +""" +Tests for the web_ui module. + +Structure +--------- +Unit tests — pure functions that need no network: + _normalize_patterns, _build_page, _find_free_port, _build_run_config, + _build_subprocess_cmd + +Integration tests — spin up a real server on a random available port and + issue real HTTP requests with http.client. No subprocess is spawned for + ``/api/run``; only the error-path (invalid JSON, bad project path) is + covered to keep the test suite fast and hermetic. +""" + +import http.client +import json +import socket +import sys +import threading +from pathlib import Path + +import pytest + +from codecat.web_ui import ( + _build_page, + _build_run_config, + _build_subprocess_cmd, + _find_free_port, + _make_handler, + _normalize_patterns, + _ReuseAddrServer, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _free_port() -> int: + """Return an available loopback port for test servers.""" + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(("127.0.0.1", 0)) + return s.getsockname()[1] + + +@pytest.fixture() +def server(tmp_path: Path): + """ + Spin up a real ThreadingTCPServer bound to a random port and yield + ``(host, port, project_path)``. The server is shut down after the test. + """ + port = _free_port() + httpd = _ReuseAddrServer(("127.0.0.1", port), _make_handler(tmp_path)) + thread = threading.Thread(target=httpd.serve_forever, daemon=True) + thread.start() + yield "127.0.0.1", port, tmp_path + httpd.shutdown() + httpd.server_close() + + +def _get(host: str, port: int, path: str) -> tuple[int, bytes]: + conn = http.client.HTTPConnection(host, port, timeout=5) + conn.request("GET", path) + resp = conn.getresponse() + return resp.status, resp.read() + + +def _post( + host: str, port: int, path: str, body: bytes, content_type: str = "application/json" +) -> tuple[int, bytes]: + conn = http.client.HTTPConnection(host, port, timeout=5) + conn.request( + "POST", + path, + body=body, + headers={"Content-Type": content_type, "Content-Length": str(len(body))}, + ) + resp = conn.getresponse() + return resp.status, resp.read() + + +# --------------------------------------------------------------------------- +# Unit tests — _normalize_patterns +# --------------------------------------------------------------------------- + + +class TestNormalizePatterns: + def test_empty_list(self): + assert _normalize_patterns([]) == [] + + def test_simple_strings(self): + assert _normalize_patterns(["*.py", "*.js"]) == ["*.py", "*.js"] + + def test_comma_separated(self): + assert _normalize_patterns(["*.py, *.js"]) == ["*.py", "*.js"] + + def test_newline_separated(self): + assert _normalize_patterns(["*.py\n*.js"]) == ["*.py", "*.js"] + + def test_mixed_separators(self): + result = _normalize_patterns(["*.py,*.js\n*.ts"]) + assert result == ["*.py", "*.js", "*.ts"] + + def test_strips_whitespace(self): + assert _normalize_patterns([" *.py ", " *.js "]) == ["*.py", "*.js"] + + def test_drops_empty_entries(self): + assert _normalize_patterns(["*.py", "", " ", "*.js"]) == ["*.py", "*.js"] + + def test_non_string_items_are_coerced(self): + # The frontend may pass integers or None in rare cases. + result = _normalize_patterns([123, None]) + assert result == ["123", "None"] + + def test_glob_patterns_preserved(self): + patterns = [".github/*", "src/**/*.py"] + assert _normalize_patterns(patterns) == patterns + + def test_multiline_textarea_input(self): + """Simulates a raw textarea value sent from the frontend.""" + raw = ".github/*\nsrc/codecat/*.py\ntests/*.py" + assert _normalize_patterns([raw]) == [ + ".github/*", + "src/codecat/*.py", + "tests/*.py", + ] + + +# --------------------------------------------------------------------------- +# Unit tests — _build_page +# --------------------------------------------------------------------------- + + +class TestBuildPage: + def test_returns_bytes(self, tmp_path: Path): + assert isinstance(_build_page(tmp_path), bytes) + + def test_contains_doctype(self, tmp_path: Path): + assert b"" in _build_page(tmp_path) + + def test_path_is_json_encoded(self, tmp_path: Path): + page = _build_page(tmp_path).decode("utf-8") + path_json = json.dumps(str(tmp_path)) + assert f"window.CODECAT_PATH = {path_json};" in page + + def test_backslash_path_is_safe(self): + """Windows paths contain backslashes; json.dumps must escape them.""" + win_path = Path("C:\\Users\\joy\\my project\\codecat") + page = _build_page(win_path).decode("utf-8") + # The JSON encoding should use \\ for each backslash. + assert "C:\\\\Users\\\\joy" in page + + def test_path_with_special_chars(self): + """Paths that contain braces/quotes must not break the JS block.""" + tricky = Path('/tmp/proj{"key":"val"}') + page = _build_page(tricky).decode("utf-8") + # json.dumps escapes the inner quotes; the page must still be valid + assert "window.CODECAT_PATH" in page + assert "