From f05416dbb1e17867c1aae03457fdb0798bcd0a2c Mon Sep 17 00:00:00 2001 From: Mateusz Hajder <6783135+mhajder@users.noreply.github.com> Date: Wed, 3 Jun 2026 18:31:12 +0200 Subject: [PATCH] feat(watch): add --polling flag for SMB/CIFS/NFS mounted volumes --- README.md | 30 ++++++++++++++++++++++++++- docs/guide.md | 50 +++++++++++++++++++++++++++++++++++++++++++++ src/oikb/cli.py | 34 +++++++++++++++++++++++++++--- src/oikb/watcher.py | 21 +++++++++++++++++-- 4 files changed, 129 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index cb74bda..d5c6056 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,9 @@ oikb sync github:owner/repo --kb-id your-kb-id # Preview first (no upload) oikb sync ./docs --kb-id your-kb-id --dry-run + +# Watch an SMB/CIFS/NFS mounted volume +oikb watch /mnt/smb_share/docs --kb-id your-kb-id --polling ``` For multi-source, scheduled sync, or daemon mode — run `oikb init` to generate a `.oikb.yaml` config file, then `oikb daemon`. @@ -33,7 +36,7 @@ For multi-source, scheduled sync, or daemon mode — run `oikb init` to generate |---|---| | `oikb init` | Generate `.oikb.yaml` interactively | | `oikb sync ` | Incremental sync to a Knowledge Base | -| `oikb watch ` | Watch for changes and auto-sync | +| `oikb watch ` | Watch for changes and auto-sync (supports SMB/NFS via `--polling`) | | `oikb daemon` | Long-lived scheduler with HTTP API | | `oikb diff ` | Preview what a sync would do | | `oikb validate` | Validate `.oikb.yaml` without running | @@ -230,6 +233,31 @@ oikb history --clear --days 7 # Prune old entries OPEN_WEBUI_API_KEY: ${{ secrets.OPEN_WEBUI_API_KEY }} ``` +## Network Mounts (SMB / CIFS / NFS) + +By default, `oikb watch` uses native OS filesystem events (FSEvents on macOS, inotify on Linux) which do **not** work on network-mounted volumes — the kernel doesn't receive change notifications from a remote server. 
+ +Use `--polling` to switch to a polling-based observer that detects changes via periodic `stat()` calls: + +```bash +# Basic SMB watch +oikb watch /mnt/smb_share/docs --kb-id your-kb-id --polling + +# Faster detection (poll every 2s instead of the default 5s) +oikb watch /mnt/smb_share/docs --kb-id your-kb-id --polling --polling-interval 2 + +# Custom debounce (default: 3s in polling mode, 1s in native mode) +oikb watch /mnt/smb_share/docs --kb-id your-kb-id --polling --debounce 5 +``` + +| Flag | Default | Description | +|---|---|---| +| `--polling` | off | Use PollingObserver instead of native events | +| `--polling-interval` | `5.0` | Seconds between filesystem polls (only with `--polling`) | +| `--debounce` | `1.0` / `3.0` | Quiet period before sync (defaults to 3s with `--polling`, 1s otherwise; an explicit value always wins) | + +> **Tip:** Lower `--polling-interval` detects changes faster but increases I/O on the network share. For most SMB setups, the 5s default is a good balance. + ## How It Works 1. Scan source, compute checksums diff --git a/docs/guide.md b/docs/guide.md index 7323dc0..58dc4f8 100644 --- a/docs/guide.md +++ b/docs/guide.md @@ -10,6 +10,7 @@ A complete guide to syncing content into Open WebUI Knowledge Bases. - [Getting Started](#getting-started) - [Your First Sync](#your-first-sync) - [Watch Mode](#watch-mode) + - [Network Mounts (SMB / CIFS / NFS)](#network-mounts-smb--cifs--nfs) - [Configuration File](#configuration-file) - [Generating with oikb init](#generating-with-oikb-init) - [Manual Setup](#manual-setup) @@ -116,6 +117,36 @@ oikb watch ./docs --kb-id your-kb-id Uses filesystem events (not polling) so changes are picked up instantly. +### Network Mounts (SMB / CIFS / NFS) + +Native filesystem events (FSEvents on macOS, inotify on Linux) do **not** work on network-mounted volumes — the OS kernel doesn't receive change notifications from a remote server. This means `oikb watch` without `--polling` will silently miss changes made on the server or by other clients of SMB/CIFS/NFS shares. 
+ +Use `--polling` to switch to a polling-based observer that detects changes by periodically calling `stat()` on every file: + +```bash +# Watch an SMB-mounted directory +oikb watch /mnt/smb_share/docs --kb-id your-kb-id --polling + +# Faster detection (check every 2 seconds instead of default 5) +oikb watch /mnt/smb_share/docs --kb-id your-kb-id --polling --polling-interval 2 + +# Custom debounce (wait 5s of quiet time before triggering sync) +oikb watch /mnt/smb_share/docs --kb-id your-kb-id --polling --debounce 5 +``` + +#### Watch Options + +| Flag | Default | Description | +|---|---|---| +| `--polling` | off | Use PollingObserver instead of native filesystem events | +| `--polling-interval` | `5.0` | Seconds between filesystem polls (only used with `--polling`) | +| `--debounce` | `1.0` / `3.0` | Quiet period before triggering sync. Defaults to 3s with `--polling`, 1s with native events. Override explicitly if needed. | +| `--verbose` / `-v` | off | Show detailed sync progress | + +> **Performance note:** Lower `--polling-interval` detects changes faster but increases I/O on the network share. Each poll `stat()`s every file in the watched directory tree. For large directories (>10k files) over SMB, consider using `--polling-interval 10` or higher. + +> **Alternative:** If real-time detection isn't required, use `oikb daemon` with a scheduled interval instead (e.g. `interval: 5m`). The daemon uses time-based scheduling, not filesystem events, so it works on any filesystem. 
+ --- ## Configuration File @@ -696,6 +727,9 @@ oikb sync --max-file-size 50mb Skip large files oikb sync --concurrency 4 Parallel uploads oikb sync --scan-secrets Block files with credentials oikb watch --kb-id ID Auto-sync on file change +oikb watch --kb-id ID --polling Watch with polling (SMB/CIFS/NFS) +oikb watch --polling --polling-interval 2 Faster poll frequency +oikb watch --polling --debounce 5 Custom debounce oikb daemon Start scheduled daemon oikb daemon --log-format json JSON logging oikb daemon --config /path/to/yaml Custom config path @@ -751,3 +785,19 @@ oikb validate --deep # Verify API + KB connectivity Open WebUI → Knowledge → click a KB → the ID is in the URL: `http://localhost:3000/knowledge/8f3a2b1c-1234-5678-9abc-def012345678` + +### Watch mode doesn't detect changes on SMB/CIFS/NFS + +By default, `oikb watch` uses native filesystem events which don't work on network-mounted volumes. Add `--polling`: + +```bash +oikb watch /mnt/smb_share --kb-id your-kb-id --polling +``` + +If changes are still missed, try lowering the polling interval: + +```bash +oikb watch /mnt/smb_share --kb-id your-kb-id --polling --polling-interval 2 +``` + +See [Network Mounts](#network-mounts-smb--cifs--nfs) for full details. diff --git a/src/oikb/cli.py b/src/oikb/cli.py index b7bffbc..0b700dd 100644 --- a/src/oikb/cli.py +++ b/src/oikb/cli.py @@ -561,8 +561,21 @@ def diff( @cli.command() @click.argument("directory") @common_options -@click.option("--debounce", default=1.0, type=float, help="Seconds to wait after last change (default: 1.0).") +@click.option("--debounce", default=None, type=float, help="Seconds to wait after last change (default: 1.0, or 3.0 with --polling).") @click.option("-v", "--verbose", is_flag=True, help="Show detailed progress.") +@click.option( + "--polling", + is_flag=True, + default=False, + help="Use PollingObserver instead of native filesystem events. 
" + "Required for SMB/CIFS/NFS network-mounted volumes.", +) +@click.option( + "--polling-interval", + default=5.0, + type=float, + help="How often (seconds) to poll for changes when --polling is used (default: 5.0).", +) @click.pass_context def watch( ctx: click.Context, @@ -570,17 +583,27 @@ def watch( url: str | None, token: str | None, kb: str | None, - debounce: float, + debounce: float | None, verbose: bool, + polling: bool, + polling_interval: float, ): """Watch a local directory and sync on changes. Runs continuously until interrupted (Ctrl+C). + + For SMB/CIFS/NFS mounted volumes, use --polling to enable + PollingObserver which detects changes via periodic stat() calls + instead of relying on kernel filesystem events. """ if not kb: click.echo(click.style("--kb-id is required.", fg="red"), err=True) sys.exit(1) + # Default debounce: 3s for polling (SMB writes arrive in bursts), 1s for native. + if debounce is None: + debounce = 3.0 if polling else 1.0 + quiet = ctx.obj.get("quiet", False) from oikb.watcher import watch_directory @@ -593,7 +616,10 @@ def watch( click.echo(click.style(str(e), fg="red"), err=True) sys.exit(1) - click.echo(f"Watching {directory} → KB {kb} (Ctrl+C to stop)") + mode = "polling" if polling else "native" + click.echo(f"Watching {directory} → KB {kb} (mode: {mode}, Ctrl+C to stop)") + if polling: + click.echo(f" Polling interval: {polling_interval}s, debounce: {debounce}s") def on_change(): try: @@ -616,6 +642,8 @@ def on_change(): on_change=on_change, debounce_seconds=debounce, ignore=DEFAULT_IGNORE, + polling=polling, + polling_interval=polling_interval, ) except FileNotFoundError as e: click.echo(click.style(f"Error: {e}", fg="red"), err=True) diff --git a/src/oikb/watcher.py b/src/oikb/watcher.py index de8f33b..a2b6f40 100644 --- a/src/oikb/watcher.py +++ b/src/oikb/watcher.py @@ -1,4 +1,8 @@ -"""File watcher — debounced fswatch integration for live sync.""" +"""File watcher — debounced fswatch integration for live sync. 
+ +Supports both native filesystem events (FSEvents / inotify) and +PollingObserver for network-mounted volumes (SMB / CIFS / NFS). +""" from __future__ import annotations @@ -9,6 +13,7 @@ from watchdog.events import FileSystemEvent, FileSystemEventHandler from watchdog.observers import Observer +from watchdog.observers.polling import PollingObserver class _DebouncedHandler(FileSystemEventHandler): @@ -51,6 +56,8 @@ def watch_directory( on_change: Callable[[], None], debounce_seconds: float = 1.0, ignore: frozenset[str] | None = None, + polling: bool = False, + polling_interval: float = 5.0, ) -> None: """Watch a directory for changes and call on_change after debounce. @@ -61,6 +68,13 @@ def watch_directory( on_change: Callback fired after changes settle. debounce_seconds: Quiet period before triggering sync. ignore: File/dir names to ignore. + polling: Use PollingObserver instead of native events. + Required for SMB/CIFS/NFS network mounts where + kernel-level filesystem events are not available. + polling_interval: How often (seconds) the PollingObserver checks + for changes. Only used when polling=True. + Lower values detect changes faster but increase + I/O load on the network share. Default: 5.0. """ path = Path(directory).resolve() if not path.is_dir(): @@ -72,7 +86,10 @@ def watch_directory( ignore=ignore, ) - observer = Observer() + if polling: + observer = PollingObserver(timeout=polling_interval) + else: + observer = Observer() observer.schedule(handler, str(path), recursive=True) observer.start()