From af75511eb9ad8d59326b1e9876ca29b843a26136 Mon Sep 17 00:00:00 2001 From: Myles Dear Date: Mon, 13 Apr 2026 14:34:42 -0400 Subject: [PATCH 1/2] feat: local Docker sandbox infrastructure (1/3) - Docker Compose local stack with PostgreSQL, Redis, MinIO, sandbox - Local sandbox entrypoint, VNC, browser automation services - Stack control scripts (stack_control.sh, local/*) - Backend Dockerfile + entrypoint for local development - Configuration: .stack.env.local, settings.yaml, model_configs - SQLAlchemy model fixes (UUID consistency, TimestampColumn) - Agent tool/runtime improvements (reasoning_content, field renames) - Credit billing_enabled toggle + usage handler refactor - E2B sandbox management, VNC URL support - 246 tests (unit, integration, smoke, E2E) - Documentation: architecture, getting-started, local-docker-sandbox - GitHub Copilot instructions and prompt templates --- .github/copilot-instructions.md | 13 + .github/instructions/diagram.instructions.md | 572 +++++ .github/prompts/e2e-test-cycle.prompt.md | 272 ++ .gitignore | 136 +- AGENTS.md | 13 +- CLAUDE.md | 54 +- docker/.stack.env.local.example | 73 + docker/docker-compose.local.yaml | 152 ++ docker/frontend/Dockerfile | 16 +- docker/sandbox/pyproject.toml | 5 +- docker/sandbox/start-services.sh | 38 +- docs/docs/architecture-local-to-cloud.md | 533 ++++ docs/docs/core-infrastructure.md | 71 + docs/docs/feature-branch-analysis.md | 428 ++++ docs/docs/getting-started.md | 225 ++ docs/docs/local-docker-sandbox.md | 413 ++++ .../required-environment-variables/index.md | 123 + .../llm-auth.md | 70 + .../sandbox-server.md | 79 + docs/migration-knowledge.md | 170 ++ docs/rebase-analysis/01-path-mapping.md | 130 + docs/rebase-analysis/02-baseline-changes.md | 140 ++ .../03-three-way-assessment.md | 219 ++ docs/rebase-analysis/04-rebase-plan.md | 211 ++ docs/rebase-analysis/05-post-rebase-audit.md | 239 ++ docs/rebase-analysis/06-full-feature-audit.md | 315 +++ e2b.Dockerfile | 33 +- frontend/package.json | 
7 +- frontend/pnpm-lock.yaml | 337 +++ frontend/src/app/routes/agent.tsx | 8 +- frontend/src/app/routes/dashboard.tsx | 9 +- frontend/src/app/routes/login.tsx | 55 +- .../src/components/agent/agent-result.tsx | 18 +- frontend/src/components/agent/agent-tab.tsx | 20 + frontend/src/components/agent/agent-task.tsx | 14 +- .../components/agent/subagent-container.tsx | 30 +- .../src/components/chat-header-mobile.tsx | 2 + frontend/src/components/chat-header.tsx | 5 + frontend/src/components/header.tsx | 2 + frontend/src/components/project-list.tsx | 25 + frontend/src/components/session-item.tsx | 7 +- .../src/components/share-agent-content.tsx | 8 +- frontend/src/components/sidebar.tsx | 31 +- frontend/src/constants/models.tsx | 10 + frontend/src/hooks/use-app-events.tsx | 58 +- .../hooks/use-navigation-leave-session.tsx | 2 + frontend/src/lib/__tests__/utils.test.ts | 132 + frontend/src/lib/utils.ts | 74 +- .../__tests__/agent-sandbox-status.test.ts | 35 + frontend/src/state/index.ts | 1 + frontend/src/state/slice/agent.ts | 8 + frontend/src/state/slice/sessions.ts | 47 + frontend/src/state/slice/workspace.ts | 8 + frontend/src/typings/agent.ts | 6 +- ...0260412_000004_add_session_delete_after.py | 36 + pyproject.toml | 6 +- scripts/html_to_pdf.py | 194 ++ scripts/local/create_template_from_images.py | 190 ++ scripts/local/migrate_events.py | 174 ++ scripts/local/migrate_old_db.py | 790 ++++++ scripts/local/migrate_remaining_data.py | 213 ++ scripts/local/rewrite_localhost_urls.py | 131 + scripts/local/stuck_task_control.sh | 313 +++ scripts/local/test_e2e.py | 2012 +++++++++++++++ scripts/local/test_session.py | 240 ++ scripts/local/upload_slide_assets.py | 234 ++ scripts/local/windows-port-forward.ps1 | 151 ++ scripts/stack_control.sh | 336 +++ src/ii_agent/agents/agent.py | 10 +- src/ii_agent/agents/models/metrics.py | 11 + .../agents/models/openai/responses.py | 5 +- src/ii_agent/agents/prompts/agent_prompts.py | 6 +- .../prompts/deep_research_system_prompt.py | 2 
+- src/ii_agent/agents/prompts/system_prompt.py | 7 +- src/ii_agent/agents/sandboxes/__init__.py | 4 + src/ii_agent/agents/sandboxes/base.py | 11 +- src/ii_agent/agents/sandboxes/docker.py | 1235 ++++++++++ src/ii_agent/agents/sandboxes/docker_shell.py | 577 +++++ src/ii_agent/agents/sandboxes/e2b.py | 9 +- src/ii_agent/agents/sandboxes/exceptions.py | 6 +- src/ii_agent/agents/sandboxes/explorer.py | 5 +- .../agents/sandboxes/orphan_cleanup.py | 499 ++++ src/ii_agent/agents/sandboxes/port_manager.py | 688 ++++++ src/ii_agent/agents/sandboxes/schemas.py | 1 + src/ii_agent/agents/sandboxes/service.py | 32 +- src/ii_agent/agents/skills/prompt_db.py | 2 +- src/ii_agent/agents/skills/storage.py | 4 +- src/ii_agent/agents/tools/sandbox/base.py | 6 + .../agents/tools/shell/shell_run_command.py | 39 +- src/ii_agent/agents/tools/skill.py | 18 +- .../agents/tools/slide_system/hook_utils.py | 16 +- src/ii_agent/app/lifespan.py | 91 + src/ii_agent/app/routers.py | 6 + src/ii_agent/auth/router.py | 38 + src/ii_agent/chat/application/chat_service.py | 3 + .../chat/application/file_processor.py | 13 +- src/ii_agent/chat/llm/anthropic/provider.py | 19 +- src/ii_agent/chat/llm/gemini.py | 13 +- src/ii_agent/chat/llm/openai.py | 15 +- .../chat/media/handlers/video_handler.py | 2 +- src/ii_agent/chat/prompts/video_prompts.py | 4 +- src/ii_agent/chat/providers/models.py | 2 +- src/ii_agent/chat/vectorstore/openai.py | 13 +- .../content/slides/content_processor.py | 21 +- src/ii_agent/content/slides/repository.py | 2 - .../content/slides/templates/schemas.py | 4 +- .../content/storybook/ai_edit_service.py | 9 +- src/ii_agent/core/config/agent.py | 13 +- src/ii_agent/core/config/credits.py | 11 + src/ii_agent/core/config/sandbox.py | 73 + src/ii_agent/core/config/storage.py | 25 +- src/ii_agent/core/storage/client.py | 11 +- src/ii_agent/core/storage/providers/local.py | 100 - src/ii_agent/core/storage/providers/minio.py | 10 + src/ii_agent/credits/usage/handler.py | 4 + 
src/ii_agent/files/service.py | 11 +- src/ii_agent/files/slide_assets_router.py | 63 + src/ii_agent/files/storage_proxy_router.py | 94 + .../composio/auth_config_service.py | 9 +- .../composio/connected_account_service.py | 9 +- .../connectors/composio/mcp_server_service.py | 9 +- .../connectors/composio/toolkit_service.py | 38 +- src/ii_agent/projects/design/service.py | 4 +- src/ii_agent/realtime/events/app_events.py | 1 + src/ii_agent/realtime/events/converter.py | 8 +- .../realtime/handlers/awake_sandbox.py | 44 +- src/ii_agent/realtime/handlers/base.py | 25 +- src/ii_agent/realtime/handlers/cancel.py | 60 +- .../realtime/handlers/sandbox_status.py | 5 +- src/ii_agent/sessions/__init__.py | 2 + src/ii_agent/sessions/models.py | 1 + src/ii_agent/sessions/repository.py | 13 +- src/ii_agent/sessions/router.py | 37 +- src/ii_agent/sessions/schemas.py | 20 + src/ii_agent/sessions/service.py | 108 +- src/ii_agent/settings/llm/repository.py | 2 +- src/ii_agent/settings/llm/service.py | 2 +- src/tests/api/billing/test_credits_router.py | 3 +- src/tests/api/chat/test_chat_router.py | 10 +- src/tests/api/content/test_slides_router.py | 14 +- .../api/content/test_storybook_router.py | 8 +- .../api/content/test_storybook_router_api.py | 69 +- .../api/integrations/test_composio_router.py | 28 +- .../test_connectors_router_api.py | 4 +- .../api/sessions/test_sessions_router.py | 12 +- src/tests/api/settings/test_llm_router.py | 2 +- src/tests/conftest.py | 6 + .../test_auth_session_chat_flow.py | 130 - .../test_billing_webhook_lifecycle.py | 3 +- .../integration/test_file_upload_lifecycle.py | 62 +- .../integration/test_realtime_socket_flow.py | 25 +- .../test_settings_resolution_flow.py | 45 - src/tests/repositories/conftest.py | 43 +- .../test_auth_billing_repositories.py | 96 +- .../repositories/test_content_repositories.py | 43 +- ..._engine_files_integrations_repositories.py | 320 --- .../test_projects_repositories.py | 76 +- 
...realtime_sessions_settings_repositories.py | 376 --- src/tests/smoke/test_realtime_billing.py | 9 +- src/tests/smoke/test_session_file_settings.py | 33 +- src/tests/smoke/test_startup_health.py | 5 +- src/tests/unit/__init__.py | 0 src/tests/unit/agent/__init__.py | 0 src/tests/unit/agent/test_agent_exceptions.py | 51 + src/tests/unit/agent/test_agent_utils.py | 64 + src/tests/unit/agent/test_claude_helpers.py | 130 + src/tests/unit/agent/test_docker_sandbox.py | 1633 ++++++++++++ .../test_docker_sandbox_readiness_config.py | 82 + src/tests/unit/agent/test_function_tool.py | 162 ++ src/tests/unit/agent/test_metrics.py | 197 ++ src/tests/unit/agent/test_orphan_cleanup.py | 889 +++++++ src/tests/unit/agent/test_port_manager.py | 899 +++++++ src/tests/unit/agent/test_prompt_rendering.py | 100 - src/tests/unit/agent/test_research_prompt.py | 62 + src/tests/unit/agent/test_run_input_output.py | 598 +++++ src/tests/unit/agent/test_run_messages.py | 63 + .../unit/agent/test_sandbox_exceptions.py | 56 + src/tests/unit/agent/test_sandbox_provider.py | 39 - src/tests/unit/agent/test_sandbox_schemas.py | 64 + src/tests/unit/agent/test_sandbox_settings.py | 79 + src/tests/unit/agent/test_session_summary.py | 184 ++ src/tests/unit/agent/test_timer.py | 29 + src/tests/unit/app/test_orphan_cleanup.py | 206 ++ src/tests/unit/app/test_routers_smoke.py | 21 + src/tests/unit/auth/test_auth_exceptions.py | 24 + .../unit/auth/test_auth_router_helpers.py | 174 ++ src/tests/unit/auth/test_auth_router_r4.py | 486 ---- src/tests/unit/auth/test_dependencies.py | 78 - src/tests/unit/auth/test_oidc_verify.py | 62 + src/tests/unit/auth/test_user_service.py | 138 -- src/tests/unit/auth/test_user_service_deep.py | 402 --- src/tests/unit/auth/test_waitlist.py | 57 - .../billing/test_billing_customer_service.py | 298 --- .../unit/billing/test_billing_service_pure.py | 393 +++ .../unit/billing/test_checkout_service.py | 103 - src/tests/unit/billing/test_credit_utils.py | 41 + 
.../unit/billing/test_handler_billing.py | 472 ---- src/tests/unit/billing/test_import_paths.py | 46 - src/tests/unit/billing/test_usage_service.py | 447 ---- .../unit/celery/test_manager_singleton.py | 22 - src/tests/unit/celery/test_tasks_storybook.py | 516 ---- .../unit/chat/test_anthropic_cache_control.py | 113 + .../unit/chat/test_chat_context_manager.py | 673 ----- src/tests/unit/chat/test_chat_dependencies.py | 199 -- .../unit/chat/test_chat_llm_anthropic_deep.py | 1145 --------- ...est_chat_llm_anthropic_prompt_converter.py | 572 ----- .../chat/test_chat_llm_anthropic_provider.py | 584 ----- src/tests/unit/chat/test_chat_llm_custom.py | 645 ----- .../unit/chat/test_chat_llm_custom_deep.py | 1038 -------- .../unit/chat/test_chat_llm_gemini_deep.py | 11 +- src/tests/unit/chat/test_chat_llm_openai.py | 745 ------ .../unit/chat/test_chat_llm_openai_deep.py | 1012 -------- src/tests/unit/chat/test_chat_llm_utils.py | 83 + .../unit/chat/test_chat_media_handlers.py | 396 --- src/tests/unit/chat/test_chat_media_modes.py | 607 ----- src/tests/unit/chat/test_chat_media_utils.py | 30 + .../chat/test_chat_message_history_service.py | 272 ++ src/tests/unit/chat/test_chat_router.py | 524 ---- src/tests/unit/chat/test_chat_service.py | 204 -- src/tests/unit/chat/test_chat_service_r4.py | 978 -------- src/tests/unit/chat/test_chat_vectorstore.py | 539 ---- .../unit/chat/test_context_manager_hooks.py | 35 - src/tests/unit/chat/test_council_service.py | 14 +- src/tests/unit/chat/test_file_processor.py | 86 + src/tests/unit/chat/test_llm_loop_service.py | 385 --- src/tests/unit/chat/test_media_registry.py | 81 + src/tests/unit/chat/test_message_service.py | 230 ++ src/tests/unit/chat/test_prompt_converter.py | 396 +++ src/tests/unit/chat/test_turn_loop_service.py | 294 +++ src/tests/unit/content/test_media_schemas.py | 17 + src/tests/unit/content/test_media_service.py | 80 - .../unit/content/test_nano_banana_service.py | 401 --- src/tests/unit/content/test_skill_service.py | 
137 -- .../content/test_skills_seeding_coverage.py | 49 - .../content/test_slide_content_processor.py | 279 --- src/tests/unit/content/test_slides_deep.py | 561 ----- .../unit/content/test_slides_design_r4.py | 676 ----- .../test_slides_design_router_coverage.py | 162 -- .../content/test_slides_design_service.py | 537 ---- .../unit/content/test_slides_nano_banana.py | 586 ----- .../content/test_storybook_ai_edit_service.py | 478 ---- src/tests/unit/content/test_storybook_deep.py | 572 ----- .../content/test_storybook_edit_service.py | 456 ---- .../content/test_storybook_export_utils.py | 150 -- .../unit/content/test_storybook_exports_r4.py | 795 ------ .../unit/content/test_storybook_pdf_export.py | 408 --- .../content/test_storybook_router_coverage.py | 505 ---- .../unit/content/test_storybook_router_r4.py | 335 --- .../unit/content/test_storybook_service.py | 83 - src/tests/unit/core/test_config_credits.py | 41 + src/tests/unit/core/test_config_llm.py | 48 + src/tests/unit/core/test_config_mcp.py | 35 + src/tests/unit/core/test_config_oauth.py | 56 + src/tests/unit/core/test_config_sources.py | 153 ++ src/tests/unit/core/test_encryption.py | 205 ++ src/tests/unit/core/test_middleware.py | 81 - .../core/test_middleware_exception_handler.py | 142 ++ .../core/test_middleware_request_context.py | 68 + src/tests/unit/core/test_redis_cache_r4.py | 358 --- src/tests/unit/core/test_redis_cancel.py | 198 +- .../unit/core/test_secrets_encryption.py | 52 + src/tests/unit/core/test_settings.py | 37 - src/tests/unit/core/test_storage_client.py | 124 + .../unit/core/test_storage_path_resolver.py | 69 + src/tests/unit/credits/test_credit_models.py | 27 + .../unit/credits/test_credit_repository.py | 81 - src/tests/unit/credits/test_credit_service.py | 233 ++ .../unit/credits/test_credit_usage_handler.py | 112 + .../test_project_design_service_helpers.py | 454 ---- src/tests/unit/engine/test_agent_service.py | 36 - .../unit/engine/test_e2b_sandbox_manager.py | 395 --- 
.../unit/engine/test_execution_service.py | 87 - src/tests/unit/engine/test_ii_server_shell.py | 51 - src/tests/unit/engine/test_plan_milestones.py | 76 - src/tests/unit/engine/test_sandboxes_r4.py | 510 ---- .../engine/test_v1_agent_factory_skills.py | 74 - .../unit/engine/test_v1_agent_main_r4.py | 980 -------- .../engine/test_v1_agent_session_store.py | 617 ----- .../unit/engine/test_v1_agent_sessions.py | 557 ----- .../engine/test_v1_agent_sessions_deep.py | 209 -- .../unit/engine/test_v1_agents_agent_deep.py | 1485 ----------- .../engine/test_v1_agents_response_handler.py | 384 --- .../engine/test_v1_agents_tool_manager.py | 461 ---- src/tests/unit/engine/test_v1_events.py | 1041 -------- .../unit/engine/test_v1_factory_converter.py | 241 -- .../unit/engine/test_v1_factory_tools.py | 391 --- .../unit/engine/test_v1_function_model.py | 363 --- .../engine/test_v1_models_anthropic_claude.py | 145 ++ src/tests/unit/engine/test_v1_models_base.py | 283 --- .../unit/engine/test_v1_models_base_deep.py | 694 ------ .../unit/engine/test_v1_models_gemini_deep.py | 740 ------ .../engine/test_v1_models_google_gemini.py | 858 ------- .../test_v1_models_google_interactions.py | 875 ------- .../engine/test_v1_models_openai_responses.py | 13 +- .../engine/test_v1_models_vertexai_claude.py | 30 - src/tests/unit/engine/test_v1_run_agent.py | 645 ----- .../unit/engine/test_v1_run_agent_deep.py | 716 ------ src/tests/unit/engine/test_v1_sandboxes.py | 604 ----- .../unit/engine/test_v1_sessions_media_r4.py | 723 ------ .../unit/engine/test_v1_skills_builtin.py | 536 ---- .../engine/test_v1_tools_connectors_github.py | 626 ----- .../engine/test_v1_tools_connectors_r4.py | 743 ------ .../engine/test_v1_tools_function_deep.py | 960 -------- src/tests/unit/engine/test_v1_tools_misc.py | 1226 --------- .../unit/engine/test_v1_tools_misc_r4.py | 1145 --------- .../unit/files/test_agent_file_helpers.py | 59 - src/tests/unit/files/test_file_exceptions.py | 37 + 
src/tests/unit/files/test_file_router.py | 485 ---- .../unit/files/test_file_service_deep.py | 1 + src/tests/unit/files/test_media_library.py | 64 - src/tests/unit/files/test_signed_url_batch.py | 90 - .../unit/files/test_storage_proxy_router.py | 217 ++ src/tests/unit/files/test_upload_flow.py | 99 - .../unit/integrations/test_a2a_as_client.py | 1058 -------- .../unit/integrations/test_a2a_as_server.py | 465 ---- .../unit/integrations/test_a2a_client_r4.py | 712 ------ .../unit/integrations/test_composio_client.py | 71 + .../unit/integrations/test_composio_r4.py | 872 ------- .../integrations/test_composio_service.py | 352 --- .../test_connectors_revenuecat.py | 129 - .../integrations/test_connectors_router.py | 494 ---- .../test_connectors_tools_loader.py | 257 -- .../test_enhance_prompt_coverage.py | 226 -- .../unit/integrations/test_mcp_sse_agent.py | 465 ---- .../unit/integrations/test_mcp_sse_events.py | 756 ------ .../unit/integrations/test_mcp_sse_mount.py | 36 - .../unit/integrations/test_mcp_sse_oauth.py | 854 ------- .../unit/integrations/test_mcp_sse_r4.py | 793 ------ .../integrations/test_mcp_sse_wellknown.py | 295 --- src/tests/unit/mobile/test_apple_service.py | 228 -- src/tests/unit/plans/test_plan_types.py | 14 + .../unit/projects/test_database_service.py | 136 - src/tests/unit/projects/test_deployments.py | 581 ----- .../unit/projects/test_deployments_service.py | 146 -- .../unit/projects/test_design_service.py | 809 ------ .../unit/projects/test_design_service_r4.py | 1239 ---------- .../projects/test_project_router_coverage.py | 490 ---- .../unit/projects/test_project_schemas.py | 320 +-- .../unit/projects/test_project_service.py | 95 - .../unit/projects/test_projects_misc_r4.py | 445 ---- .../unit/projects/test_subdomain_service.py | 187 -- .../unit/realtime/test_cancel_handler.py | 285 ++- .../unit/realtime/test_database_subscriber.py | 131 - .../test_design_state_socket_handlers.py | 276 --- src/tests/unit/realtime/test_event_bus.py | 278 
--- .../unit/realtime/test_event_converter.py | 300 +++ src/tests/unit/realtime/test_event_service.py | 46 - .../realtime/test_event_stream_filters.py | 45 - .../unit/realtime/test_events_publisher_r4.py | 382 --- .../unit/realtime/test_handler_factory.py | 41 - .../realtime/test_memory_session_store.py | 236 ++ .../unit/realtime/test_pubsub_singleton.py | 73 + .../unit/realtime/test_realtime_schemas.py | 63 + .../realtime/test_socket_command_handlers.py | 517 ---- src/tests/unit/realtime/test_socket_deep.py | 265 -- .../unit/realtime/test_socket_handlers_r4.py | 2181 ----------------- .../unit/realtime/test_socket_schemas.py | 564 ----- .../realtime/test_socket_session_store.py | 372 --- .../unit/realtime/test_socket_socketio.py | 552 ----- .../unit/realtime/test_socketio_manager.py | 121 - src/tests/unit/realtime/test_socketio_r4.py | 770 ------ .../test_submit_testflight_handler.py | 244 -- .../unit/realtime/test_subscribers_r4.py | 616 ----- .../test_workspace_explorer_service.py | 35 + .../unit/scripts/test_stuck_task_control.py | 50 + .../sessions/test_session_plan_updates.py | 129 - .../unit/sessions/test_session_router.py | 670 ----- .../unit/sessions/test_session_service.py | 558 ++++- .../sessions/test_session_service_deep.py | 670 ----- .../sessions/test_session_title_service.py | 215 ++ .../unit/sessions/test_validation_service.py | 251 -- .../unit/settings/test_llm_resolution.py | 102 - src/tests/unit/settings/test_llm_seeding.py | 440 ++-- .../unit/settings/test_llm_service_deep.py | 684 ------ .../unit/settings/test_llm_setting_service.py | 86 - .../unit/settings/test_mcp_oauth_helpers.py | 55 - src/tests/unit/settings/test_mcp_schemas.py | 153 -- .../unit/settings/test_mcp_service_deep.py | 699 ------ .../unit/settings/test_settings_repos_r4.py | 508 ---- src/tests/unit/settings/test_skills_loader.py | 443 ++++ .../unit/storage/test_minio_error_handling.py | 56 + src/tests/unit/tasks/test_task_service.py | 283 +++ 
.../unit/tasks/test_task_service_cache.py | 130 - src/tests/unit/users/test_user_schemas.py | 43 + .../unit/workers/test_celery_broker_url.py | 73 + .../unit/workers/test_celery_tasks_r4.py | 398 --- src/tests/unit/workers/test_cron_tasks_r4.py | 742 ------ .../workers/test_extend_sandbox_timeout.py | 412 ++-- uv.lock | 71 +- 389 files changed, 27707 insertions(+), 69207 deletions(-) create mode 100644 .github/copilot-instructions.md create mode 100644 .github/instructions/diagram.instructions.md create mode 100644 .github/prompts/e2e-test-cycle.prompt.md create mode 100644 docker/.stack.env.local.example create mode 100644 docker/docker-compose.local.yaml create mode 100644 docs/docs/architecture-local-to-cloud.md create mode 100644 docs/docs/core-infrastructure.md create mode 100644 docs/docs/feature-branch-analysis.md create mode 100644 docs/docs/getting-started.md create mode 100644 docs/docs/local-docker-sandbox.md create mode 100644 docs/docs/required-environment-variables/index.md create mode 100644 docs/docs/required-environment-variables/llm-auth.md create mode 100644 docs/docs/required-environment-variables/sandbox-server.md create mode 100644 docs/migration-knowledge.md create mode 100644 docs/rebase-analysis/01-path-mapping.md create mode 100644 docs/rebase-analysis/02-baseline-changes.md create mode 100644 docs/rebase-analysis/03-three-way-assessment.md create mode 100644 docs/rebase-analysis/04-rebase-plan.md create mode 100644 docs/rebase-analysis/05-post-rebase-audit.md create mode 100644 docs/rebase-analysis/06-full-feature-audit.md create mode 100644 frontend/src/lib/__tests__/utils.test.ts create mode 100644 frontend/src/state/__tests__/agent-sandbox-status.test.ts create mode 100644 migrations/versions/20260412_000004_add_session_delete_after.py create mode 100755 scripts/html_to_pdf.py create mode 100644 scripts/local/create_template_from_images.py create mode 100644 scripts/local/migrate_events.py create mode 100644 
scripts/local/migrate_old_db.py create mode 100644 scripts/local/migrate_remaining_data.py create mode 100644 scripts/local/rewrite_localhost_urls.py create mode 100755 scripts/local/stuck_task_control.sh create mode 100644 scripts/local/test_e2e.py create mode 100644 scripts/local/test_session.py create mode 100644 scripts/local/upload_slide_assets.py create mode 100644 scripts/local/windows-port-forward.ps1 create mode 100755 scripts/stack_control.sh create mode 100644 src/ii_agent/agents/sandboxes/docker.py create mode 100644 src/ii_agent/agents/sandboxes/docker_shell.py create mode 100644 src/ii_agent/agents/sandboxes/orphan_cleanup.py create mode 100644 src/ii_agent/agents/sandboxes/port_manager.py delete mode 100644 src/ii_agent/core/storage/providers/local.py create mode 100644 src/ii_agent/files/slide_assets_router.py create mode 100644 src/ii_agent/files/storage_proxy_router.py delete mode 100644 src/tests/integration/test_auth_session_chat_flow.py delete mode 100644 src/tests/integration/test_settings_resolution_flow.py delete mode 100644 src/tests/repositories/test_engine_files_integrations_repositories.py delete mode 100644 src/tests/repositories/test_realtime_sessions_settings_repositories.py create mode 100644 src/tests/unit/__init__.py create mode 100644 src/tests/unit/agent/__init__.py create mode 100644 src/tests/unit/agent/test_agent_exceptions.py create mode 100644 src/tests/unit/agent/test_agent_utils.py create mode 100644 src/tests/unit/agent/test_claude_helpers.py create mode 100644 src/tests/unit/agent/test_docker_sandbox.py create mode 100644 src/tests/unit/agent/test_docker_sandbox_readiness_config.py create mode 100644 src/tests/unit/agent/test_function_tool.py create mode 100644 src/tests/unit/agent/test_metrics.py create mode 100644 src/tests/unit/agent/test_orphan_cleanup.py create mode 100644 src/tests/unit/agent/test_port_manager.py delete mode 100644 src/tests/unit/agent/test_prompt_rendering.py create mode 100644 
src/tests/unit/agent/test_research_prompt.py create mode 100644 src/tests/unit/agent/test_run_input_output.py create mode 100644 src/tests/unit/agent/test_run_messages.py create mode 100644 src/tests/unit/agent/test_sandbox_exceptions.py delete mode 100644 src/tests/unit/agent/test_sandbox_provider.py create mode 100644 src/tests/unit/agent/test_sandbox_schemas.py create mode 100644 src/tests/unit/agent/test_sandbox_settings.py create mode 100644 src/tests/unit/agent/test_session_summary.py create mode 100644 src/tests/unit/agent/test_timer.py create mode 100644 src/tests/unit/app/test_orphan_cleanup.py create mode 100644 src/tests/unit/app/test_routers_smoke.py create mode 100644 src/tests/unit/auth/test_auth_exceptions.py create mode 100644 src/tests/unit/auth/test_auth_router_helpers.py delete mode 100644 src/tests/unit/auth/test_auth_router_r4.py delete mode 100644 src/tests/unit/auth/test_dependencies.py create mode 100644 src/tests/unit/auth/test_oidc_verify.py delete mode 100644 src/tests/unit/auth/test_user_service.py delete mode 100644 src/tests/unit/auth/test_user_service_deep.py delete mode 100644 src/tests/unit/auth/test_waitlist.py delete mode 100644 src/tests/unit/billing/test_billing_customer_service.py create mode 100644 src/tests/unit/billing/test_billing_service_pure.py delete mode 100644 src/tests/unit/billing/test_checkout_service.py delete mode 100644 src/tests/unit/billing/test_handler_billing.py delete mode 100644 src/tests/unit/billing/test_import_paths.py delete mode 100644 src/tests/unit/billing/test_usage_service.py delete mode 100644 src/tests/unit/celery/test_manager_singleton.py delete mode 100644 src/tests/unit/celery/test_tasks_storybook.py create mode 100644 src/tests/unit/chat/test_anthropic_cache_control.py delete mode 100644 src/tests/unit/chat/test_chat_context_manager.py delete mode 100644 src/tests/unit/chat/test_chat_dependencies.py delete mode 100644 src/tests/unit/chat/test_chat_llm_anthropic_deep.py delete mode 100644 
src/tests/unit/chat/test_chat_llm_anthropic_prompt_converter.py delete mode 100644 src/tests/unit/chat/test_chat_llm_anthropic_provider.py delete mode 100644 src/tests/unit/chat/test_chat_llm_custom.py delete mode 100644 src/tests/unit/chat/test_chat_llm_custom_deep.py delete mode 100644 src/tests/unit/chat/test_chat_llm_openai.py delete mode 100644 src/tests/unit/chat/test_chat_llm_openai_deep.py create mode 100644 src/tests/unit/chat/test_chat_llm_utils.py delete mode 100644 src/tests/unit/chat/test_chat_media_handlers.py delete mode 100644 src/tests/unit/chat/test_chat_media_modes.py create mode 100644 src/tests/unit/chat/test_chat_message_history_service.py delete mode 100644 src/tests/unit/chat/test_chat_router.py delete mode 100644 src/tests/unit/chat/test_chat_service.py delete mode 100644 src/tests/unit/chat/test_chat_service_r4.py delete mode 100644 src/tests/unit/chat/test_chat_vectorstore.py delete mode 100644 src/tests/unit/chat/test_context_manager_hooks.py delete mode 100644 src/tests/unit/chat/test_llm_loop_service.py create mode 100644 src/tests/unit/chat/test_media_registry.py create mode 100644 src/tests/unit/chat/test_message_service.py create mode 100644 src/tests/unit/chat/test_prompt_converter.py create mode 100644 src/tests/unit/chat/test_turn_loop_service.py create mode 100644 src/tests/unit/content/test_media_schemas.py delete mode 100644 src/tests/unit/content/test_media_service.py delete mode 100644 src/tests/unit/content/test_nano_banana_service.py delete mode 100644 src/tests/unit/content/test_skill_service.py delete mode 100644 src/tests/unit/content/test_skills_seeding_coverage.py delete mode 100644 src/tests/unit/content/test_slide_content_processor.py delete mode 100644 src/tests/unit/content/test_slides_deep.py delete mode 100644 src/tests/unit/content/test_slides_design_r4.py delete mode 100644 src/tests/unit/content/test_slides_design_router_coverage.py delete mode 100644 src/tests/unit/content/test_slides_design_service.py 
delete mode 100644 src/tests/unit/content/test_slides_nano_banana.py delete mode 100644 src/tests/unit/content/test_storybook_ai_edit_service.py delete mode 100644 src/tests/unit/content/test_storybook_deep.py delete mode 100644 src/tests/unit/content/test_storybook_edit_service.py delete mode 100644 src/tests/unit/content/test_storybook_export_utils.py delete mode 100644 src/tests/unit/content/test_storybook_exports_r4.py delete mode 100644 src/tests/unit/content/test_storybook_pdf_export.py delete mode 100644 src/tests/unit/content/test_storybook_router_coverage.py delete mode 100644 src/tests/unit/content/test_storybook_router_r4.py delete mode 100644 src/tests/unit/content/test_storybook_service.py create mode 100644 src/tests/unit/core/test_config_credits.py create mode 100644 src/tests/unit/core/test_config_llm.py create mode 100644 src/tests/unit/core/test_config_mcp.py create mode 100644 src/tests/unit/core/test_config_oauth.py create mode 100644 src/tests/unit/core/test_config_sources.py create mode 100644 src/tests/unit/core/test_encryption.py delete mode 100644 src/tests/unit/core/test_middleware.py create mode 100644 src/tests/unit/core/test_middleware_exception_handler.py create mode 100644 src/tests/unit/core/test_middleware_request_context.py delete mode 100644 src/tests/unit/core/test_redis_cache_r4.py create mode 100644 src/tests/unit/core/test_secrets_encryption.py delete mode 100644 src/tests/unit/core/test_settings.py create mode 100644 src/tests/unit/core/test_storage_client.py create mode 100644 src/tests/unit/core/test_storage_path_resolver.py create mode 100644 src/tests/unit/credits/test_credit_models.py delete mode 100644 src/tests/unit/credits/test_credit_repository.py create mode 100644 src/tests/unit/credits/test_credit_service.py create mode 100644 src/tests/unit/credits/test_credit_usage_handler.py delete mode 100644 src/tests/unit/design/test_project_design_service_helpers.py delete mode 100644 
src/tests/unit/engine/test_agent_service.py delete mode 100644 src/tests/unit/engine/test_e2b_sandbox_manager.py delete mode 100644 src/tests/unit/engine/test_execution_service.py delete mode 100644 src/tests/unit/engine/test_ii_server_shell.py delete mode 100644 src/tests/unit/engine/test_plan_milestones.py delete mode 100644 src/tests/unit/engine/test_sandboxes_r4.py delete mode 100644 src/tests/unit/engine/test_v1_agent_factory_skills.py delete mode 100644 src/tests/unit/engine/test_v1_agent_main_r4.py delete mode 100644 src/tests/unit/engine/test_v1_agent_session_store.py delete mode 100644 src/tests/unit/engine/test_v1_agent_sessions.py delete mode 100644 src/tests/unit/engine/test_v1_agent_sessions_deep.py delete mode 100644 src/tests/unit/engine/test_v1_agents_agent_deep.py delete mode 100644 src/tests/unit/engine/test_v1_agents_response_handler.py delete mode 100644 src/tests/unit/engine/test_v1_agents_tool_manager.py delete mode 100644 src/tests/unit/engine/test_v1_events.py delete mode 100644 src/tests/unit/engine/test_v1_factory_converter.py delete mode 100644 src/tests/unit/engine/test_v1_factory_tools.py delete mode 100644 src/tests/unit/engine/test_v1_function_model.py delete mode 100644 src/tests/unit/engine/test_v1_models_base.py delete mode 100644 src/tests/unit/engine/test_v1_models_base_deep.py delete mode 100644 src/tests/unit/engine/test_v1_models_gemini_deep.py delete mode 100644 src/tests/unit/engine/test_v1_models_google_gemini.py delete mode 100644 src/tests/unit/engine/test_v1_models_google_interactions.py delete mode 100644 src/tests/unit/engine/test_v1_models_vertexai_claude.py delete mode 100644 src/tests/unit/engine/test_v1_run_agent.py delete mode 100644 src/tests/unit/engine/test_v1_run_agent_deep.py delete mode 100644 src/tests/unit/engine/test_v1_sandboxes.py delete mode 100644 src/tests/unit/engine/test_v1_sessions_media_r4.py delete mode 100644 src/tests/unit/engine/test_v1_skills_builtin.py delete mode 100644 
src/tests/unit/engine/test_v1_tools_connectors_github.py delete mode 100644 src/tests/unit/engine/test_v1_tools_connectors_r4.py delete mode 100644 src/tests/unit/engine/test_v1_tools_function_deep.py delete mode 100644 src/tests/unit/engine/test_v1_tools_misc.py delete mode 100644 src/tests/unit/engine/test_v1_tools_misc_r4.py delete mode 100644 src/tests/unit/files/test_agent_file_helpers.py create mode 100644 src/tests/unit/files/test_file_exceptions.py delete mode 100644 src/tests/unit/files/test_file_router.py delete mode 100644 src/tests/unit/files/test_media_library.py delete mode 100644 src/tests/unit/files/test_signed_url_batch.py create mode 100644 src/tests/unit/files/test_storage_proxy_router.py delete mode 100644 src/tests/unit/files/test_upload_flow.py delete mode 100644 src/tests/unit/integrations/test_a2a_as_client.py delete mode 100644 src/tests/unit/integrations/test_a2a_as_server.py delete mode 100644 src/tests/unit/integrations/test_a2a_client_r4.py create mode 100644 src/tests/unit/integrations/test_composio_client.py delete mode 100644 src/tests/unit/integrations/test_composio_r4.py delete mode 100644 src/tests/unit/integrations/test_composio_service.py delete mode 100644 src/tests/unit/integrations/test_connectors_revenuecat.py delete mode 100644 src/tests/unit/integrations/test_connectors_router.py delete mode 100644 src/tests/unit/integrations/test_connectors_tools_loader.py delete mode 100644 src/tests/unit/integrations/test_enhance_prompt_coverage.py delete mode 100644 src/tests/unit/integrations/test_mcp_sse_agent.py delete mode 100644 src/tests/unit/integrations/test_mcp_sse_events.py delete mode 100644 src/tests/unit/integrations/test_mcp_sse_mount.py delete mode 100644 src/tests/unit/integrations/test_mcp_sse_oauth.py delete mode 100644 src/tests/unit/integrations/test_mcp_sse_r4.py delete mode 100644 src/tests/unit/integrations/test_mcp_sse_wellknown.py delete mode 100644 src/tests/unit/mobile/test_apple_service.py create mode 100644 
src/tests/unit/plans/test_plan_types.py delete mode 100644 src/tests/unit/projects/test_database_service.py delete mode 100644 src/tests/unit/projects/test_deployments.py delete mode 100644 src/tests/unit/projects/test_deployments_service.py delete mode 100644 src/tests/unit/projects/test_design_service.py delete mode 100644 src/tests/unit/projects/test_design_service_r4.py delete mode 100644 src/tests/unit/projects/test_project_router_coverage.py delete mode 100644 src/tests/unit/projects/test_project_service.py delete mode 100644 src/tests/unit/projects/test_projects_misc_r4.py delete mode 100644 src/tests/unit/projects/test_subdomain_service.py delete mode 100644 src/tests/unit/realtime/test_database_subscriber.py delete mode 100644 src/tests/unit/realtime/test_design_state_socket_handlers.py delete mode 100644 src/tests/unit/realtime/test_event_bus.py create mode 100644 src/tests/unit/realtime/test_event_converter.py delete mode 100644 src/tests/unit/realtime/test_event_service.py delete mode 100644 src/tests/unit/realtime/test_event_stream_filters.py delete mode 100644 src/tests/unit/realtime/test_events_publisher_r4.py delete mode 100644 src/tests/unit/realtime/test_handler_factory.py create mode 100644 src/tests/unit/realtime/test_memory_session_store.py create mode 100644 src/tests/unit/realtime/test_pubsub_singleton.py create mode 100644 src/tests/unit/realtime/test_realtime_schemas.py delete mode 100644 src/tests/unit/realtime/test_socket_command_handlers.py delete mode 100644 src/tests/unit/realtime/test_socket_deep.py delete mode 100644 src/tests/unit/realtime/test_socket_handlers_r4.py delete mode 100644 src/tests/unit/realtime/test_socket_schemas.py delete mode 100644 src/tests/unit/realtime/test_socket_session_store.py delete mode 100644 src/tests/unit/realtime/test_socket_socketio.py delete mode 100644 src/tests/unit/realtime/test_socketio_manager.py delete mode 100644 src/tests/unit/realtime/test_socketio_r4.py delete mode 100644 
src/tests/unit/realtime/test_submit_testflight_handler.py delete mode 100644 src/tests/unit/realtime/test_subscribers_r4.py create mode 100644 src/tests/unit/scripts/test_stuck_task_control.py delete mode 100644 src/tests/unit/sessions/test_session_plan_updates.py delete mode 100644 src/tests/unit/sessions/test_session_router.py delete mode 100644 src/tests/unit/sessions/test_session_service_deep.py create mode 100644 src/tests/unit/sessions/test_session_title_service.py delete mode 100644 src/tests/unit/sessions/test_validation_service.py delete mode 100644 src/tests/unit/settings/test_llm_resolution.py delete mode 100644 src/tests/unit/settings/test_llm_service_deep.py delete mode 100644 src/tests/unit/settings/test_llm_setting_service.py delete mode 100644 src/tests/unit/settings/test_mcp_oauth_helpers.py delete mode 100644 src/tests/unit/settings/test_mcp_schemas.py delete mode 100644 src/tests/unit/settings/test_mcp_service_deep.py delete mode 100644 src/tests/unit/settings/test_settings_repos_r4.py create mode 100644 src/tests/unit/settings/test_skills_loader.py create mode 100644 src/tests/unit/storage/test_minio_error_handling.py create mode 100644 src/tests/unit/tasks/test_task_service.py delete mode 100644 src/tests/unit/tasks/test_task_service_cache.py create mode 100644 src/tests/unit/users/test_user_schemas.py create mode 100644 src/tests/unit/workers/test_celery_broker_url.py delete mode 100644 src/tests/unit/workers/test_celery_tasks_r4.py delete mode 100644 src/tests/unit/workers/test_cron_tasks_r4.py diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 000000000..db47159fe --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,13 @@ +# Do not use base docker compose commands to do any kind of stack operations. 
+# Instructions on restarting and rebuilding the stack: +# Use the following tool preferentially and prefer --local mode: +scripts/stack_control.sh + +# Other scripts are also available to you under: +scripts/local/* + +# Credentials are available in +docker/.stack.env.local + +# Python venv is located in +~/workspaces/venvs/ii-agent diff --git a/.github/instructions/diagram.instructions.md b/.github/instructions/diagram.instructions.md new file mode 100644 index 000000000..a9a1d7534 --- /dev/null +++ b/.github/instructions/diagram.instructions.md @@ -0,0 +1,572 @@ +--- +applyTo: "**/*.md" +--- + +# Diagrams + +Use Mermaid diagrams instead of ASCII art in all markdown files. Generate GitHub Markdown +compatible Mermaid using only supported features: HEX colors, standard shapes, basic text +formatting. + +- Use Mermaid charts with actual class/interface names in blocks and method/member names in arrows +- If pImpl pattern is used, merge interface class and impl into one block and name it e.g. `SoaMaster(Impl)` + +--- + +## Supported Features + +**Colors:** Apply via `classDef`/`class` (fill/stroke HEX), `linkStyle` (stroke HEX, width, dasharray) + +**Shapes:** Rectangle `[Label]`, circle `((Label))`, stadium `([Label])`, diamond `{Label}`, +subroutine `[[Label]]`, parallelogram `[/Label/]` + +**Arrows:** Solid `-->`, dotted `-.->`, thick `==>`, open `--o`. Customize with `linkStyle` + +**Directions:** `TD` (top-down), `LR` (left-right), `RL` (right-left), `BT` (bottom-top) + +**Text:** Bold `**text**`, italic `_text_`, line breaks `<br/>` (labels only). No per-label font +size/underline/family + +--- + +## Required Theme Configuration + +Every Mermaid diagram MUST include this init directive on the first line: + +```text +%%{init: {'theme':'base', 'themeVariables': {'fontFamily': 'Arial, sans-serif', 'fontSize': '13px', 'fontWeight': 'normal'}}}%% +``` + +- **CRITICAL:** Use `base` theme for automatic GitHub light/dark mode adaptation +- **REQUIRED:** Arial 13px normal weight prevents text cutoff and ensures readability across platforms +- **REQUIRED:** Use `classDef` with fill and stroke only — no explicit `color:#` text color +- **CRITICAL:** Avoid explicit `color:#` specifications as they conflict with automatic theme adaptation +- **NEVER** use explicit text color specifications that override automatic theme adaptation + +--- + +## Dark/Light Mode Compatibility + +These diagrams must render professionally across three targets: + +1. **VS Code** — Markdown Preview Enhanced with GitHub light and dark preview themes +2. **Prince PDF** — exported from Markdown Preview Enhanced (light background) +3. **GitHub** — viewed in both light and dark mode + +### Design Principles + +- For **hierarchical diagrams**, use alpha-transparent fills (8-digit hex `#RRGGBBAA`) on container + subgraphs. This produces automatic bi-directional hierarchy: darker inward on light backgrounds, + lighter inward on dark backgrounds +- For **flat diagrams** and **innermost nodes**, use solid medium-tone fills (45–75% lightness) +- Do NOT specify `color:#` in any `classDef` — let the renderer handle text color +- Use HEX values only — 6-digit (`#RRGGBB`) or 8-digit (`#RRGGBBAA`). 
No CSS color names, no + `rgba()`, no gradients +- Stroke colors should use higher alpha than their corresponding fill for border definition +- All solid fills must have sufficient contrast against both `#ffffff` (light) and `#0d1117` (dark) + backgrounds + +### Recommended Base Fill Colors (Non-Hierarchical Diagrams) + +Medium tones that adapt automatically to both light and dark themes: + +| Purpose | Fill | Stroke | +|---------|------|--------| +| Primary (blue) | `#4a90d9` | `#2c6cb0` | +| Success (green) | `#34a870` | `#1e8850` | +| Warning (orange) | `#e8a838` | `#c08828` | +| Danger (red) | `#d06050` | `#a84838` | +| Purple | `#8e6aad` | `#6e4a8d` | +| Blue-gray | `#5a7a90` | `#3e5e74` | + +--- + +## Hierarchical Diagram Color System + +Many diagrams require up to **four levels of nesting** using subgraphs. Use the alpha-transparent +palette below to create clear visual hierarchy that adapts to both light and dark backgrounds. + +### How It Works + +Container subgraphs use **alpha-transparent fills** (8-digit hex: `#RRGGBBAA`) on a single +base color. The renderer composites these against the page background, automatically creating +bi-directional hierarchy: + +- **Light mode (white background):** Low-alpha outer containers composite to near-white; + higher-alpha inner containers composite to progressively darker shades — subtle to prominent +- **Dark mode (dark background):** Low-alpha outer containers composite to near-black; + higher-alpha inner containers composite to progressively lighter shades — subtle to prominent + +Innermost nodes (Level 4) use **full-opacity solid fills** at ~50–55% lightness, ensuring they +stand out against both backgrounds. + +### Universal Hierarchy Palette + +Container subgraphs (Levels 1–3) share a base blue-gray with increasing alpha. 
Level 4 nodes +are fully opaque: + +| Level | Role | Fill | Stroke | Alpha | +|-------|------|------|--------|-------| +| **L1** | Outermost container | `#5888a833` | `#3c6c904D` | 20% / 30% | +| **L2** | Section container | `#5888a866` | `#3c6c908C` | 40% / 55% | +| **L3** | Module container | `#5888a8A6` | `#3c6c90CC` | 65% / 80% | +| **L4** | Nodes (primary) | `#5888a8` | `#3c6c90` | 100% | + +**Effective appearance after compositing on light (`#ffffff`) and dark (`#0d1117`) backgrounds:** + +| Level | On Light BG | On Dark BG | +|-------|-------------|------------| +| **L1** | `#dee7ee` (very light, subtle) | `#1c2934` (very dark, subtle) | +| **L2** | `#bccfdc` (light) | `#2b4151` (dark) | +| **L3** | `#92b1c6` (medium-light) | `#3e5e75` (medium-dark) | +| **L4** | `#5888a8` (solid, prominent) | `#5888a8` (solid, prominent) | + +### Additional Node Variants (Level 4) + +Use these for semantic differentiation among nodes at the innermost level: + +| Variant | Fill | Stroke | Use For | +|---------|------|--------|---------| +| Blue (default) | `#5888a8` | `#3c6c90` | Standard components | +| Green | `#58a888` | `#3c906c` | Services, APIs, success states | +| Orange | `#c49858` | `#a87c3c` | Queues, async, warnings | +| Red | `#b07070` | `#944c4c` | Errors, critical paths | +| Purple | `#8a78a8` | `#6e5c90` | Auth, security, policies | + +### Applying Hierarchy Styles + +Use `style` directives for subgraph containers and `classDef`/`class` for nodes: + +```text +%% Subgraph fills — alpha-transparent hex (8-digit #RRGGBBAA) +style L1_id fill:#5888a833,stroke:#3c6c904D,stroke-width:2px +style L2_id fill:#5888a866,stroke:#3c6c908C,stroke-width:2px +style L3_id fill:#5888a8A6,stroke:#3c6c90CC,stroke-width:2px + +%% Node fills — fully opaque, use classDef/class +classDef L4 fill:#5888a8,stroke:#3c6c90,stroke-width:2px +class N1,N2,N3 L4 +``` + +### Common Mistakes + +> **CRITICAL:** `classDef`/`class` does NOT style subgraphs — it only styles nodes. 
+> Subgraphs MUST use `style` directives. If you only define `classDef` and `class`, +> nodes will be colored but subgraph containers will render with the default transparent +> background — invisible against the document background. + +--- + +## Subgraph Structure for Hierarchy + +Use nested `subgraph` blocks to represent containment. Each subgraph gets a quoted title label. + +```text +graph TD + subgraph L1["Platform"] + subgraph L2["Service"] + subgraph L3["Module"] + N1["Component A"] + N2["Component B"] + end + end + end +``` + +Rules: + +- **Maximum 4 levels** of nesting (3 subgraph levels + nodes) +- Keep subgraph titles short (under 25 characters) +- Place `style` directives for subgraphs **after the graph definition**, not inside subgraph blocks +- Use descriptive but concise subgraph IDs (e.g., `L2_api`, `L3_auth`) + +--- + +## Edge and Connector Styling + +### Edge Labels + +- Keep labels under 25 characters +- Use abbreviations: "Config" for "Configuration", "Exec" for "Execution", "Auth" for "Authentication" +- Use `|label text|` syntax on the arrow: `A -->|validates| B` + +### linkStyle Directives + +Apply `linkStyle` using 0-based edge index (order edges appear in the source): + +```text +linkStyle 0 stroke:#4a90d9,stroke-width:2px +linkStyle 1 stroke:#d06050,stroke-width:2px,stroke-dasharray:5 5 +``` + +### Recommended Edge Colors + +| Type | Stroke Color | Style | +|------|-------------|-------| +| Data flow | `#4a90d9` | solid, 2px | +| Control flow | `#34a870` | solid, 2px | +| Error/fallback | `#d06050` | dashed, 2px | +| Async/eventual | `#e8a838` | dashed, 2px | +| Weak/optional | `#8a8a8a` | dotted, 1px | + +--- + +## Text Length Optimization + +- **CRITICAL:** Keep node labels concise to prevent text cutoff in diagram boxes +- **REQUIRED:** Remove file extensions from names in diagrams (e.g., `execution_pipeline` not `execution_pipeline.groovy`) +- **REQUIRED:** Truncate long edge labels (e.g., `QT-SECURITY/ECG2_SECURITY_EXEC` not 
`QT-SECURITY/ECG2_SECURITY_EXECUTION`) +- **REQUIRED:** Shorten descriptive text while preserving meaning +- Recommended: Keep node text under 30 characters per line, edge labels under 25 characters +- Use abbreviations for common terms: "Config", "Exec", "Auth", "Mgmt", "Svc", "DB" +- Break long text into multiple lines using `<br/>` tags when needed +- Prioritize essential information over complete names in constrained diagram space + +--- + +## Object Ownership Diagrams + +Use member names as link text, not legend descriptions. + +Copy the legend below once per document, then create ownership diagrams as needed: + +```text +%%{init: {'theme':'base', 'themeVariables': {'fontFamily': 'Arial, sans-serif', 'fontSize': '13px', 'fontWeight': 'normal'}}}%% +graph LR + A[Class A] + B[Class B] + C[Class C] + D[Class D] + + A -->|member_b_| B + A -->|member_d_| D + A --o|member_c_| C + D -.->|borrowed_q_| Q + + linkStyle 0 stroke:#5a5a5a,stroke-width:2px + linkStyle 1 stroke:#5a5a5a,stroke-width:2px + linkStyle 2 stroke:#4a90d9,stroke-width:2px + linkStyle 3 stroke:#5a5a5a,stroke-width:2px + + classDef default fill:#c8d5e2,stroke:#7898b0,stroke-width:1px +``` + +### 3 Ownership Dimensions (visual encoding: line style + arrow end + color) + +1. **Lifetime Management** — destruction responsibility: + - **Owns:** `unique_ptr` / `shared_ptr` / manual delete → solid lines + - **Borrows:** raw pointer / `weak_ptr` → dotted lines (`-.->`) + +2. **Object Lifetime** — creation patterns: + - **Permanent:** init-time, program lifetime → arrow end `>` + - **Temporary:** request/task creation → circle end `o` + +3. **Type Polymorphism** — member type analysis: + - **Non-polymorphic:** concrete type, no virtual dispatch → dark gray stroke (`#5a5a5a`) + - **Polymorphic:** base/interface type with virtual functions → blue stroke (`#4a90d9`) + +**Analysis:** Find member variables (pointers, references, smart pointers, containers). Check +change/creation patterns. Exclude PImpl without runtime dispatch. 
+ +--- + +## Flat Peer Subgraph Diagrams + +For diagrams where **multiple peer-level subgraphs** each represent a distinct semantic domain +(not nested hierarchy), use **color-coordinated groups**: the subgraph container uses the base +color at **40% alpha** (`66` suffix), and child nodes use the same base color at **100% opacity**. + +### Color-Coordinated Group Palette + +Each group shares a base color. The container gets alpha-transparent fill; nodes get solid fill: + +| Group | Container Fill | Container Stroke | Node Fill | Node Stroke | +|-------|---------------|-----------------|-----------|-------------| +| Green | `#34a87066` | `#1e88508C` | `#34a870` | `#1e8850` | +| Blue | `#4a90d966` | `#2c6cb08C` | `#4a90d9` | `#2c6cb0` | +| Orange | `#e8a83866` | `#c088288C` | `#e8a838` | `#c08828` | +| Purple | `#8e6aad66` | `#6e4a8d8C` | `#8e6aad` | `#6e4a8d` | +| Blue-gray | `#5a7a9066` | `#3e5e748C` | `#5a7a90` | `#3e5e74` | +| Red | `#d0605066` | `#a848388C` | `#d06050` | `#a84838` | + +### Flat Peer Template + +```text +%%{init: {'theme':'base', 'themeVariables': {'fontFamily': 'Arial, sans-serif', 'fontSize': '13px', 'fontWeight': 'normal'}}}%% +flowchart TD + subgraph GRP_A["Group A"] + A1["Node A1"] + A2["Node A2"] + end + + subgraph GRP_B["Group B"] + B1["Node B1"] + B2["Node B2"] + end + + A1 -->|connects| B1 + A2 -.->|fallback| B2 + + style GRP_A fill:#34a87066,stroke:#1e88508C,stroke-width:2px + style GRP_B fill:#4a90d966,stroke:#2c6cb08C,stroke-width:2px + + classDef grpA fill:#34a870,stroke:#1e8850,stroke-width:2px + classDef grpB fill:#4a90d9,stroke:#2c6cb0,stroke-width:2px + class A1,A2 grpA + class B1,B2 grpB + + linkStyle 0 stroke:#34a870,stroke-width:2px + linkStyle 1 stroke:#4a90d9,stroke-width:2px,stroke-dasharray:5 5 +``` + +Rules: + +- **Every subgraph** MUST have a `style` directive with alpha-transparent fill +- Node `classDef` uses the **same base color** as its parent subgraph container (at 100% opacity) +- Edge `linkStyle` colors should 
match the source or target subgraph color family +- Maximum **6 color groups** per diagram for visual clarity + +--- + +## Flat Peer Subgraph Diagrams — Border Only + +A lighter variant of flat peer subgraphs where **only colored borders** differentiate groups — +no background fills on containers or nodes. This produces a minimal, clean appearance where +nodes inherit the page background and colored strokes provide all semantic grouping. + +**When to use:** Prefer border-only when diagrams have many nodes and filled backgrounds feel +visually heavy, or when maximum text readability is needed (text sits directly on the page +background). + +### Text Color for Transparent Fills + +With `fill:none`, the Mermaid renderer cannot auto-compute a contrasting text color because +there is no opaque fill to measure against. Text defaults to dark, which is unreadable on dark +backgrounds. The solution: **explicitly set a balanced mid-tone text color** that provides +sufficient contrast against both light (`#ffffff`) and dark (`#0d1117`) backgrounds. + +| Variable | Value | vs White | vs Dark | Role | +|----------|-------|----------|---------|------| +| `primaryTextColor` | `#6b7b8b` | 4.35:1 | 4.35:1 | Subgraph titles, default text | +| `color` in `classDef` | `#6b7b8b` | 4.35:1 | 4.35:1 | Node label text | + +> **Exception to the "no explicit `color:#`" rule:** The border-only variant REQUIRES explicit +> `color:#6b7b8b` in `classDef` and `primaryTextColor` in `themeVariables` because transparent +> fills break the renderer's automatic text color computation. This is the only variant where +> explicit text color is permitted. + +### Border-Only Group Palette + +Each group is identified by stroke color alone. Containers and nodes share the same stroke. 
+Fills are explicitly `none` (transparent): + +| Group | Container Stroke | Node Stroke | Stroke Width | +|-------|-----------------|-------------|--------------| +| Green | `#34a870` | `#34a870` | 2px | +| Blue | `#4a90d9` | `#4a90d9` | 2px | +| Orange | `#e8a838` | `#e8a838` | 2px | +| Purple | `#8e6aad` | `#8e6aad` | 2px | +| Blue-gray | `#5a7a90` | `#5a7a90` | 2px | +| Red | `#d06050` | `#d06050` | 2px | + +### Border-Only Flat Peer Template + +```text +%%{init: {'theme':'base', 'themeVariables': {'fontFamily': 'Arial, sans-serif', 'fontSize': '13px', 'fontWeight': 'normal', 'primaryTextColor': '#6b7b8b'}}}%% +flowchart TD + subgraph GRP_A["Group A"] + A1["Node A1"] + A2["Node A2"] + end + + subgraph GRP_B["Group B"] + B1["Node B1"] + B2["Node B2"] + end + + A1 -->|connects| B1 + A2 -.->|fallback| B2 + + style GRP_A fill:none,stroke:#34a870,stroke-width:2px,color:#6b7b8b + style GRP_B fill:none,stroke:#4a90d9,stroke-width:2px,color:#6b7b8b + + classDef grpA fill:none,stroke:#34a870,stroke-width:2px,color:#6b7b8b + classDef grpB fill:none,stroke:#4a90d9,stroke-width:2px,color:#6b7b8b + class A1,A2 grpA + class B1,B2 grpB + + linkStyle 0 stroke:#34a870,stroke-width:2px + linkStyle 1 stroke:#4a90d9,stroke-width:2px,stroke-dasharray:5 5 +``` + +Rules: + +- **All fills are `none`** — both subgraph `style` directives and node `classDef` use `fill:none` +- **All `classDef` MUST include `color:#6b7b8b`** — required for node label readability on both + light and dark backgrounds (transparent fills break auto text color computation) +- **All subgraph `style` directives MUST include `color:#6b7b8b`** — required for subgraph title + readability; `primaryTextColor` alone does not override subgraph label color +- **The init directive MUST include `'primaryTextColor': '#6b7b8b'`** — covers edge labels and + any other text not styled by `classDef` or subgraph `style` +- Stroke colors use the **medium-tone base colors** (45–75% lightness) for visibility on both + light and dark 
backgrounds +- Edge `linkStyle` colors should match the source or target group's stroke color +- Maximum **6 color groups** per diagram for visual clarity + +--- + +## Sequence Diagrams + +Sequence diagrams have unique dark mode challenges because participant labels, message text, +loop labels, and notes render against the **page background** — not against styled node fills. +With the `base` theme, all text defaults to dark, which is invisible on dark backgrounds. + +### Required Theme Configuration for Sequence Diagrams + +Sequence diagrams MUST use an extended `init` directive that sets explicit colors for all +visual elements: + +```text +%%{init: {'theme':'base', 'themeVariables': {'fontFamily': 'Arial, sans-serif', 'fontSize': '13px', 'fontWeight': 'normal', 'actorBkg': '#5888a8', 'actorBorder': '#3c6c90', 'actorTextColor': '#f5f5f5', 'actorLineColor': '#5a7a90', 'signalColor': '#5a7a90', 'signalTextColor': '#6b7b8b', 'noteBkgColor': '#c49858', 'noteBorderColor': '#a87c3c', 'noteTextColor': '#f5f5f5', 'loopTextColor': '#6b7b8b', 'labelBoxBkgColor': '#5888a866', 'labelBoxBorderColor': '#3c6c908C', 'activationBkgColor': '#5888a866', 'activationBorderColor': '#3c6c90'}}}%% +``` + +> **Exception to the "no explicit text color" rule:** Sequence diagrams REQUIRE explicit +> `actorTextColor`, `signalTextColor`, `noteTextColor`, and `loopTextColor` in `themeVariables` +> because these text elements render against either solid fills (actors, notes) or the page +> background (signals, loops) — neither of which the `base` theme can auto-adapt for dark mode. +> This is the same category of exception as the border-only flowchart variant. 
+ +### Sequence Diagram Color Variables + +| Variable | Value | Purpose | +|----------|-------|---------| +| `actorBkg` | `#5888a8` | Participant box fill (solid medium-tone) | +| `actorBorder` | `#3c6c90` | Participant box border | +| `actorTextColor` | `#f5f5f5` | Participant label text (light on medium fill) | +| `actorLineColor` | `#5a7a90` | Participant lifeline | +| `signalColor` | `#5a7a90` | Arrow/message line color | +| `signalTextColor` | `#6b7b8b` | Message label text (mid-tone, floats on page bg) | +| `noteBkgColor` | `#c49858` | Note box fill (medium-tone orange) | +| `noteBorderColor` | `#a87c3c` | Note box border | +| `noteTextColor` | `#f5f5f5` | Note text (light on medium fill) | +| `loopTextColor` | `#6b7b8b` | Loop/alt/opt label text (mid-tone, on page bg) | +| `labelBoxBkgColor` | `#5888a866` | Loop label box fill (alpha-transparent) | +| `labelBoxBorderColor` | `#3c6c908C` | Loop label box border | +| `activationBkgColor` | `#5888a866` | Activation bar fill (alpha-transparent) | +| `activationBorderColor` | `#3c6c90` | Activation bar border | + +### Design Rationale + +- **Elements with solid fills** (actor boxes, note boxes): use `#f5f5f5` (near-white) text + because the medium-tone fill provides a stable, contrast-guaranteed background regardless + of page theme +- **Elements floating on page background** (signal labels, loop text): use `#6b7b8b` (mid-tone) + which provides 4.35:1 contrast against both white (`#ffffff`) and dark (`#0d1117`) backgrounds +- **Alpha-transparent fills** (loop boxes, activation bars): use `66` / `8C` alpha suffixes + for the same bi-directional hierarchy effect as subgraph containers + +### Sequence Diagram Template + +```text +%%{init: {'theme':'base', 'themeVariables': {'fontFamily': 'Arial, sans-serif', 'fontSize': '13px', 'fontWeight': 'normal', 'actorBkg': '#5888a8', 'actorBorder': '#3c6c90', 'actorTextColor': '#f5f5f5', 'actorLineColor': '#5a7a90', 'signalColor': '#5a7a90', 'signalTextColor': '#6b7b8b', 
'noteBkgColor': '#c49858', 'noteBorderColor': '#a87c3c', 'noteTextColor': '#f5f5f5', 'loopTextColor': '#6b7b8b', 'labelBoxBkgColor': '#5888a866', 'labelBoxBorderColor': '#3c6c908C', 'activationBkgColor': '#5888a866', 'activationBorderColor': '#3c6c90'}}}%% +sequenceDiagram + participant A as Service A + participant B as Service B + participant C as Service C + + A->>B: request() + B->>C: delegate() + C-->>B: response + B-->>A: result + + loop Retry + A->>B: retry() + B-->>A: ack + end + + Note over B,C: Processing phase +``` + +Rules: + +- **Copy the full `init` directive** for every sequence diagram — do not use the shorter + flowchart init (it lacks the sequence-specific variables) +- Keep participant aliases short (2–4 characters) to reduce horizontal sprawl +- Use `<br/>` in participant display names for multi-line labels +- Prefer `->>` (solid with arrowhead) for synchronous calls, `-->>` (dashed) for responses +- Keep message labels under 30 characters + +--- + +## Basic Template (Non-Hierarchical, No Subgraphs) + +```text +%%{init: {'theme':'base', 'themeVariables': {'fontFamily': 'Arial, sans-serif', 'fontSize': '13px', 'fontWeight': 'normal'}}}%% +graph LR + A["Component A"] -->|data flow| B["Component B"] + B -.->|fallback| C["Component C"] + C ==>|critical| D["Component D"] + + classDef primary fill:#4a90d9,stroke:#2c6cb0,stroke-width:2px + classDef secondary fill:#34a870,stroke:#1e8850,stroke-width:2px + class A,B primary + class C,D secondary + + linkStyle 0 stroke:#4a90d9,stroke-width:2px + linkStyle 1 stroke:#d06050,stroke-width:2px,stroke-dasharray:5 5 + linkStyle 2 stroke:#34a870,stroke-width:3px +``` + +## Hierarchical Template (4 Levels) + +```text +%%{init: {'theme':'base', 'themeVariables': {'fontFamily': 'Arial, sans-serif', 'fontSize': '13px', 'fontWeight': 'normal'}}}%% +graph TD + subgraph L1["Outer Container"] + subgraph L2["Section"] + subgraph L3["Module"] + N1["Node A"] + N2["Node B"] + end + end + end + + N1 -->|connects| N2 + + style L1 fill:#5888a833,stroke:#3c6c904D,stroke-width:2px + style L2 fill:#5888a866,stroke:#3c6c908C,stroke-width:2px + style L3 fill:#5888a8A6,stroke:#3c6c90CC,stroke-width:2px + + classDef L4 fill:#5888a8,stroke:#3c6c90,stroke-width:2px + class N1,N2 L4 +``` + +--- + +## PDF Export + +Use **Markdown Preview Enhanced → Puppeteer (Chromium)** for PDF export. Puppeteer renders +in a full Chromium browser, so Mermaid blocks execute natively — no pre-rendering needed. 
+ +- **Do NOT use Prince for documents containing Mermaid diagrams.** Prince is a CSS-to-PDF + engine that does not execute JavaScript; Mermaid blocks appear as raw text +- The Puppeteer export renders against a **light background** by default — alpha-transparent + container fills (`#RRGGBBAA`) will composite as the light-mode palette +- All three rendering targets (VS Code preview, GitHub, Puppeteer PDF) use Chromium engines, + ensuring consistent Mermaid rendering across all outputs + +--- + +## Limitations + +- **HEX only** — 6-digit (`#RRGGBB`) or 8-digit with alpha (`#RRGGBBAA`). No CSS color names, + no `rgba()`, no HTML/CSS/SVG/gradients/external styles +- **8-digit hex** (`#RRGGBBAA`) required for hierarchy containers — supported by all modern + browsers, GitHub's Mermaid renderer, VS Code (Chromium), and Prince 12+ +- Global theme via `%%{init: { "themeVariables": {...} }}%%` for font configuration +- **NO inline comments** (`%%comment%%`) in GitHub renderer — use separate comment blocks if needed +- **MUST** have blank line after closing ` ``` ` fence before any following text +- Subgraph nesting is limited to 3 levels deep (+ nodes = 4 visual levels) +- `linkStyle` indices are 0-based and count edges in source order +- `style` directive is the most reliable way to color subgraphs (preferred over `classDef` + `class` for subgraphs) +- GitHub, VS Code Markdown Preview Enhanced, and the Puppeteer PDF export may have minor rendering differences — test across all three targets diff --git a/.github/prompts/e2e-test-cycle.prompt.md b/.github/prompts/e2e-test-cycle.prompt.md new file mode 100644 index 000000000..8d3517a4d --- /dev/null +++ b/.github/prompts/e2e-test-cycle.prompt.md @@ -0,0 +1,272 @@ +--- +mode: agent +description: "Run full E2E test sweep, diagnose failures, fix+rebuild+retest until all tests pass" +--- + +# E2E Test / Fix / Retest Cycle + +You are an autonomous test engineer. 
Your job is to run the full end-to-end test suite, identify +every failure, fix each one, and re-verify until **all runnable tests pass**. Do not stop until the +outer loop completes with zero failures. + +## Prerequisites + +Before starting, verify the stack is healthy: + +```bash +# Check all services are running +./scripts/stack_control.sh status + +# Quick health check +curl -sf http://localhost:8000/health || echo "BACKEND DOWN" +``` + +If services are down, bring them up with `./scripts/stack_control.sh start` and wait for health. +If the stack fails to start after two attempts, **stop and report the infrastructure issue** — do not +enter the test loop with a broken stack. + +## Outer Loop: Full Test Sweep + +Run the **complete** E2E test suite: + +```bash +cd /home/mdear/workspaces/git/ii-agent +source ~/workspaces/venvs/ii-agent/bin/activate +python3 scripts/local/test_e2e.py 2>&1 +``` + +Parse the output summary to collect: +- Total tests run, passed, failed, skipped, errored +- For each non-passing test: the **test ID** (e.g. `CHAT-01`), **category**, **status**, and **failure notes** + +### Decision Point + +| Condition | Action | +|-----------|--------| +| All tests PASS (or SKIP with known reason) | **DONE** — report final results and exit | +| Any tests FAIL or ERROR | Enter the **Inner Loop** for each failure | + +## Inner Loop: Fix Each Failure + +Maintain a running tally of fix attempts per test ID (e.g. `CHAT-01: attempt 2/3`). This is +critical for enforcing the 3-attempt limit since the conversation may be long. + +For **each** failed/errored test (process one at a time, in test-ID alphabetical order): + +### Step 1 — Diagnose + +1. Re-run the single failing test in isolation to confirm it still fails: + ```bash + TEST_ID="<test-id>" python3 scripts/local/test_e2e.py 2>&1 + ``` +2. Read the failure output carefully. 
Check backend and sandbox logs filtered to the relevant + time window (use the test's session ID or a recent timestamp to narrow results): + ```bash + # Backend logs — filter by session ID from test output if available + ./scripts/stack_control.sh logs backend 2>&1 | grep -i "error\|exception\|traceback" | tail -50 + + # Sandbox container logs (find running sandbox first) + SANDBOX_ID=$(docker ps --filter 'name=ii-sandbox' -q | head -1) + [[ -n "$SANDBOX_ID" ]] && docker logs "$SANDBOX_ID" 2>&1 | grep -i "error\|exception\|traceback" | tail -50 + ``` + If grep filters too aggressively, fall back to `| tail -100` without grep. +3. Identify the **root cause** — is it: + - A backend code bug? → fix the source file + - A sandbox code bug? → fix under `src/ii_sandbox_server/` or `docker/sandbox/` + - A test script bug? → fix `scripts/local/test_e2e.py` + - A configuration/environment issue? → fix config or env + - A timeout that needs tuning? → adjust timeout constants + - A transient/flaky failure? → re-run once more to confirm before skipping + - An external dependency issue (quota, network)? → mark SKIP with reason, move on + +### Step 2 — Fix + +Apply the minimal fix to the identified source file(s). Follow project conventions: +- Use `uv run ruff check --fix-only <file>` and `uv run ruff format <file>` on + any modified Python files under `src/` +- Do NOT add unnecessary abstractions, comments, or refactoring beyond the fix +- If you only changed the test script (`scripts/local/test_e2e.py`) and no source code, skip the + rebuild step entirely — just re-run the test + +### Step 3 — Rebuild (if code changed) + +Determine which components are affected by your changes and rebuild accordingly. + +#### Backend changes (`src/ii_agent/`, `src/ii_server/`) + +Rebuild and restart the backend: + +```bash +./scripts/stack_control.sh rebuild backend 2>&1 | tail -15 +echo "Exit code: ${PIPESTATUS[0]}" +``` + +If exit code is non-zero, the build failed — read the full output to diagnose. 
If the rebuild uses +cached layers and your fix isn't picked up, use `--no-cache`: + +```bash +./scripts/stack_control.sh rebuild backend --no-cache 2>&1 | tail -15 +echo "Exit code: ${PIPESTATUS[0]}" +``` + +Wait for the backend to become healthy before proceeding: + +```bash +for i in $(seq 1 30); do + curl -sf http://localhost:8000/health && echo " Backend ready" && break + echo " Waiting for backend... ($i/30)" + sleep 2 +done +curl -sf http://localhost:8000/health || echo "ERROR: Backend failed to start after 60s — check logs" +``` + +If the backend fails to start, check logs (`./scripts/stack_control.sh logs backend 2>&1 | tail -50`) +and fix the startup error before retesting. + +#### Sandbox changes + +Sandbox code lives in several locations. Use the appropriate rebuild mode: + +| What changed | Rebuild command | +|---|---| +| Python source only (`src/ii_sandbox_server/`, `src/ii_agent_tools/`, `docker/sandbox/*.py`) | `./scripts/stack_control.sh build-sandbox --quick` | +| Dockerfile or system deps (`e2b.Dockerfile`, `docker/sandbox/start-services.sh`, `docker/sandbox/pyproject.toml`) | `./scripts/stack_control.sh build-sandbox` | +| Running sandbox containers need hot-patch (src-only, skip image rebuild) | `./scripts/stack_control.sh patch-sandbox` (copies + restarts services) | + +**`--quick` mode** uses Docker layer cache and only rebuilds source layers — fast for Python-only +changes. **Full mode** (no flag) does `--no-cache` and rebuilds everything including system packages. + +After a sandbox rebuild, existing sandbox containers use the old image. New sandboxes spawned by +subsequent agent queries will use the updated image automatically. The E2E tests create fresh +sessions, so each test run will get a new sandbox with the updated image — no manual action needed. + +#### Both backend and sandbox changed + +If your fix touches both backend and sandbox code, rebuild both. 
Choose the appropriate sandbox +mode based on what changed (see table above): + +```bash +# Use --quick for src-only sandbox changes, omit for Dockerfile/system changes +./scripts/stack_control.sh build-sandbox --quick 2>&1 | tail -10 +./scripts/stack_control.sh rebuild backend 2>&1 | tail -15 +for i in $(seq 1 30); do + curl -sf http://localhost:8000/health && echo " Backend ready" && break + sleep 2 +done +curl -sf http://localhost:8000/health || echo "ERROR: Backend failed to start" +``` + +### Step 4 — Retest the Single Fix + +Re-run **only** the test you just fixed: + +```bash +TEST_ID="<TEST_ID>" python3 scripts/local/test_e2e.py 2>&1 +``` + +- If it **passes**: mark this failure as resolved, move to next failure in the inner loop +- If it **still fails**: return to Step 1 with the new error output. Do not loop more than + 3 attempts on the same test — if still failing after 3 fix attempts, log the issue and move on + +### Step 5 — After All Failures Processed + +Once every failure from the inner loop has been addressed (fixed or logged as unresolvable after +3 attempts), return to the **Outer Loop** and run the full suite again. + +## Outer Loop Re-entry + +After the inner loop completes, run the full suite again from the top: + +```bash +cd /home/mdear/workspaces/git/ii-agent +source ~/workspaces/venvs/ii-agent/bin/activate +python3 scripts/local/test_e2e.py 2>&1 +``` + +This catches regressions introduced by fixes. Repeat the outer→inner loop cycle until: + +- **All tests PASS or SKIP** (with documented skip reasons), OR +- **No new progress** is possible (same failures persist after a full inner loop cycle) + +## Completion Criteria + +The cycle is **complete** when ONE of these is true: + +1. **All tests pass**: every test is PASS or SKIP-with-reason +2. **Plateau reached**: a full outer loop produces the exact same set of failures as the previous + outer loop (no progress was made) — report the stuck failures and stop +3. 
**Max iterations reached**: after **5 outer loop iterations**, stop regardless and report current + state — this prevents infinite see-saw regression cycles + +## Output Format + +After completion, report a summary table: + +``` +E2E Test Cycle Complete +═══════════════════════ +Outer loop iterations: N +Total tests: X + PASS: Y + SKIP: Z (with reasons) + FAIL: W (with root cause notes) + +Fixes applied: + - <TEST_ID>: <summary of the fix> + +Unresolved issues: + - <TEST_ID>: <root cause and why it remains unresolved> +``` + +## Environment Variables + +The test script supports filtering: + +| Variable | Purpose | Example | +|----------|---------|---------| +| `TEST_CATEGORY` | Run only one category | `TEST_CATEGORY=CHAT python3 scripts/local/test_e2e.py` | +| `TEST_ID` | Run a single test | `TEST_ID=IMG-01 python3 scripts/local/test_e2e.py` | +| `BACKEND_URL` | Override backend URL | Default: `http://localhost:8000` | +| `TOKEN` | Override auth token | Has default for local dev user | +| `E2E_SESSION_TTL` | Seconds until test sessions auto-delete | Default: `86400` (24 hours) | + +## Automatic Session Cleanup + +The test script automatically schedules every session it creates for deletion after `E2E_SESSION_TTL` +seconds (default: 24 hours). This uses the `POST /sessions/{session_id}/schedule-delete` endpoint +with `{"delete_after_seconds": <E2E_SESSION_TTL>}`. The backend's orphan cleanup loop (60-second sweep) soft-deletes +expired sessions, which cascades to sandbox container teardown. 
+ +- Cleanup scheduling is **non-fatal** — a failure to schedule does not fail the test +- Set `E2E_SESSION_TTL=0` to disable automatic scheduling (sessions persist until manually deleted) +- The test summary prints how many sessions were scheduled for cleanup at the end of the run +- To inspect a session before auto-cleanup, use its session ID within the 24-hour window + +If you need to manually trigger immediate deletion of a test session instead of waiting: + +```bash +curl -sf -X DELETE "$BACKEND_URL/sessions/<session_id>" -H "Authorization: Bearer $TOKEN" +``` + +## Test Categories + +| ID | Category | Tests | +|----|----------|-------| +| INF | Infrastructure | Health, models, sandbox readiness | +| CHAT | Chat Mode (REST) | Anthropic, OpenAI, multi-turn, web search, long response, stop | +| IMG | Image Attachments | Upload, chat attachment, agent attachment | +| WEB | Web Search & Browser | Agent web search, browser navigation | +| CODE | Code Execution | Single file, multi-file sandbox execution | +| SESS | Session Management | List, events, pin, fork | +| AGEN | Agent Multi-Turn | Context retention, tool use across turns | +| XFEAT | Cross-Feature | Agent web search + file, chat then agent on same session | +| HIST | Chat History | Message persistence and retrieval | + +## Critical Rules + +- **NEVER use raw `docker compose`** — always use `./scripts/stack_control.sh` +- **NEVER stop before all runnable tests have been executed and the outer loop is satisfied** +- **Run ruff** on any changed Python files under `src/` before rebuilding +- Keep fixes minimal — do not refactor or improve code beyond what the failing test requires +- If a test is SKIP due to external factors (API quota, missing credentials), document it and move on +- Do not modify test expectations to make tests pass — fix the underlying code instead diff --git a/.gitignore b/.gitignore index caac46fd7..61d179422 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,22 @@ trace_logs/ +# Docker stack env 
files (secrets) — keep *.example files tracked docker/.stack.env +docker/.stack.env.local docker/.stack.env.sh +docker/.env + +# dotenv environment variable files — keep *.example files tracked +.env +.env.local +.env.development.local +.env.test.local +.env.production.local +.env.tool +.env.sandbox +.env.claude +.envrc +model_configs.yaml # Python-generated files __pycache__/ @@ -14,8 +29,6 @@ wheels/ # Rust build output target/ -.claude/ - # Virtual environments .venv @@ -25,19 +38,11 @@ target/ *.sqlite3 # MacOS X gitignore -# General .DS_Store .AppleDouble .LSOverride - -# Icon must end with two \r Icon - - -# Thumbnails ._* - -# Files that might appear in the root of a volume .DocumentRevisions-V100 .fseventsd .Spotlight-V100 @@ -45,8 +50,6 @@ Icon .Trashes .VolumeIcon.icns .com.apple.timemachine.donotpresent - -# Directories potentially created on remote AFP share .AppleDB .AppleDesktop Network Trash Folder @@ -62,7 +65,7 @@ yarn-error.log* lerna-debug.log* .pnpm-debug.log* -# Diagnostic reports (https://nodejs.org/api/report.html) +# Diagnostic reports report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json # Runtime data @@ -71,45 +74,39 @@ pids *.seed *.pid.lock -# Directory for instrumented libs generated by jscoverage/JSCover +# Coverage lib-cov - -# Coverage directory used by tools like istanbul coverage *.lcov - -# nyc test coverage .nyc_output +.coverage -# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) +# Grunt .grunt -# Bower dependency directory (https://bower.io/) +# Bower bower_components -# node-waf configuration +# node-waf .lock-wscript -# Compiled binary addons (https://nodejs.org/api/addons.html) +# Compiled addons build/Release # Dependency directories node_modules/ jspm_packages/ - -# Snowpack dependency directory (https://snowpack.dev/) web_modules/ # TypeScript cache *.tsbuildinfo -# Optional npm cache directory +# npm / pnpm .npm +frontend/.pnpm-store/* -# Optional eslint cache +# Lint caches .eslintcache - -# 
Optional stylelint cache .stylelintcache # Microbundle cache @@ -118,100 +115,59 @@ web_modules/ .rts2_cache_es/ .rts2_cache_umd/ -# Optional REPL history +# REPL history .node_repl_history -# Output of 'npm pack' +# npm pack output *.tgz -# Yarn Integrity file +# Yarn .yarn-integrity +.yarn/cache +.yarn/unplugged +.yarn/build-state.yml +.yarn/install-state.gz +.pnp.* -# dotenv environment variable files -.env -model_configs.yaml -.env.development.local -.env.test.local -.env.production.local -.env.local -.env.tool -.env.sandbox -.env.claude - -# parcel-bundler cache (https://parceljs.org/) +# Bundler / framework caches .cache .parcel-cache - -# Next.js build output .next out - -# Nuxt.js build / generate output .nuxt -dist - -# Gatsby files -.cache/ -# Comment in the public line in if your project uses Gatsby and not Next.js -# https://nextjs.org/blog/next-9-1#public-directory-support -# public - -# vuepress build output .vuepress/dist - -# vuepress v2.x temp and cache directory .temp -.cache - -# vitepress build output **/.vitepress/dist - -# vitepress cache directory **/.vitepress/cache - -# Docusaurus cache and generated files .docusaurus - -# Serverless directories .serverless/ - -# FuseBox cache .fusebox/ - -# DynamoDB Local files .dynamodb/ -# TernJS port file +# TernJS .tern-port -# Stores VSCode versions used for testing VSCode extensions +# VS Code test .vscode-test -# yarn v2 -.yarn/cache -.yarn/unplugged -.yarn/build-state.yml -.yarn/install-state.gz -.pnp.* - +# Project workspace & output agent_logs.txt workspace/ tmp/ -data/file_store -data/workspace -data/logs -data/events.db +data/ output/ +# Editor / IDE / AI .vscode/ -.envrc - -# local only scripts -start_tool_server.sh -a2a_agents.json - .idea/ .claude/ .codex/ .shared/ .gemini/ + +# Local only scripts +start_tool_server.sh +a2a_agents.json +scripts/local/register_seats_mcp.sh +scripts/local/create_seats_dark_template.sh +scripts/local/rctcop_title_slide_rework.sh diff --git a/AGENTS.md 
b/AGENTS.md index 85f2b71b3..bdfce3f76 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -54,7 +54,7 @@ src/ii_agent/ │ ├── llm/ # LLM billing service, execution service, base client │ ├── redis/ # Redis client, cache, pubsub, lock, cancel management │ ├── secrets/ # GCP Secret Manager integration -│ ├── storage/ # File storage abstraction (GCS, local) +│ ├── storage/ # File storage abstraction (GCS, MinIO) │ ├── container.py # ServiceContainer for complex dependency graphs │ └── dependencies.py # DBSession, SettingsDep (shared Dep aliases) │ @@ -72,7 +72,7 @@ src/ii_agent/ │ └── webhook_handler.py # Stripe webhook processing │ ├── sessions/ # Chat session management -│ ├── models.py # Session model, SessionStateEnum, AppKind +│ ├── models.py # Session model, SessionStateEnum, AppKind, delete_after │ ├── service.py # Session CRUD, state transitions │ ├── fork_service.py # Session forking │ ├── title_service.py # Auto-title generation @@ -165,7 +165,7 @@ These `core/` modules are available to all domains: | `core/config/` | Application settings | `Settings`, `get_settings()` | | `core/db/` | Database connection | `Base`, `TimestampColumn`, `get_db_session_local()` | | `core/redis/` | Caching, pubsub, locks | `redis_client`, `EntityCache`, `AsyncIOPubSub` | -| `core/storage/` | File storage (GCS) | `BaseStorage`, `storage`, `media_storage` | +| `core/storage/` | File storage (GCS, MinIO) | `BaseStorage`, `storage`, `media_storage` | | `core/llm/` | LLM billing & execution | `LLMBillingService`, `LLMExecutionService` | | `core/secrets/` | Secret management | GCP Secret Manager integration | | `core/dependencies.py` | Shared Dep aliases | `DBSession`, `SettingsDep` | @@ -226,6 +226,9 @@ WebSocket (Socket.IO) | slide_design | `/slides/design` | Slide design | | nano_banana | `/slides/nano-banana` | Nano banana slides | | health | `/health` | Health check | +| storage_proxy | `/storage` | Storage proxy (local deploy) | +| slide_assets | `/files/slides/assets` | Slide assets | +| 
sandbox_files | `/sandbox-files` | Sandbox file preview | ### Key Design Decisions @@ -233,8 +236,8 @@ WebSocket (Socket.IO) - **Dep aliases everywhere**: FastAPI dependency injection uses `Annotated[T, Depends(factory)]` pattern exclusively. - **Redis optional**: All Redis usage has in-memory fallbacks for single-worker deployments. - **Billing via reservations**: All billable work uses reserve -> settle -> release, never direct deductions. -- **GCS for storage**: File uploads, media, and slides use Google Cloud Storage with signed URLs. -- **E2B for sandboxes**: Code execution happens in isolated E2B sandbox environments. +- **GCS/MinIO for storage**: File uploads, media, and slides use Google Cloud Storage (prod) or MinIO (local Docker) with signed or proxied URLs. +- **E2B/Docker for sandboxes**: Code execution happens in isolated E2B (cloud) or Docker (local) sandbox environments. ## Where to Look diff --git a/CLAUDE.md b/CLAUDE.md index fc7258f99..8558f0006 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -18,7 +18,7 @@ src/ii_agent/ │ ├── llm/ # LLM billing service, execution service, base utilities │ ├── middleware/ # CORS, request tracing, exception handling │ ├── redis/ # Async Redis client, cache, cancel tokens -│ ├── storage/ # GCS/local file storage abstraction + path resolver +│ ├── storage/ # GCS/MinIO file storage abstraction + path resolver │ └── container.py # ApplicationContainer singleton (global + app.state) │ ├── auth/ # OAuth 2.0, JWT (uuid.UUID user_id), session management @@ -29,7 +29,7 @@ src/ii_agent/ │ ├── tasks/ # Unified run lifecycle tracker (RunTask + TaskLog) -- CANONICAL DOMAIN │ -├── sessions/ # Chat sessions (CRUD, state, fork, title, validation) +├── sessions/ # Chat sessions (CRUD, state, fork, title, timed delete) │ ├── pin/ # Session pins │ └── wishlist/ # Session wishlists/bookmarks │ @@ -185,6 +185,9 @@ Socket "chat_message" -> CommandHandlerFactory | `/connectors/composio` | `integrations/connectors/composio/router.py` | Composio 
| | `/connectors` | `integrations/connectors/router.py` | Connectors (GitHub, Google) | | `/enhance-prompt` | `integrations/enhance_prompt/router.py` | Prompt Enhancement | +| `/storage` | `files/storage_proxy_router.py` | Storage Proxy (local deploy) | +| `/files/slides/assets` | `files/slide_assets_router.py` | Slide Assets | +| `/sandbox-files` | `files/sandbox_files_router.py` | Sandbox File Preview | Router registration: `app/routers.py::include_routers(app)` @@ -296,6 +299,51 @@ Storybook 1──N StorybookPage 1──N StorybookPageLink SlideContent 1──N SlideVersion ``` +## Billing & Credit System + +### Credit Conversion + +``` +100 II-Agent credits == $1.50 USD +1 USD ≈ 66.67 credits +``` + +Defined in `billing/utils.py`. All USD→credit math uses `Decimal` arithmetic to avoid floating-point loss. + +### Mandatory Rule + +**Never call `CreditService.deduct()` directly** for LLM or tool billing. All billable work flows through the event-driven `CreditUsageHandler` which subscribes to `ModelUsageEvent` and `ToolUsageEvent` on the pub/sub bus. + +### Native Billing Flow + +``` +LLM call completes → ModelUsageEvent published → CreditUsageHandler + → token_count × PricingInfo → USD → credits → CreditService.deduct() + → CreditsDeductedEvent (frontend balance update) + → if balance < minimum: cancel agent run +``` + +Tool billing follows the same pattern via `ToolUsageEvent` with a direct `cost_usd` field. + +### A2A Billing (Inner-Loop Subsidisation) + +When `billing_backend` on a `ModelUsageEvent` starts with `"a2a:"`, the handler uses a configurable strategy instead of standard token pricing. This accounts for subsidised backends like Copilot Business (unlimited) or Copilot Pro+ (premium-request pricing). 
+ +| Strategy (`AGENT_A2A_BILLING_STRATEGY`) | Behaviour | +|---|---| +| `token_based` (default) | Standard token cost × `AGENT_A2A_BILLING_MULTIPLIER` (default 1.0) | +| `provider_reported` | Copilot: `premium_requests × model_multiplier × $0.04`; others: adapter-reported USD | +| `none` | Zero LLM charge (subscription covers inference) | + +Key details: +- Tool costs (image gen, web search) are **always** billed at native rates regardless of strategy +- `is_user_key=True` skips LLM billing entirely (user pays their own API bill) +- Copilot premium-request multipliers are hot-configurable via `AGENT_A2A_COPILOT_MULTIPLIERS` (JSON env) + +**Full design doc:** [`docs/design-docs/a2a-billing-model.md`](docs/design-docs/a2a-billing-model.md) — strategies, deployment decision tree, cost comparisons, config examples. + +**Key files:** `credits/usage/handler.py` (billing logic), `core/config/agent.py` (A2A billing settings), `realtime/events/app_events.py` (ModelUsageEvent schema), `billing/utils.py` (USD↔credit conversion). 
+ ## External Services & Configuration ### External Services @@ -583,7 +631,7 @@ curl http://localhost:8000/health | `core/config/settings.py` | Pydantic settings (`get_settings` singleton) | | `core/db/base.py` | SQLAlchemy Base (UUID PK, DateTime timestamps), TimestampColumn, BaseRepository | | `core/redis/` | Redis client, cache, pubsub, lock, cancel management | -| `core/storage/` | File storage abstraction (GCS, local) + path resolver | +| `core/storage/` | File storage abstraction (GCS, MinIO) + path resolver | | `auth/dependencies.py` | CurrentUser, DBSession, get_current_user | | `tasks/` | Canonical domain implementation (RunTask, TaskLog, types, schemas, exceptions) | | `realtime/handlers/factory.py` | CommandHandlerFactory -- 21 Socket.IO command handlers | diff --git a/docker/.stack.env.local.example b/docker/.stack.env.local.example new file mode 100644 index 000000000..ae4c2bb14 --- /dev/null +++ b/docker/.stack.env.local.example @@ -0,0 +1,73 @@ +# Local-only environment template for ii-agent Docker stack. +# Copy to docker/.stack.env.local and fill in your API keys. +# +# Usage: docker compose -f docker/docker-compose.local.yaml \ +# --env-file docker/.stack.env.local up -d + +# ------------------------- +# Frontend build config +# ------------------------- +FRONTEND_BUILD_MODE=production +VITE_API_URL=http://localhost:8000 +# Dummy client ID to prevent GoogleOAuthProvider crash (no Google login in local mode) +VITE_GOOGLE_CLIENT_ID=disabled-local-mode.apps.googleusercontent.com +VITE_STRIPE_PUBLISHABLE_KEY= +VITE_SENTRY_DSN= +VITE_DISABLE_CHAT_MODE=false + +# ------------------------- +# LLM Configuration +# ------------------------- +# Provide at least one LLM config. 
Example uses Anthropic Claude: +MODEL_CONFIGS='[{"model_id":"claude-sonnet-4-20250514","provider":"Anthropic","api_key":"replace-me","display_name":"Claude Sonnet 4","is_default":true}]' + +# ------------------------- +# Auth (local dev mode) +# ------------------------- +DEV_AUTH_ENABLED=true + +# ------------------------- +# Storage (Minio - local S3-compatible) +# ------------------------- +STORAGE_PROVIDER=minio +STORAGE_MINIO_ACCESS_KEY=minioadmin +STORAGE_MINIO_SECRET_KEY=minioadmin +STORAGE_MINIO_BUCKET=ii-agent + +# ------------------------- +# Sandbox (Docker provider) +# ------------------------- +SANDBOX_PROVIDER=docker +SANDBOX_DOCKER_IMAGE=ii-agent-sandbox:latest +# Memory limit for sandbox containers (in MB) +# SANDBOX_MEMORY_LIMIT=3072 + +# ------------------------- +# Core infrastructure +# ------------------------- +POSTGRES_USER=iiagent +POSTGRES_PASSWORD=iiagent +POSTGRES_DB=iiagentdev +DATABASE_URL=postgresql+asyncpg://iiagent:iiagent@postgres:5432/iiagentdev + +REDIS_PORT=6379 +BACKEND_PORT=8000 +FRONTEND_PORT=1420 + +# ------------------------- +# Inner loop: A2A protocol (optional — defaults to native if unconfigured) +# The adapter runs inside each sandbox container. +# Backends: copilot | claude-code | codex | simulate +# ------------------------- +# AGENT_INNER_LOOP_MODE=a2a +# AGENT_A2A_BACKEND=copilot +# AGENT_A2A_FALLBACK_TO_NATIVE=true + +# GitHub token for Copilot CLI inside sandbox (required for copilot backend). 
+# Generate at: https://github.com/settings/tokens?type=beta +# → Fine-grained personal access token +# → Repository access: Public repositories (default — Copilot uses local code) +# → Account permissions: +# Copilot Chat: Read-only +# Copilot Requests: Read-only +# GITHUB_TOKEN= diff --git a/docker/docker-compose.local.yaml b/docker/docker-compose.local.yaml new file mode 100644 index 000000000..0d00c0e63 --- /dev/null +++ b/docker/docker-compose.local.yaml @@ -0,0 +1,152 @@ +# Local-only docker-compose for ii-agent with Docker sandboxes +# +# This setup uses local Docker containers for sandboxes instead of E2B cloud. +# All data stays on your machine — suitable for air-gapped / NDA environments. +# +# Usage: +# 1. Build the sandbox image first: +# docker build -t ii-agent-sandbox:latest -f e2b.Dockerfile . +# +# 2. Copy and configure environment: +# cp docker/.stack.env.local.example docker/.stack.env.local +# +# 3. Start the stack: +# docker compose -f docker/docker-compose.local.yaml \ +# --env-file docker/.stack.env.local up -d +# +# Key differences from docker-compose.stack.yaml: +# - SANDBOX_PROVIDER=docker (no E2B cloud dependency) +# - Backend gets Docker socket mount for spawning sandbox containers +# - Uses minio for local object storage +# - No separate sandbox-server or tool-server (monolith backend) +# - DEV_AUTH_ENABLED bypasses OAuth for local development + +services: + postgres: + image: postgres:15 + restart: unless-stopped + ports: + - "${POSTGRES_PORT:-5432}:5432" + environment: + POSTGRES_USER: ${POSTGRES_USER:-iiagent} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-iiagent} + POSTGRES_DB: ${POSTGRES_DB:-iiagentdev} + env_file: + - .stack.env.local + volumes: + - postgres-data-local:/var/lib/postgresql/data + - ./postgres-init/create-databases.sh:/docker-entrypoint-initdb.d/create-databases.sh:ro + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-iiagent} -d ${POSTGRES_DB:-iiagentdev}"] + interval: 10s + timeout: 5s + retries: 5 + 
+ redis: + image: redis:7-alpine + restart: unless-stopped + ports: + - "${REDIS_PORT:-6379}:6379" + command: ["redis-server", "--save", "60", "1", "--loglevel", "warning"] + volumes: + - redis-data-local:/data + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 5s + retries: 5 + + minio: + image: minio/minio:latest + restart: unless-stopped + ports: + - "${MINIO_API_PORT:-9000}:9000" + - "${MINIO_CONSOLE_PORT:-9001}:9001" + environment: + MINIO_ROOT_USER: ${STORAGE_MINIO_ACCESS_KEY:-minioadmin} + MINIO_ROOT_PASSWORD: ${STORAGE_MINIO_SECRET_KEY:-minioadmin} + command: server /data --console-address ":9001" + volumes: + - minio-data-local:/data + healthcheck: + test: ["CMD", "mc", "ready", "local"] + interval: 10s + timeout: 5s + retries: 5 + + frontend: + build: + context: .. + dockerfile: docker/frontend/Dockerfile + args: + BUILD_MODE: ${FRONTEND_BUILD_MODE:-production} + VITE_API_URL: ${VITE_API_URL:-http://localhost:8000} + VITE_GOOGLE_CLIENT_ID: ${VITE_GOOGLE_CLIENT_ID:-} + VITE_STRIPE_PUBLISHABLE_KEY: ${VITE_STRIPE_PUBLISHABLE_KEY:-} + VITE_SENTRY_DSN: ${VITE_SENTRY_DSN:-} + VITE_DISABLE_CHAT_MODE: ${VITE_DISABLE_CHAT_MODE:-false} + restart: unless-stopped + env_file: + - .stack.env.local + environment: + NODE_ENV: production + ports: + - "${FRONTEND_PORT:-1420}:3000" + + backend: + build: + context: .. 
+ dockerfile: docker/backend/Dockerfile + init: true + restart: unless-stopped + extra_hosts: + - "host.docker.internal:host-gateway" + depends_on: + postgres: + condition: service_healthy + redis: + condition: service_healthy + minio: + condition: service_healthy + env_file: + - .stack.env.local + environment: + DATABASE_URL: ${DATABASE_URL} + REDIS_SESSION_URL: redis://redis:6379/1 + # ── Docker sandbox provider ── + SANDBOX_PROVIDER: docker + SANDBOX_DOCKER_IMAGE: ${SANDBOX_DOCKER_IMAGE:-ii-agent-sandbox:latest} + SANDBOX_DOCKER_NETWORK: ${COMPOSE_PROJECT_NAME:-ii-agent-local}_default + SANDBOX_PORT_RANGE_START: "30000" + SANDBOX_PORT_RANGE_END: "30999" + SANDBOX_LOCAL_MODE: "true" + SANDBOX_ORPHAN_CLEANUP_ENABLED: "true" + SANDBOX_ORPHAN_CLEANUP_INTERVAL_SECONDS: "300" + SANDBOX_DOCKER_HOST: ${SANDBOX_DOCKER_HOST:-localhost} + # ── Storage ── + STORAGE_PROVIDER: minio + STORAGE_MINIO_ENDPOINT: minio:9000 + STORAGE_MINIO_ACCESS_KEY: ${STORAGE_MINIO_ACCESS_KEY:-minioadmin} + STORAGE_MINIO_SECRET_KEY: ${STORAGE_MINIO_SECRET_KEY:-minioadmin} + STORAGE_BUCKET_NAME: ${STORAGE_MINIO_BUCKET:-ii-agent} + STORAGE_MINIO_SECURE: "false" + STORAGE_SERVE_BASE_URL: ${STORAGE_SERVE_BASE_URL:-} + # ── Auth ── + DEV_AUTH_ENABLED: "true" + ports: + - "${BACKEND_PORT:-8000}:8000" + volumes: + # Docker socket so backend can spawn sandbox containers + - /var/run/docker.sock:/var/run/docker.sock + - ii-agent-filestore-local:/.ii_agent + healthcheck: + test: ["CMD-SHELL", "curl -fsS http://localhost:8000/health || exit 1"] + interval: 15s + timeout: 5s + retries: 5 + +volumes: + postgres-data-local: + redis-data-local: + minio-data-local: + ii-agent-filestore-local: diff --git a/docker/frontend/Dockerfile b/docker/frontend/Dockerfile index 266ccf96c..178bb8c91 100644 --- a/docker/frontend/Dockerfile +++ b/docker/frontend/Dockerfile @@ -2,9 +2,21 @@ FROM node:22-alpine AS builder WORKDIR /app COPY frontend/ . 
-RUN if [ -f yarn.lock ]; then yarn --frozen-lockfile && yarn build; \ +# Build-time environment variables for Vite +ARG VITE_API_URL=http://localhost:8000 +ARG VITE_GOOGLE_CLIENT_ID= +ARG VITE_STRIPE_PUBLISHABLE_KEY= +ARG VITE_SENTRY_DSN= +ARG VITE_DISABLE_CHAT_MODE=false +ENV VITE_API_URL=$VITE_API_URL +ENV VITE_GOOGLE_CLIENT_ID=$VITE_GOOGLE_CLIENT_ID +ENV VITE_STRIPE_PUBLISHABLE_KEY=$VITE_STRIPE_PUBLISHABLE_KEY +ENV VITE_SENTRY_DSN=$VITE_SENTRY_DSN +ENV VITE_DISABLE_CHAT_MODE=$VITE_DISABLE_CHAT_MODE + +RUN if [ -f pnpm-lock.yaml ]; then corepack enable pnpm && pnpm i --frozen-lockfile && pnpm run build; \ + elif [ -f yarn.lock ]; then yarn --frozen-lockfile && yarn build; \ elif [ -f package-lock.json ]; then npm ci && npm run build; \ - elif [ -f pnpm-lock.yaml ]; then corepack enable pnpm && pnpm i --frozen-lockfile && pnpm run build; \ else echo "Lockfile not found." && exit 1; \ fi diff --git a/docker/sandbox/pyproject.toml b/docker/sandbox/pyproject.toml index 52d42faab..c9e0018f2 100644 --- a/docker/sandbox/pyproject.toml +++ b/docker/sandbox/pyproject.toml @@ -34,6 +34,9 @@ dependencies = [ "strictyaml>=1.7.0", # shared "playwright==1.55.0", + # A2A adapter server deps + "a2a-sdk==0.3.25", + "github-copilot-sdk>=0.1.25", ] [build-system] @@ -41,4 +44,4 @@ requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] -packages = ["src/ii_server", "src/ii_agent_tools"] +packages = ["src/ii_server", "src/ii_agent_tools", "src/ii_agent"] diff --git a/docker/sandbox/start-services.sh b/docker/sandbox/start-services.sh index 77acb1d8e..3789c4440 100644 --- a/docker/sandbox/start-services.sh +++ b/docker/sandbox/start-services.sh @@ -11,13 +11,40 @@ export HOME=/home/user export PATH="/home/user/.bun/bin:/app/ii_sandbox/.venv/bin:$PATH" -# Create workspace directory if it doesn't exist +# Create workspace directory if it doesn't exist and ensure ownership mkdir -p /workspace +chown -R "$(id -u):$(id -g)" /workspace cd /workspace +# 
Ensure X11 socket directory exists (Xvfb cannot create it as non-root) +mkdir -p /tmp/.X11-unix +chmod 1777 /tmp/.X11-unix + +# Start Xvfb virtual display +echo "Starting Xvfb..." +Xvfb :99 -screen 0 1920x1080x24 -ac & +export DISPLAY=:99 +export AGENT_BROWSER_HEADED=1 +sleep 1 + +# Start x11vnc server +echo "Starting x11vnc..." +x11vnc -display :99 -forever -nopw -shared -rfbport 5900 -bg -o /tmp/x11vnc.log +sleep 1 + +# Start window manager (needed for Chrome to render properly in VNC) +echo "Starting fluxbox window manager..." +fluxbox & +sleep 1 + +# Start noVNC websockify proxy (serves VNC over WebSocket on port 6080) +echo "Starting noVNC on port 6080..." +websockify --web=/usr/share/novnc 6080 localhost:5900 & +sleep 1 + # Start the sandbox server in the background echo "Starting sandbox server..." -tmux new-session -d -s sandbox-server-system-never-kill -c /workspace 'WORKSPACE_DIR=/workspace xvfb-run python -m ii_server.mcp.server' +tmux new-session -d -s sandbox-server-system-never-kill -c /workspace 'WORKSPACE_DIR=/workspace DISPLAY=:99 python -m ii_server.mcp.server' # Start code-server in the background echo "Starting code-server on port 9000..." @@ -48,9 +75,16 @@ else echo "✗ Code-server failed to start" fi +if pgrep -f "websockify" >/dev/null; then + echo "✓ noVNC is running on port 6080" +else + echo "✗ noVNC failed to start" +fi + echo "Services started. Container ready." 
echo "Sandbox server available" echo "Code-server available on port 9000" +echo "noVNC available on port 6080" # Keep the container running by waiting for all background processes wait diff --git a/docs/docs/architecture-local-to-cloud.md b/docs/docs/architecture-local-to-cloud.md new file mode 100644 index 000000000..33eacac2c --- /dev/null +++ b/docs/docs/architecture-local-to-cloud.md @@ -0,0 +1,533 @@ +# Architecture: Local to Cloud Deployment Path + +This document outlines the architectural evolution of ii-agent from a local development setup to a production-ready cloud deployment, with emphasis on security considerations for sensitive/NDA-protected data. + +## Overview + +ii-agent supports multiple deployment models through a pluggable sandbox provider architecture: + +| Stage | Sandbox Provider | Network Exposure | Data Location | Multi-tenant | +|-------|------------------|------------------|---------------|--------------| +| **Local Dev** | Docker | localhost only | Your machine | No | +| **Team/On-prem** | Docker + Auth | Internal network | Your infrastructure | Limited | +| **Cloud Production** | Kubernetes/gVisor | Internet-facing | Cloud VPC | Yes | + +--- + +## Stage 1: Local Development (Current) + +### Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Single Developer Machine │ +├─────────────────────────────────────────────────────────────────┤ +│ │ +│ Browser ──▶ Frontend (:1420) │ +│ │ │ +│ ▼ Socket.IO (WebSocket) │ +│ Backend (:8000) ◀──▶ Redis (session mgr) │ +│ │ │ +│ ┌────────┴────────┐ │ +│ ▼ ▼ │ +│ Sandbox-Server Tool-Server │ +│ (:8100) (:1236) │ +│ │ │ +│ │ Docker API + PortPoolManager │ +│ ▼ (host ports 30000-30999) │ +│ ┌─────────────────────────────────────────┐ │ +│ │ Ephemeral Sandbox Containers │ │ +│ │ ┌─────────────────────────────────┐ │ │ +│ │ │ Sandbox │ │ │ +│ │ │ Xvfb (:99) + x11vnc (:5900) │ │ │ +│ │ │ noVNC (:6080) │ │ │ +│ │ │ MCP Server (:6060) │ │ │ +│ │ │ code-server (:9000) │ │ 
│ +│ │ └─────────────────────────────────┘ │ │ +│ │ ┌─────────┐ ┌─────────┐ │ │ +│ │ │Sandbox 2│ │ ... │ │ │ +│ │ └─────────┘ └─────────┘ │ │ +│ └─────────────────────────────────────────┘ │ +│ │ +│ ┌──────────┐ ┌───────┐ │ +│ │ Postgres │ │ Redis │ │ +│ │ (:5433) │ │(:6379)│ │ +│ └──────────┘ └───────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### Security Model + +| Aspect | Implementation | Risk Level | +|--------|----------------|------------| +| Network exposure | localhost only | ✅ Low | +| Authentication | JWT (optional demo mode) | ⚠️ Acceptable for dev | +| Sandbox isolation | Docker containers | ⚠️ Process-level | +| Data at rest | Local filesystem | ✅ Your control | +| Secrets | Environment variables | ⚠️ Acceptable for dev | + +### What Works Now + +- ✅ Full agent functionality without E2B/ngrok +- ✅ Local MCP server connectivity +- ✅ File operations with path traversal protection +- ✅ Command execution in isolated containers +- ✅ Resource limits (memory, CPU, PIDs) +- ✅ Basic capability dropping +- ✅ **Orphan cleanup** — Automatic removal of sandboxes with no active session (5-minute grace period, runs every 60s) +- ✅ **Local storage** — Files stored in MinIO (S3-compatible) instead of cloud storage (GCS) +- ✅ **Port pool management** — Ring-buffer host-port allocation (default 30000–30999, configurable via `SANDBOX_PORT_RANGE_START`/`SANDBOX_PORT_RANGE_END`). Thread-safe with startup scanning to reclaim ports from existing containers. Ring-buffer design prevents port conflicts when restarting stopped containers. +- ✅ **Sandbox restart** — Stopped/exited containers are automatically restarted when a user navigates to the session. Includes MCP health readiness check after restart. 
+- ✅ **noVNC browser handoff** — User interaction for CAPTCHAs/login via browser-based VNC viewer (noVNC :6080 → x11vnc :5900 → Xvfb :99 inside sandbox) +- ✅ **Socket.IO real-time transport** — Backend ↔ Browser communication over WebSocket with Redis-backed session manager (`AsyncRedisManager`) for horizontal scaling. Configured with `ping_timeout=300s`, `ping_interval=30s`, 10 MB max buffer. +- ✅ **Conversation state resilience** — Defense-in-depth sanitization of LLM thinking blocks on restore, runtime, save, and API call boundaries to prevent stuck sessions from corrupted state. + +### Known Limitations + +- Docker socket mount gives sandbox-server root-equivalent host access +- No network policy between sandbox containers +- No audit logging +- Single-user only + +### Quick Start + +```bash +# Configure +cp docker/.stack.env.local.example docker/.stack.env.local +# Edit: add JWT_SECRET_KEY and LLM API key + +# Build sandbox image + start all services +scripts/stack_control.sh --local build +scripts/stack_control.sh --local start + +# Or, to rebuild just one service after changing it: +scripts/stack_control.sh --local rebuild backend +``` + +> `scripts/stack_control.sh` is the preferred interface. It wraps `docker compose` with the correct env-file, compose files, and build context. Run it without arguments to see the full command reference. 
+ +--- + +## Stage 2: Team/On-Premises Deployment + +### Architecture Changes + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Internal Network / VPN │ +├─────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌──────────────────────────────────────┐ │ +│ │ Reverse Proxy (nginx) │ │ +│ │ - TLS termination │ │ +│ │ - Rate limiting │ │ +│ │ - IP allowlisting │ │ +│ └─────────────────┬────────────────────┘ │ +│ │ │ +│ ┌───────────┴───────────┐ │ +│ ▼ ▼ │ +│ ┌──────────┐ ┌──────────┐ │ +│ │ Frontend │ │ Backend │ │ +│ └──────────┘ └────┬─────┘ │ +│ │ │ +│ ┌──────────┴──────────┐ │ +│ ▼ ▼ │ +│ Sandbox-Server Tool-Server │ +│ (+ mTLS auth) (+ mTLS auth) │ +│ │ │ +│ ▼ │ +│ ┌─────────────────────────────────────────┐ │ +│ │ Sandboxes (isolated Docker network) │ │ +│ │ - No inter-container communication │ │ +│ │ - Egress restricted to MCP only │ │ +│ └─────────────────────────────────────────┘ │ +│ │ +│ ┌──────────┐ ┌───────┐ ┌────────────────┐ │ +│ │ Postgres │ │ Redis │ │ MCP Server │ │ +│ │ (TLS) │ │ (TLS) │ │ (internal only)│ │ +│ └──────────┘ └───────┘ └────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### Required Changes + +#### 1. Add Service-to-Service Authentication + +```yaml +# docker-compose.team.yaml additions +services: + sandbox-server: + environment: + # Require mTLS or JWT for API calls + REQUIRE_AUTH: "true" + AUTH_JWT_SECRET: ${SANDBOX_AUTH_SECRET} +``` + +#### 2. Create Isolated Docker Network + +```yaml +networks: + sandbox-net: + driver: bridge + internal: true # No external access + driver_opts: + com.docker.network.bridge.enable_icc: "false" # No inter-container +``` + +#### 3. 
Add Reverse Proxy with TLS + +```nginx +# nginx.conf +upstream backend { + server backend:8000; +} + +server { + listen 443 ssl; + ssl_certificate /etc/ssl/certs/ii-agent.crt; + ssl_certificate_key /etc/ssl/private/ii-agent.key; + + # Rate limiting + limit_req_zone $binary_remote_addr zone=api:10m rate=10r/s; + + location /api/ { + limit_req zone=api burst=20; + proxy_pass http://backend; + } +} +``` + +#### 4. Implement Audit Logging + +```python +# Add to sandbox-server +import structlog + +logger = structlog.get_logger() + +async def create_sandbox(..., user_id: str): + logger.info( + "sandbox_created", + user_id=user_id, + sandbox_id=sandbox_id, + action="create" + ) +``` + +### Security Improvements + +| Aspect | Change | Risk Reduction | +|--------|--------|----------------| +| Network | TLS everywhere, mTLS for services | High | +| Authentication | OIDC/SAML integration | High | +| Network isolation | Isolated Docker network | Medium | +| Audit | Structured logging to SIEM | Medium | +| Rate limiting | Nginx/HAProxy rate limits | Medium | + +--- + +## Stage 3: Cloud Production (AWS/GCP/Azure) + +### Target Architecture + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ AWS VPC │ +├─────────────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌─────────────────────────────────────────────────────────────────┐ │ +│ │ Public Subnet │ │ +│ │ ┌─────────────┐ │ │ +│ │ │ ALB │◀── WAF + Shield │ │ +│ │ │ (HTTPS) │ │ │ +│ │ └──────┬──────┘ │ │ +│ └──────────┼──────────────────────────────────────────────────────┘ │ +│ │ │ +│ ┌──────────┼──────────────────────────────────────────────────────┐ │ +│ │ │ Private Subnet (EKS) │ │ +│ │ ▼ │ │ +│ │ ┌─────────────────────────────────────────────────────────┐ │ │ +│ │ │ EKS Cluster │ │ │ +│ │ │ │ │ │ +│ │ │ ┌──────────┐ ┌──────────────┐ ┌──────────────┐ │ │ │ +│ │ │ │ Frontend │ │ Backend │ │ Tool-Server │ │ │ │ +│ │ │ │ (Pod) │ │ (Pod) │ │ (Pod) │ │ │ │ +│ │ │ 
└──────────┘ └──────┬───────┘ └──────────────┘ │ │ │ +│ │ │ │ │ │ │ +│ │ │ ▼ │ │ │ +│ │ │ ┌─────────────────┐ │ │ │ +│ │ │ │ Sandbox-Server │ │ │ │ +│ │ │ │ (Pod + IAM Role)│ │ │ │ +│ │ │ └────────┬────────┘ │ │ │ +│ │ │ │ │ │ │ +│ │ │ ┌───────────────────┴───────────────────┐ │ │ │ +│ │ │ │ Sandbox Namespace │ │ │ │ +│ │ │ │ ┌─────────┐ ┌─────────┐ │ │ │ │ +│ │ │ │ │Sandbox 1│ │Sandbox 2│ ... │◀─┐ │ │ │ +│ │ │ │ │ (gVisor)│ │ (gVisor)│ │ │ │ │ │ +│ │ │ │ └─────────┘ └─────────┘ │ │ │ │ │ +│ │ │ │ │ │ │ │ │ +│ │ │ │ NetworkPolicy: deny-all + allow-mcp │ │ │ │ │ +│ │ │ └────────────────────────────────────────┘ │ │ │ │ +│ │ │ │ │ │ │ +│ │ └───────────────────────────────────────────────┼─────────┘ │ │ +│ │ │ │ │ +│ │ ┌────────────────┐ ┌────────────────┐ │ │ │ +│ │ │ RDS Postgres │ │ ElastiCache │ │ │ │ +│ │ │ (encrypted) │ │ (Redis) │ │ │ │ +│ │ └────────────────┘ └────────────────┘ │ │ │ +│ │ │ │ │ +│ └───────────────────────────────────────────────────┼─────────────┘ │ +│ │ │ +│ ┌───────────────────────────────────────────────────┼─────────────┐ │ +│ │ Private Subnet (Data) │ │ │ +│ │ ▼ │ │ +│ │ ┌────────────────────────────────────────────────────────┐ │ │ +│ │ │ Your MCP Server (Fargate) │ │ │ +│ │ │ - IAM Role for data access │ │ │ +│ │ │ - VPC endpoint for S3/Secrets Manager │ │ │ +│ │ │ - No internet access │ │ │ +│ │ └────────────────────────────────────────────────────────┘ │ │ +│ └─────────────────────────────────────────────────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────────┘ + +External Services (via VPC Endpoints): +├── AWS Secrets Manager (API keys) +├── CloudWatch (logs, metrics) +├── S3 (artifacts, optional) +└── ECR (container images) +``` + +### Implementation Requirements + +#### 1. 
Kubernetes Sandbox Provider + +Replace Docker provider with Kubernetes-native sandbox management: + +```python +# src/ii_agent/agents/sandboxes/kubernetes.py (new file) +class KubernetesSandbox(Sandbox): + """ + Kubernetes-native sandbox provider. + + Creates pods with gVisor runtime for VM-level isolation + without the overhead of actual VMs. + """ + + async def create(self, ...): + pod_manifest = { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "name": f"sandbox-{sandbox_id}", + "namespace": "ii-agent-sandboxes", + "labels": {"ii-agent.sandbox": "true"} + }, + "spec": { + "runtimeClassName": "gvisor", # VM-level isolation + "securityContext": { + "runAsNonRoot": True, + "seccompProfile": {"type": "RuntimeDefault"} + }, + "containers": [{ + "name": "sandbox", + "image": self.config.sandbox_image, + "resources": { + "limits": {"memory": "2Gi", "cpu": "2"}, + "requests": {"memory": "512Mi", "cpu": "0.5"} + }, + "securityContext": { + "allowPrivilegeEscalation": False, + "capabilities": {"drop": ["ALL"]} + } + }] + } + } +``` + +#### 2. Network Policies + +```yaml +# k8s/network-policy.yaml +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: sandbox-isolation + namespace: ii-agent-sandboxes +spec: + podSelector: + matchLabels: + ii-agent.sandbox: "true" + policyTypes: + - Ingress + - Egress + ingress: + - from: + - namespaceSelector: + matchLabels: + name: ii-agent-system + podSelector: + matchLabels: + app: sandbox-server + egress: + # Allow DNS + - to: + - namespaceSelector: {} + podSelector: + matchLabels: + k8s-app: kube-dns + ports: + - protocol: UDP + port: 53 + # Allow MCP server only + - to: + - namespaceSelector: + matchLabels: + name: ii-agent-data + podSelector: + matchLabels: + app: mcp-server + ports: + - protocol: TCP + port: 6060 +``` + +#### 3. 
Pod Security Standards + +```yaml +# k8s/namespace.yaml +apiVersion: v1 +kind: Namespace +metadata: + name: ii-agent-sandboxes + labels: + pod-security.kubernetes.io/enforce: restricted + pod-security.kubernetes.io/enforce-version: latest +``` + +#### 4. IAM Roles for Service Accounts (IRSA) + +```yaml +# k8s/service-account.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: sandbox-server + namespace: ii-agent-system + annotations: + eks.amazonaws.com/role-arn: arn:aws:iam::ACCOUNT:role/ii-agent-sandbox-server +--- +# IAM Policy (Terraform) +resource "aws_iam_role_policy" "sandbox_server" { + role = aws_iam_role.sandbox_server.id + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Effect = "Allow" + Action = [ + "secretsmanager:GetSecretValue" + ] + Resource = [ + "arn:aws:secretsmanager:*:*:secret:ii-agent/*" + ] + } + ] + }) +} +``` + +#### 5. Secrets Management + +```python +# src/ii_agent/core/config/sandbox.py additions +import boto3 + +def get_secret(secret_name: str) -> str: + """Retrieve secret from AWS Secrets Manager.""" + client = boto3.client('secretsmanager') + response = client.get_secret_value(SecretId=secret_name) + return response['SecretString'] + +# Usage +config = SandboxSettings( + jwt_secret=get_secret("ii-agent/jwt-secret"), + # Never in environment variables +) +``` + +### Security Comparison + +| Aspect | Local Docker | Cloud K8s | +|--------|--------------|-----------| +| Container isolation | Process namespace | gVisor (VM-level) | +| Network isolation | Bridge network | NetworkPolicy (deny-all) | +| Host access | Docker socket (root) | No host access | +| Secrets | Env vars | Secrets Manager + IRSA | +| Multi-tenant | ❌ No | ✅ Yes (namespace isolation) | +| Audit logging | Optional | CloudWatch + CloudTrail | +| Compliance | Manual | SOC2/HIPAA capable | + +--- + +## Migration Checklist + +### Local → Team + +- [ ] Generate TLS certificates (or use Let's Encrypt) +- [ ] Configure reverse proxy with rate 
limiting +- [ ] Set up OIDC/SAML authentication +- [ ] Create isolated Docker network for sandboxes +- [ ] Implement audit logging +- [ ] Document incident response procedures + +### Team → Cloud + +- [ ] Provision EKS cluster with gVisor runtime +- [ ] Implement KubernetesSandbox provider +- [ ] Configure NetworkPolicies +- [ ] Set up IRSA for service accounts +- [ ] Migrate secrets to Secrets Manager +- [ ] Configure CloudWatch logging +- [ ] Set up ALB with WAF +- [ ] Implement horizontal pod autoscaling +- [ ] Configure pod disruption budgets +- [ ] Set up monitoring (Prometheus/Grafana or CloudWatch) +- [ ] Penetration testing +- [ ] Compliance review (if required) + +--- + +## Cost Considerations + +| Component | Local | Team (On-prem) | Cloud (AWS) | +|-----------|-------|----------------|-------------| +| Compute | Your hardware | Your servers | ~$200-500/mo (EKS + nodes) | +| Database | Docker | Your DB | ~$50-200/mo (RDS) | +| Networking | Free | Your network | ~$20-50/mo (NAT, ALB) | +| Secrets | N/A | HashiCorp Vault | ~$5/mo (Secrets Manager) | +| Monitoring | Local | Prometheus | ~$50-100/mo (CloudWatch) | +| **Total** | **$0** | **Your infra** | **~$325-850/mo** | + +--- + +## Timeline Estimate + +| Phase | Effort | Prerequisites | +|-------|--------|---------------| +| Local (done) | 0 | Docker installed | +| Team deployment | 1-2 weeks | TLS certs, auth provider | +| Cloud MVP | 2-4 weeks | AWS account, K8s experience | +| Production hardening | 2-4 weeks | Security review, compliance | + +--- + +## References + +- [Kubernetes Pod Security Standards](https://kubernetes.io/docs/concepts/security/pod-security-standards/) +- [gVisor Container Sandbox](https://gvisor.dev/) +- [AWS EKS Best Practices](https://aws.github.io/aws-eks-best-practices/) +- [OWASP Container Security](https://cheatsheetseries.owasp.org/cheatsheets/Docker_Security_Cheat_Sheet.html) diff --git a/docs/docs/core-infrastructure.md b/docs/docs/core-infrastructure.md new file mode 
100644 index 000000000..b172f3aec --- /dev/null +++ b/docs/docs/core-infrastructure.md @@ -0,0 +1,71 @@ +--- +id: core-infrastructure +title: Core Infrastructure +sidebar_label: Core Infrastructure +sidebar_position: 5 +description: Configure Postgres, Redis, and host ports so II-Agent services can talk to each other. +--- + +# Core Infrastructure + +These variables keep the underlying databases, caches, and network ports consistent across every II-Agent container. Start with the safe defaults from `docker/.stack.env.example`, then adjust only when you have conflicts. + +## Postgres credentials + +Variables: `POSTGRES_USER`, `POSTGRES_PASSWORD`, `POSTGRES_DB`, `POSTGRES_PORT` + +1. Choose credentials you are comfortable using for local development: + ```bash + POSTGRES_USER=app + POSTGRES_PASSWORD=changeme + POSTGRES_DB=ii + POSTGRES_PORT=5432 + ``` +2. Update the same values anywhere else they appear (Prisma, backend `.env` files, local clients). +3. If port `5432` conflicts with a local Postgres install, change `POSTGRES_PORT` (e.g., `55432`) and update your connection strings. + +## Backend connection string + +Variable: `DATABASE_URL` + +- Use the async driver: `postgresql+asyncpg://USER:PASS@postgres:5432/ii`. +- Keep the host as `postgres` so services inside Docker can resolve it. + +## Sandbox database + +Variables: `SANDBOX_DB_NAME`, `SANDBOX_DATABASE_URL` + +- Only required when the sandbox service uses a separate database. +- You can reuse the main Postgres host with a new database name to keep management simple. + +## Redis + +Variable: `REDIS_PORT` + +- Defaults to `6379`. Change only if another local process already binds that port. +- Containers reference Redis by service name (`redis`), so host-only changes do not affect internal networking. + +## HTTP-facing ports + +Variables: `BACKEND_PORT`, `FRONTEND_PORT`, `SANDBOX_SERVER_PORT`, `TOOL_SERVER_PORT`, `NGROK_METRICS_PORT`, `MCP_PORT` + +- Map each to an open host port. 
The defaults (8000/3000/9000/etc.) usually work. +- When a collision happens, bump the conflicting port and update any URLs or CLIs that pointed to the old value (e.g., `VITE_API_URL`). + +## Docker sandbox port pool + +When running in local Docker mode (`SANDBOX_PROVIDER=docker`), the sandbox server dynamically maps container ports to the host from the range **30000-30999**. Each sandbox reserves 6 host ports (MCP, code-server, noVNC, and spares), allowing approximately 166 concurrent sandboxes. + +The frontend automatically rewrites `localhost` URLs to the browser's hostname so sandbox services remain accessible when the UI is accessed from a different machine on the LAN. + +## Validation checklist + +1. Run `./scripts/run_stack.sh --build` and ensure Docker does **not** report binding conflicts. +2. Use `docker compose ps` to inspect which host ports map to each container. +3. From your host, connect to the services directly: + ```bash + psql postgresql://app:changeme@localhost:${POSTGRES_PORT}/ii + redis-cli -p ${REDIS_PORT} ping + curl http://localhost:${BACKEND_PORT}/health + ``` +4. Document any custom port numbers in your team docs so other contributors can reuse them. diff --git a/docs/docs/feature-branch-analysis.md b/docs/docs/feature-branch-analysis.md new file mode 100644 index 000000000..5c20f4771 --- /dev/null +++ b/docs/docs/feature-branch-analysis.md @@ -0,0 +1,428 @@ +# Feature Branch Dependency Analysis + +> **Branch:** Feature branch vs `develop` +> **Summary:** 124 files changed, 16,024 insertions(+), 295 deletions(-) +> **Primary Feature:** Local Docker Sandbox - Air-gapped deployment without E2B cloud + +--- + +## Executive Summary + +This feature branch implements a **complete local-only deployment mode** for ii-agent, eliminating the dependency on E2B cloud sandboxes and GCS storage. The changes enable: + +1. **Docker-based sandboxes** running on the local host +2. **Local filesystem storage** replacing Google Cloud Storage +3. 
**Orphan cleanup system** to manage sandbox lifecycle +4. **Extended token budgets** for large context models + +--- + +## Tier 0: Configuration & Constants (Foundation Layer) + +### Token Budget Constants +**File:** [src/ii_agent/utils/constants.py](../src/ii_agent/utils/constants.py) + +| Constant | Value | Purpose | +|----------|-------|---------| +| `TOKEN_BUDGET_NORMAL` | 200,000 | Standard context window | +| `TOKEN_BUDGET_EXTENDED` | 800,000 | **NEW** - Extended context models (Claude 4.5) | + +### Agent Configuration +**File:** [src/ii_agent/core/config/settings.py](../src/ii_agent/core/config/settings.py) + +| Setting | Old Default | New Default | Notes | +|---------|-------------|-------------|-------| +| `storage_provider` | `"gcs"` | `"local"` | Enables local-first deployment | + +### Sandbox Configuration +**File:** [src/ii_agent/core/config/sandbox.py](../src/ii_agent/core/config/sandbox.py) + +**New Configuration Options:** + +```python +class SandboxSettings(BaseSettings): + # Sandbox provider selection + provider: SandboxProvider = "e2b" # env: SANDBOX_PROVIDER + + # Docker-specific settings + docker_image: str = "ii-agent-sandbox:latest" # env: SANDBOX_DOCKER_IMAGE + docker_network: str = "ii-agent-local_ii-network" # env: SANDBOX_DOCKER_NETWORK + docker_host: str = "localhost" # env: SANDBOX_DOCKER_HOST (LAN IP for remote browser access) + port_range_start: int = 30000 # env: SANDBOX_PORT_RANGE_START + port_range_end: int = 30999 # env: SANDBOX_PORT_RANGE_END + + # Orphan cleanup settings + local_mode: bool = False # Enable Docker sandbox features + orphan_cleanup_enabled: bool = True # Can be disabled + orphan_cleanup_interval_seconds: int = 60 + backend_url: str = "http://backend:8000" # For session verification + + # Container service ports + mcp_server_port: int = 6060 + code_server_port: int = 9000 + novnc_port: int = 6080 +``` + +### Base Classes (API Contracts) + +**Storage Base** - 
[src/ii_agent/core/storage/base.py](../src/ii_agent/core/storage/base.py) +- No changes to interface - LocalStorage implements existing contract + +**Sandbox Base** - [src/ii_agent/agents/sandboxes/base.py](../src/ii_agent/agents/sandboxes/base.py) +- `expose_port(port: int, external: bool = False)` - **NEW parameter** + - `external=False`: Returns container-to-container URL (Docker network) + - `external=True`: Returns browser-accessible URL (host port) + +--- + +## Tier 1: Infrastructure Components (Building Blocks) + +### Port Pool Manager (NEW) +**File:** [src/ii_agent/agents/sandboxes/port_manager.py](../src/ii_agent/agents/sandboxes/port_manager.py) (480 lines) + +A singleton service managing port allocation for Docker sandbox containers. + +**Architecture:** +``` +┌─────────────────────────────────────────────────────────────┐ +│ PortPoolManager │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────────┐ │ +│ │ Port Pool │ │ Allocations │ │ Orphan Cleanup │ │ +│ │ 30000-30999 │ │ by Sandbox │ │ Background │ │ +│ └──────────────┘ └──────────────┘ └──────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ +``` + +**Key Components:** + +| Class | Purpose | +|-------|---------| +| `PortAllocation` | Single port mapping (host_port, container_port, purpose) | +| `SandboxPortSet` | All ports for one sandbox + creation timestamp | +| `PortPoolManager` | Singleton managing allocation/deallocation | + +**Port Range:** +- **Range:** 30000-30999 (1,000 ports) +- **Per Sandbox:** 6 ports (MCP:6060, code-server:9000, noVNC:6080, dev:3000, vite:5173, http:8080) +- **Capacity:** ~166 concurrent sandboxes + +**Key Features:** +1. **Thread-safe allocation** using `threading.Lock` +2. **Ring-buffer allocation** — Cursor always advances forward, wrapping around the range. Released ports are not reused until the cursor cycles back, preventing conflicts when restarting stopped containers. +3. 
**Startup scanning** - Detects existing ii-sandbox containers on restart, positions cursor past highest allocated port +4. **Orphan cleanup** - Background task releases ports for dead containers +5. **Graceful initialization** - Handles Docker not running + +### Local Storage Provider (NEW) +**File:** [src/ii_agent/core/storage/local.py](../src/ii_agent/core/storage/local.py) (175 lines) + +**Also duplicated for tool server:** +**File:** [src/ii_server/integrations/storage/local.py](../src/ii_server/integrations/storage/local.py) (172 lines) + +Replaces GCS for file storage in local deployments. + +**Features:** +| Feature | Implementation | +|---------|----------------| +| Path traversal protection | `os.path.abspath().startswith(base_path)` | +| Content-type storage | `.meta` sidecar files | +| URL download | Browser-like headers to avoid bot detection | +| Public URL generation | `{TOOL_SERVER_URL}/storage/{path}` | + +**Storage Factory Updates:** +**File:** [src/ii_agent/core/storage/factory.py](../src/ii_agent/core/storage/factory.py) + +```python +def create_storage_client(config: StorageConfig) -> BaseStorage: + if config.storage_provider == "local": + return LocalStorage(config) # NEW + if config.storage_provider == "gcs": + return GCS(config) + raise ValueError(f"Unknown storage provider: {config.storage_provider}") +``` + +--- + +## Tier 2: Docker Sandbox Implementation (Core Feature) + +### DockerSandbox Provider (NEW) +**File:** [src/ii_agent/agents/sandboxes/docker.py](../src/ii_agent/agents/sandboxes/docker.py) (974 lines) + +The core implementation replacing E2B cloud sandboxes. 
+ +**Class Hierarchy:** +``` +Sandbox (Abstract, agents/sandboxes/base.py) + ├── E2BSandbox (Cloud - existing) + └── DockerSandbox (Local - NEW) +``` + +**Container Lifecycle:** +``` +create() ────► Container Created ────► Running + │ + ▼ + Port Allocated + (ring-buffer via PortPoolManager) + │ + ▼ + Services Ready + (MCP :6060, code-server :9000, noVNC :6080) + │ + ▼ +connect() ◀── exited/paused ──► start()/unpause() + readiness check + │ + ▼ +kill() ────────► Container Removed ────► Ports Released + Volume Cleaned +``` + +**Key Methods:** + +| Method | Purpose | +|--------|---------| +| `create()` | Create container, allocate ports, wait for MCP ready | +| `connect()` | Re-attach to existing container, restart if stopped, readiness check | +| `run_command()` | Execute shell command with timeout | +| `read_file()` / `write_file()` | File transfer via docker cp (tar archives) | +| `expose_port()` | Return host-mapped port URL (uses `SANDBOX_DOCKER_HOST`) | +| `kill()` | Stop container, release ports, clean up volume | + +**Security Features:** +1. **Path validation** — Prevents escaping sandbox directory (`ALLOWED_WORKSPACE_BASES`) +2. **Resource limits** — `mem_limit=3072m`, `cpu_quota=200000` (2 CPUs), `pids_limit=512` +3. **Capability dropping** — `cap_drop=["ALL"]`, `cap_add=["CHOWN", "SETUID", "SETGID", "DAC_OVERRIDE"]` +4. **No privilege escalation** — `security_opt=["no-new-privileges"]` +5. 
**Network isolation** — Containers on dedicated Docker network + +**Port Mapping Strategy:** +``` +Browser Request Docker Container + │ │ + ▼ ▼ + localhost:30001 ──────────► container:8080 + (host port) expose_port (container port) +``` + +--- + +## Tier 3: Orchestration (Lifecycle Management) + +### Sandbox Controller - Orphan Cleanup (NEW) +**File:** [src/ii_agent/agents/sandboxes/orphan_cleanup.py](../src/ii_agent/agents/sandboxes/orphan_cleanup.py) + +**New Feature:** Background cleanup of orphaned sandboxes (~350 new lines) + +**Problem Solved:** +When a chat session is deleted in the backend, the sandbox continues running. The orphan cleanup system detects and removes these orphans. It also sweeps Docker directly for zombie containers that have no matching DB record (e.g. from bulk session deletions or application crashes). + +**Flow:** +``` +┌─────────────────────────────────────────────────────────────┐ +│ run_orphan_cleanup_loop() │ +│ │ +│ Pass 1 — _cleanup_orphans() (DB-driven): │ +│ 1. List all non-deleted sandbox records │ +│ 2. For each sandbox: │ +│ a. Skip if created < 5 minutes ago (grace period) │ +│ b. Check if session is deleted or missing │ +│ c. If orphaned → kill container, release ports/volume │ +│ │ +│ Pass 2 — _pause_stale_sandboxes(): │ +│ 1. Pause running sandboxes whose sessions are idle │ +│ │ +│ Pass 3 — _cleanup_docker_zombies() (Docker-level sweep): │ +│ 1. List all containers with ii-agent.sandbox=true label │ +│ 2. Query DB for active sandbox provider_sandbox_ids │ +│ 3. 
For unmatched containers past grace period: │ +│ → force-remove container, clean volume, release ports │ +│ │ +│ Sleep for orphan_cleanup_interval_seconds │ +│ Repeat │ +└─────────────────────────────────────────────────────────────┘ +``` + +**Configuration:** +```python +local_mode: bool = False # Must be True to enable +orphan_cleanup_enabled: bool = True # Can disable for debugging +orphan_cleanup_interval_seconds: int = 60 # Check frequency +backend_url: str = "http://backend:8000" # Backend API endpoint +``` + +**Grace Period:** +- New sandboxes are protected for **5 minutes** after creation +- Prevents race condition during session initialization + +--- + +## Tier 4: Integration Layer (API & Infrastructure) + +### Backend API - File Endpoints +**File:** [src/ii_agent/files/router.py](../src/ii_agent/files/router.py) + +**New Endpoints for Local Storage:** + +| Method | Endpoint | Purpose | +|--------|----------|---------| +| `PUT` | `/files/upload/{path:path}` | Upload file to local storage | +| `GET` | `/files/{path:path}` | Download file with token validation | + +**Token-Based Authentication:** +- Files accessed via signed URLs with `token` query parameter +- Tokens are HMAC signatures with expiration + +### Tool Server - Storage Endpoint +**File:** [src/ii_server/integrations/app/main.py](../src/ii_server/integrations/app/main.py) + +**New Endpoint:** + +| Method | Endpoint | Purpose | +|--------|----------|---------| +| `GET` | `/storage/{file_path:path}` | Serve files from LocalStorage | + +Only active when `STORAGE_PROVIDER=local`. Returns 404 for GCS mode. + +### Docker Compose - Local Stack (NEW) +**File:** [docker/docker-compose.local.yaml](../docker/docker-compose.local.yaml) (194 lines) + +Complete local deployment without any cloud dependencies. 
+ +**Services:** + +The local stack uses a **monolith backend** — no separate sandbox-server or tool-server: + +```yaml +services: + postgres: # Database (:5433) + redis: # Cache/Queue (:6379) + minio: # S3-compatible storage (:9000/:9001) + frontend: # React UI (:1420) + backend: # FastAPI server + sandbox management (:8000) +``` + +**Key Environment Variables:** +```yaml +backend: + SANDBOX_PROVIDER: docker + SANDBOX_LOCAL_MODE: "true" + SANDBOX_DOCKER_HOST: ${SANDBOX_DOCKER_HOST:-localhost} + STORAGE_PROVIDER: local +``` + +**Volume Mounts:** +```yaml +backend: + volumes: + - /var/run/docker.sock:/var/run/docker.sock # Docker access +``` + +--- + +## Dependency Graph + +``` + ┌─────────────────────┐ + │ Configuration │ + │ (constants, config)│ + └─────────┬───────────┘ + │ + ┌───────────────┼───────────────┐ + ▼ ▼ ▼ + ┌─────────────────┐ ┌──────────────┐ ┌──────────────┐ + │ PortPoolManager│ │ LocalStorage │ │ Base Classes │ + │ (Tier 1) │ │ (Tier 1) │ │ (Tier 0) │ + └────────┬────────┘ └──────┬───────┘ └──────┬───────┘ + │ │ │ + ▼ │ │ + ┌─────────────────┐ │ │ + │ DockerSandbox │◄───────┴────────────────┘ + │ (Tier 2) │ + └────────┬────────┘ + │ + ▼ + ┌─────────────────┐ + │SandboxController│ + │ Orphan Cleanup │ + │ (Tier 3) │ + └────────┬────────┘ + │ + ▼ + ┌─────────────────┐ + │ API Routes │ + │ Docker Compose │ + │ (Tier 4) │ + └─────────────────┘ +``` + +--- + +## Migration Guide + +### From E2B Cloud to Local Docker + +1. **Prerequisites:** + - Docker installed and running + - Docker Compose v2+ + - At least 8GB RAM available + +2. **Environment Variables:** + ```bash + # Required changes + SANDBOX_PROVIDER=docker + STORAGE_PROVIDER=local + LOCAL_MODE=true + + # Not required for local mode + # E2B_API_KEY + # GCS_BUCKET_NAME + # GCS_PROJECT_ID + ``` + +3. **Start Local Stack:** + ```bash + docker compose -f docker/docker-compose.local.yaml up -d + ``` + +4. 
**Verify:** + - Check sandbox-server logs for "Using Docker sandbox provider" + - Create a test chat and verify container creation + - Upload a file and verify local storage + +--- + +## Security Considerations + +| Component | Security Measure | +|-----------|-----------------| +| DockerSandbox | Path validation, command sanitization, resource limits | +| LocalStorage | Path traversal protection, base path enforcement | +| Port Manager | Ring-buffer allocation prevents port conflicts on sandbox restart | +| Orphan Cleanup | Grace period prevents premature termination | +| File Endpoints | Token-based signed URLs with expiration | + +--- + +## Performance Notes + +| Metric | E2B Cloud | Local Docker | +|--------|-----------|--------------| +| Sandbox creation | 5-10s | 1-3s | +| File upload | Network dependent | Local disk speed | +| Concurrent sandboxes | Limited by API quota | ~166 (port pool, ring-buffer) | +| Network latency | Cloud RTT | Negligible | + +--- + +## Files Changed Summary + +| Category | Files | Lines Changed | +|----------|-------|---------------| +| New Docker Sandbox | 2 | +1,454 | +| New Local Storage | 4 | +400 | +| Orphan Cleanup | 1 | +120 | +| Configuration | 4 | +80 | +| Docker Compose | 2 | +200 | +| API Endpoints | 2 | +100 | +| Tests | ~20 | +3,000 | +| Documentation | 5 | +1,500 | +| **Total** | **124** | **+16,024 / -295** | diff --git a/docs/docs/getting-started.md b/docs/docs/getting-started.md new file mode 100644 index 000000000..2aaac88b3 --- /dev/null +++ b/docs/docs/getting-started.md @@ -0,0 +1,225 @@ +--- +id: getting-started +title: Docker Stack Environment +sidebar_label: Getting Started +sidebar_position: 2 +description: Bring up the II-Agent Docker stack, configure the correct env file for your mode, and understand required services. 
+--- + +# Docker Stack Environment Setup + +Use this runbook whenever you need to spin up the full II-Agent Docker stack (Postgres, Redis, backend, sandbox server, tool server, frontend, and ngrok). + +Environment file naming by mode: + +- Full stack mode (`docker-compose.stack.yaml`): use `docker/.stack.env`. +- Local Docker sandbox mode (`docker-compose.local.yaml`): use `docker/.stack.env.local`. + +## Before you start + +- Docker Desktop or Docker Engine with Compose v2 (Linux containers enabled). +- Node.js 18+ and Python 3.10+ (only required when running services outside Docker). +- API access for at least one LLM provider (OpenAI-compatible, Anthropic, Gemini, etc.). +- Google Cloud service-account JSON if you plan to store assets on GCS or call Vertex AI. + +## Quick start + +1. Copy the sample file: + ```bash + cp docker/.stack.env.example docker/.stack.env + ``` +2. Fill every placeholder marked `replace-me` or `replace-with-your-token`. Use the [Required Environment Variables](./required-environment-variables/index.md) guide as you go; optional integrations live in [Optional Environment Variables](./optional-environment-variables/index.md). +3. Launch the stack: + ```bash + ./scripts/run_stack.sh --build + ``` + - The helper script checks for `.stack.env` and runs `docker compose -f docker/docker-compose.stack.yaml --env-file docker/.stack.env up`. + - Drop the `--build` flag after the first boot to reuse images. + - Stop the stack with `docker compose -f docker/docker-compose.stack.yaml down`. + +> **Local-only mode (no cloud services):** If you don't need E2B, ngrok, or GCS you can run entirely with Docker sandboxes. See the [Local Docker Sandbox](./local-docker-sandbox.md) guide and use `docker-compose.local.yaml` instead. + +For local-only mode, do not reuse `docker/.stack.env` as your main config file. Use `docker/.stack.env.local`. 
+ +### Migration from previous local env files + +If your existing `.stack.env.local` references the old storage variables, update them: + +| Old variable | New variable | Notes | +| --- | --- | --- | +| `STORAGE_PROVIDER=local` | `STORAGE_PROVIDER=minio` | The `local` filesystem provider has been removed. Use MinIO for local deployments. | +| `LOCAL_STORAGE_URL_BASE` | *(remove)* | No longer used. | +| `LOCAL_STORAGE_INTERNAL_URL_BASE` | *(remove)* | No longer used. | +| `STORAGE_LOCAL_SERVE_URL` | `STORAGE_SERVE_BASE_URL` | Set to the browser-reachable backend URL (e.g. `http://192.168.2.2:8000`). When set, storage URLs route through the backend proxy instead of directly to MinIO. | + +## Required variables overview + +| Section | Key variables | Why they matter | +| --- | --- | --- | +| Frontend build | `FRONTEND_BUILD_MODE`, `VITE_API_URL`, `VITE_GOOGLE_CLIENT_ID`, `VITE_STRIPE_PUBLISHABLE_KEY`, `VITE_SENTRY_DSN`, `VITE_DISABLE_CHAT_MODE` | Control how II-Agent's UI is compiled and which backend endpoint it targets. | +| Networking / tunnels | `NGROK_AUTHTOKEN`, `NGROK_REGION`| Expose the stack over HTTPS for remote demos or callback URLs. | +| Host paths | `GOOGLE_APPLICATION_CREDENTIALS` | Mount a GCP service-account JSON into containers. | +| LLM + auth | `LLM_CONFIGS`, `RESEARCHER_AGENT_CONFIG`, `GOOGLE_CLIENT_ID`, `GOOGLE_REDIRECT_URI`, `ACCESS_TOKEN_EXPIRE_MINUTES`, `ENHANCE_PROMPT_OPENAI_API_KEY` | Give II-Agent access to models and configure OAuth/JWT behavior. | +| Storage | `SLIDE_ASSETS_PROJECT_ID`, `SLIDE_ASSETS_BUCKET_NAME`, `FILE_UPLOAD_*`, `AVATAR_*`, `CUSTOM_DOMAIN` | Buckets that persist agent-generated assets. | +| Backend sandbox | `SANDBOX_TEMPLATE_ID`, `TIME_TIL_CLEAN_UP` | Define how on-demand sandboxes are provisioned and reclaimed. | +| Tool server | `STORAGE_CONFIG__GCS_*` | Buckets used by the tool server baseline. 
| +| Sandbox server | `E2B_API_KEY`, `E2B_TEMPLATE_ID` | Credentials for the hosted sandbox provider (not needed for local-only Docker mode). | +| Core infra | `POSTGRES_*`, `DATABASE_URL`, `SANDBOX_DB_*`, `REDIS_PORT`, `BACKEND_PORT`, `FRONTEND_PORT`, `SANDBOX_SERVER_PORT`, `TOOL_SERVER_PORT`, `NGROK_METRICS_PORT`, `MCP_PORT` | Databases and host port mappings that every service relies on. | + +The required guide links to the detailed setup pages for each section (frontend env, tunnels, host paths, etc.). Keep it open while editing the env file for your selected mode (`docker/.stack.env` or `docker/.stack.env.local`). + +## Optional feature sets + +Some integrations sit behind extra credentials. Configure them after the base agent runs cleanly: + +- Payments and billing. +- Media (image/video) generation. +- Search providers (web, image, visit-level browsing). +- Tool-server specific LLM overrides. +- Database automation (Neon). + +## Boot validation + +1. Run `./scripts/run_stack.sh --build` and confirm all containers are healthy. +2. Visit `http://localhost:<FRONTEND_PORT>` and send a request through II-Agent. +3. Check `docker compose logs -f` for missing variable errors or failing services. +4. When ready to expose the stack, ensure ngrok connected successfully (`http://localhost:<NGROK_METRICS_PORT>`). + +With the stack online, you can iterate on II-Agent flows, add tools, and capture Proof-of-Benefit evidence from real executions. + +## Expected local warnings + +During local development and unit test runs, these warning classes are expected unless you are specifically testing those integrations: + +- `COMPOSIO_API_KEY is not set`: expected when Composio connector features are not configured. +- Pydantic v2 deprecation warnings (`class-based config`, `json_encoders`): expected from current dependency/code usage; non-blocking for now. +- Passlib `crypt` deprecation warning: expected on current Python; relevant for future Python-version migration planning.
+- Intentionally logged exception traces from resilience tests (for example orphan-cleanup fault-injection): expected in those test cases when assertions still pass. + +Treat these as informational in local runs unless they appear alongside test failures or service startup errors. + +## Inner loop mode (client guide) + +II-Agent supports two top-level execution modes for agent turns: + +- `native` (default): Uses II-Agent's built-in execution path with direct LLM API calls. +- `a2a`: Delegates eligible work to an A2A adapter server. The adapter runs one of three backends — `copilot`, `claude-code`, or `codex` — selectable via `AGENT_A2A_BACKEND`. + +### Available A2A backends + +| Backend | Env var value | Required credentials | Supported models | +| --- | --- | --- | --- | +| **Copilot CLI** | `copilot` (default) | `GITHUB_TOKEN` or `GH_TOKEN` (optional — falls back to `gh auth` login) | Any (Copilot routes BYOK) | +| **Claude Code CLI** | `claude-code` | `ANTHROPIC_API_KEY` | `claude-*` models only | +| **Codex CLI** | `codex` | `OPENAI_API_KEY` | `o4-*`, `o3-*`, `o1-*`, `gpt-*` models | + +The adapter server validates credentials at startup. If `AGENT_A2A_BACKEND=claude-code` and `ANTHROPIC_API_KEY` is absent, the adapter will refuse to start. + +When `AGENT_INNER_LOOP_MODE=a2a`, the backend service also logs a warning if the configured LLM model is incompatible with the selected backend (for example, sending a `claude-*` model to the `codex` backend). + +### Recommended starting point + +Start with `native`, then enable `a2a` only when you want to validate delegated code-first workflows. + +### Relationship to local vs cloud mode + +Inner-loop mode and deployment mode are orthogonal: + +- Deployment mode selects where sandboxes run (`local` Docker or cloud/E2B). +- Inner-loop mode selects how agent turns are executed (`native` or `a2a`). 
+ +From a user perspective, there is only one direct dependency: + +- If you choose `a2a`, `AGENT_A2A_AGENT_URL` must point to a reachable adapter endpoint in your selected environment. + +This means you can use: + +- `native` with local sandboxes. +- `native` with cloud sandboxes. +- `a2a` with local sandboxes (if adapter is running and reachable). +- `a2a` with cloud sandboxes (if adapter is deployed and reachable). + +### Simple configuration example + +Add these environment variables to your backend environment file (`.env`, `docker/.stack.env`, or `docker/.stack.env.local`, depending on your setup): + +```bash +AGENT_INNER_LOOP_MODE=native +AGENT_A2A_BACKEND=copilot +AGENT_A2A_AGENT_URL=http://localhost:18100 +AGENT_A2A_TIMEOUT_SECONDS=30 +AGENT_A2A_FALLBACK_TO_NATIVE=true +AGENT_A2A_CONTEXT_REUSE=true +``` + +To test delegated mode, switch only this value: + +```bash +AGENT_INNER_LOOP_MODE=a2a +``` + +For local kick-the-tires testing, run the A2A adapter in a separate terminal. Choose the backend that matches your credentials: + +```bash +# Copilot backend (default — uses 'gh auth' login or GITHUB_TOKEN): +uv run python -m ii_agent.integrations.a2a.adapter_server --host 0.0.0.0 --port 18100 --backend copilot + +# Claude Code backend (requires ANTHROPIC_API_KEY): +ANTHROPIC_API_KEY=sk-ant-... uv run python -m ii_agent.integrations.a2a.adapter_server --host 0.0.0.0 --port 18100 --backend claude-code + +# Codex backend (requires OPENAI_API_KEY): +OPENAI_API_KEY=sk-... uv run python -m ii_agent.integrations.a2a.adapter_server --host 0.0.0.0 --port 18100 --backend codex +``` + +Then restart the backend so it picks up: + +- `AGENT_INNER_LOOP_MODE=a2a` +- `AGENT_A2A_AGENT_URL=http://localhost:18100` + +With this setup, frontend requests can exercise the delegated inner-loop path end-to-end. 
+ +### Pros and cons for end clients + +When using `a2a`: + +- Pros: + - Can be materially lower cost when routed through Copilot-backed inference instead of direct provider API-key usage. + - Better fit for code-heavy delegated flows. + - Clear path to multi-agent interoperability over A2A. + - Keeps Copilot-adapter concerns separated from core II-Agent runtime. +- Cons: + - Extra network/process hop can add latency. + - Requires adapter availability and health management. + - Operationally more moving parts than the default mode. + +When staying on `native`: + +- Pros: + - Simplest operations and lowest setup complexity. + - Strong compatibility with existing II-Agent features. + - Fewer external dependencies during local development. +- Cons: + - Usually higher model-inference cost when relying only on direct provider API keys. + - Less exposure to A2A interoperability patterns. + - Does not exercise delegated adapter behavior. + +Cost note: + +- The largest savings typically come from Copilot-routed delegated usage. +- If delegated mode is configured in BYOK passthrough style, billing follows your provider plan and savings may differ. + +### Important routing behavior + +Even when `AGENT_INNER_LOOP_MODE=a2a`, II-Agent keeps native routing for request classes that are platform-specific or policy-sensitive. + +These remain native-owned by design: + +- Slides workflows. +- Storybook generation workflows. +- Media generation workflows (image/video). +- Connector-backed operations (for example GitHub/Composio flows). +- Planning and milestone workflows. +- Dev infrastructure actions (environment/bootstrap/restart/port orchestration). +- Safety, policy, compliance, or capability exceptions. + +This means enabling `a2a` does not remove native capabilities. It changes routing for eligible requests while preserving the default path where it is required. 
diff --git a/docs/docs/local-docker-sandbox.md b/docs/docs/local-docker-sandbox.md new file mode 100644 index 000000000..28253791e --- /dev/null +++ b/docs/docs/local-docker-sandbox.md @@ -0,0 +1,413 @@ +# Local Docker Sandbox Setup + +This guide explains how to run ii-agent with **local Docker containers** instead of E2B cloud sandboxes. This setup keeps all data on your machine and is suitable for: + +- Privileged or NDA-protected data +- Air-gapped or restricted network environments +- Development and testing without cloud dependencies +- Self-hosted deployments + +## Overview + +ii-agent supports multiple sandbox providers through a pluggable architecture: + +| Provider | Description | Use Case | +|----------|-------------|----------| +| `e2b` (default) | E2B cloud micro-VMs | Production, quick setup | +| `docker` | Local Docker containers | Privacy, air-gapped, self-hosted | + +## Prerequisites + +- Docker Engine 20.10+ with Docker Compose v2 +- At least 4GB RAM available for containers +- An LLM API key (OpenAI, Anthropic, etc.) + +## Quick Start + +### 1. Build the Sandbox Image + +The sandbox image contains the same tools as E2B sandboxes (Python, Node.js, Playwright, code-server): + +```bash +cd /path/to/ii-agent + +# Build the sandbox image +docker build -t ii-agent-sandbox:latest -f e2b.Dockerfile . +``` + +This creates an image with: +- Python 3.10 with common data science packages +- Node.js 24 with npm/yarn/pnpm +- Playwright with Chromium for web automation +- code-server (VS Code in browser) +- noVNC + x11vnc for browser-based VNC access (user handoff for CAPTCHAs/login) +- Bun runtime +- tmux for session management + +### 2. 
Configure Environment + +```bash +# Copy the example environment file +cp docker/.stack.env.local.example docker/.stack.env.local + +# Edit and configure required values +nano docker/.stack.env.local +``` + +**Required configuration:** +```bash +# Generate a secure JWT secret by running `openssl rand -hex 32` in a shell and pasting the output below (env files do not expand $(...)) +JWT_SECRET_KEY=<paste-generated-hex-value-here> + +# Add at least one LLM API key +OPENAI_API_KEY=sk-... +# or +ANTHROPIC_API_KEY=sk-ant-... +``` + +### 3. Start the Stack + +```bash +# From the project root +docker compose -f docker/docker-compose.local.yaml \ + --env-file docker/.stack.env.local \ + up -d +``` + +### 4. Access the Application + +- **Frontend**: http://localhost:1420 +- **Backend API**: http://localhost:8000 +- **MinIO Console**: http://localhost:9001 (minioadmin/minioadmin) + +## How It Works + +### Architecture + +The local stack uses a **monolith backend** — there is no separate sandbox-server or tool-server. The backend manages sandbox containers directly via the Docker API. + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Host Machine │ +├─────────────────────────────────────────────────────────────────┤ +│ ┌─────────┐ ┌──────────────────────────────────────────────┐ │ +│ │Frontend │ │ Backend (:8000) │ │ +│ │ :1420 │ │ FastAPI + Socket.IO │ │ +│ └────┬────┘ │ SandboxService → DockerSandbox │ │ +│ │ │ PortPoolManager (ring-buffer allocation) │ │ +│ │ │ Orphan cleanup (background task) │ │ +│ │ └──────────┬───────────────────────────────────┘ │ +│ │ │ Docker API (socket mount) │ +│ │ ▼ │ +│ │ ┌──────────────────────────────────────────────┐ │ +│ │ │ Sandbox Containers (port range 30000-30999) │ │ +│ │ │ ┌─────────────────────────────────────────┐ │ │ +│ │ │ │ ii-sandbox-{id} │ │ │ +│ │ │ │ MCP Server (:6060) code-server (:9000)│ │ │ +│ │ │ │ noVNC (:6080) Xvfb + x11vnc + Chromium│ │ │ +│ │ │ │ Dev servers (:3000, :5173, :8080) │ │ │ +│ │ │ └─────────────────────────────────────────┘ │ │ +│ │ │ ┌──────────┐ ┌──────────┐ │ │ +│ │ │ │Sandbox 2 │ │ ...
│ │ │ +│ │ │ └──────────┘ └──────────┘ │ │ +│ │ └──────────────────────────────────────────────┘ │ +│ │ │ +│ ┌────┴─────────────────────────────────────────────────────┐ │ +│ │ Docker Network │ │ +│ └───────────────────────────────────────────────────────────┘ │ +│ │ +│ ┌─────────┐ ┌─────────┐ ┌─────────────────┐ │ +│ │Postgres │ │ Redis │ │ MinIO (S3-compat│ │ +│ │ :5433 │ │ :6379 │ │ :9000 / :9001) │ │ +│ └─────────┘ └─────────┘ └─────────────────┘ │ +└──────────────────────────────────────────────────────────────────┘ +``` + +### Sandbox Lifecycle + +1. **Creation**: When a task requires code execution, the backend's `SandboxService` creates a new Docker container via `DockerSandbox.create()` +2. **Execution**: Commands and file operations run inside the isolated container via MCP server +3. **Persistence**: Workspace files persist in a named Docker volume for the session duration +4. **Pause/Resume**: Stopped containers are automatically restarted when a user revisits the session (see Sandbox Restart below) +5. **Cleanup**: Containers are removed when the session is deleted (orphan cleanup) or manually killed + +### Sandbox Restart on Session Load + +When a user navigates to a session with an existing sandbox, the backend automatically reconnects: + +1. Frontend sends `sandbox_status` Socket.IO command +2. Backend calls `SandboxService.get_sandbox_for_session()` → `DockerSandbox.connect()` +3. If container is `paused` → `unpause()` +4. If container is `exited`/`created` → `start()` + readiness check (MCP health endpoint) +5. Port mappings are re-extracted and registered with the port pool manager +6. Frontend receives sandbox URLs (code-server, noVNC) and reconnects + +The "Awake Sandbox" button in the UI follows the same code path. 
+ +### Key Differences from E2B + +| Feature | E2B Cloud | Docker Local | +|---------|-----------|--------------| +| Startup time | ~150ms (pre-warmed) | ~2-5s (cold start) | +| Isolation | Firecracker micro-VM | Docker container | +| Network | Requires ngrok tunnel | Host-local only | +| Data location | E2B infrastructure | Your machine | +| Scaling | Managed by E2B | Manual (resource limits) | +| Cost | Pay per use | Free (your hardware) | + +## Configuration Reference + +### Environment Variables + +#### Sandbox Configuration + +| Variable | Default | Description | +|----------|---------|-------------| +| `SANDBOX_PROVIDER` | `e2b` | Set to `docker` for local sandboxes | +| `SANDBOX_DOCKER_IMAGE` | `ii-agent-sandbox:latest` | Docker image for sandboxes | +| `SANDBOX_DOCKER_NETWORK` | `ii-agent-local_ii-network` | Docker network for sandbox containers | +| `SANDBOX_DOCKER_HOST` | `localhost` | Hostname used in sandbox URLs returned to browser. Set to LAN IP when browser is on a different machine. | +| `SANDBOX_PORT_RANGE_START` | `30000` | Start of host port range for sandbox port mappings | +| `SANDBOX_PORT_RANGE_END` | `30999` | End of host port range for sandbox port mappings | +| `SANDBOX_TIMEOUT_SECONDS` | `7200` | Idle timeout before sandbox auto-pauses (seconds) | +| `SANDBOX_MCP_SERVER_PORT` | `6060` | MCP server port inside sandbox containers | +| `SANDBOX_CODE_SERVER_PORT` | `9000` | code-server port inside sandbox containers | +| `SANDBOX_NOVNC_PORT` | `6080` | noVNC port inside sandbox containers | +| `POSTGRES_PORT` | `5432` | PostgreSQL port (use 5433 if 5432 is taken) | + +#### Orphan Cleanup Configuration + +When running in local mode, the backend automatically cleans up containers whose associated chat sessions have been deleted. 
+ +| Variable | Default | Description | +|----------|---------|-------------| +| `SANDBOX_LOCAL_MODE` | `false` | Set to `true` to enable Docker sandbox features and orphan cleanup | +| `SANDBOX_ORPHAN_CLEANUP_ENABLED` | `true` | Can disable cleanup for debugging | +| `SANDBOX_ORPHAN_CLEANUP_INTERVAL_SECONDS` | `60` | How often to check for orphaned sandboxes | +| `SANDBOX_BACKEND_URL` | `http://backend:8000` | Backend URL for session verification during cleanup | + +**How It Works:** +1. Every 60 seconds (configurable), a background task in the backend performs three cleanup passes: + - **Orphan sweep (DB-driven):** Queries all Docker sandbox records and checks whether the linked session has been deleted. If so, kills the container, releases ports, removes the workspace volume, and marks the DB record as deleted. + - **Stale pause:** Pauses (`docker stop`) running sandboxes whose sessions have been idle longer than `SANDBOX_TIMEOUT_SECONDS`. Paused containers retain their filesystem and can be resumed on the next session access. + - **Docker zombie sweep:** Lists all Docker containers with the `ii-agent.sandbox=true` label directly via the Docker API, then removes any container whose full ID does not match an active (non-deleted) DB record. This catches containers orphaned by bulk session deletions, DB record failures, or application crashes. +2. All three passes apply the same 5-minute grace period to avoid racing with sandbox initialization. 
+ +#### Storage Configuration + +Local deployments use local filesystem storage instead of cloud storage (GCS): + +| Variable | Default | Description | +|----------|---------|-------------| +| `STORAGE_PROVIDER` | `local` | Use `local` for filesystem, `gcs` for Google Cloud | +| `LOCAL_STORAGE_PATH` | `/.ii_agent/storage` | Base directory for file storage | +| `PUBLIC_TOOL_SERVER_URL` | (auto) | Public URL for the tool server (for file URLs) | + +When using local storage: +- Files are stored on the local filesystem +- Content-types are preserved in `.meta` sidecar files +- Files are served via the tool server's `/storage/{path}` endpoint +- Path traversal attacks are prevented by path validation + +### Port Management + +Docker sandboxes expose internal ports (MCP server, code-server, noVNC, dev servers) to the host. The backend's `PortPoolManager` manages a **port pool** with ring-buffer allocation to prevent conflicts: + +- **Default range**: 30000-30999 (1000 ports) +- **Per sandbox**: 6 ports allocated (MCP:6060, code-server:9000, noVNC:6080, plus dev ports 3000, 5173, 8080) +- **Capacity**: ~166 concurrent sandboxes with default settings +- **Ring-buffer allocation**: Ports are allocated by advancing a cursor through the range. Released ports are not reused until the cursor wraps around the entire pool. This prevents port conflicts when restarting stopped containers whose ports may have been assigned to newer sandboxes. +- **Startup scan**: On boot, the port manager scans existing Docker containers and registers their ports as allocated, positioning the ring cursor past the highest in-use port. 
+ +**Key implementation files:** +- `src/ii_agent/agents/sandboxes/docker.py` — Docker sandbox provider (`DockerSandbox`) +- `src/ii_agent/agents/sandboxes/port_manager.py` — Port pool allocation (ring-buffer) +- `src/ii_agent/agents/sandboxes/orphan_cleanup.py` — Orphan cleanup background task +- `src/ii_agent/agents/sandboxes/service.py` — `SandboxService` (provider dispatch, DB persistence) +- `src/ii_agent/agents/sandboxes/base.py` — `Sandbox` base class +- `src/ii_agent/core/config/sandbox.py` — `SandboxSettings` configuration + +### noVNC Browser Handoff + +Each sandbox container runs a **noVNC** web viewer (port 6080) that provides browser-based access to the sandbox's virtual display. This enables a **human-in-the-loop** workflow: + +1. The agent automates a browser task using Playwright +2. The agent hits a barrier it can't handle (CAPTCHA, login page, 2FA prompt) +3. The agent calls `expose_port(sandbox_id, 6080, external=True)` to get a noVNC URL +4. The agent shares the URL with the user +5. The user opens the URL in their browser and interacts directly with the sandbox's Chromium instance +6. The user tells the agent they're done +7. The agent resumes automation + +**Architecture:** + +``` +Agent (Playwright MCP) → Chromium → Xvfb :99 ← x11vnc :5900 ← websockify :6080 ← User's browser +``` + +The virtual display was always running (for Playwright's headed mode). x11vnc + noVNC simply provide a window into it. Both the agent and user can interact with the browser simultaneously (x11vnc runs with `-shared`). + +**Manual access** (for debugging — find the host-mapped port): + +```bash +# Check Docker port mapping directly +docker port ii-sandbox-<sandbox-id> 6080 +``` + +Then open `http://localhost:<host-port>/vnc.html` in your browser. + +### Resource Limits + +Each sandbox container is created with resource constraints. Adjust in `DockerSandbox.create()` if needed.
+ +## Connecting Your Local MCP Server + +If you have a local MCP server with privileged data: + +### MCP Server on Host Machine + +```bash +# In .stack.env.local +MCP_SERVER_URL=http://host.docker.internal:6060 +``` + +### MCP Server in Docker + +If your MCP server runs in a container, put it on the same network: + +```yaml +# In docker-compose.local.yaml, add your MCP server: +services: + mcp-server: + image: your-mcp-server:latest + networks: + - default + ports: + - "6060:6060" +``` + +Then configure: +```bash +MCP_SERVER_URL=http://mcp-server:6060 +``` + +## Troubleshooting + +### Container fails to start + +Check backend logs: +```bash +docker logs ii-agent-local-backend-1 +``` + +Verify the sandbox image exists: +```bash +docker images | grep ii-agent-sandbox +``` + +### Permission denied on Docker socket + +The backend container needs access to create sandbox containers via the Docker socket mount. Either: + +1. Add your user to the docker group: `sudo usermod -aG docker $USER` +2. Or run with elevated privileges (not recommended for production) + +### PostgreSQL port conflict + +If you have PostgreSQL running locally: +```bash +# In .stack.env.local +POSTGRES_PORT=5433 +``` + +### Sandbox containers not cleaning up + +**Automatic Cleanup (Recommended):** + +If `SANDBOX_LOCAL_MODE=true` is set, orphan cleanup runs automatically. Check if it's working: +```bash +# Check backend logs for cleanup activity +docker logs ii-agent-local-backend-1 2>&1 | grep -i orphan +``` + +**Manual cleanup:** +```bash +# List sandbox containers +docker ps -a | grep ii-sandbox + +# Remove all stopped sandbox containers +docker container prune -f --filter "label=ii-agent.sandbox=true" +``` + +## Security Considerations + +### Network Isolation + +By default, sandbox containers can access the network. 
For stricter isolation: + +```yaml +# In DockerSandbox configuration +network_mode: none # Complete isolation +# or +network_mode: internal # Container-to-container only +``` + +### Resource Limits + +Prevent runaway containers: + +```python +# These are configured in DockerSandbox.create() (src/ii_agent/agents/sandboxes/docker.py) +mem_limit="3072m" # 3 GB memory +cpu_period=100000 +cpu_quota=200000 # 2 CPUs +pids_limit=512 +security_opt=["no-new-privileges"] +cap_drop=["ALL"] +cap_add=["CHOWN", "SETUID", "SETGID", "DAC_OVERRIDE"] +``` + +### Filesystem Access + +Sandbox containers only have access to: +- Their workspace volume (mounted at `/workspace`) +- Temporary files (mounted at `/tmp`) + +They cannot access host filesystem or other containers' data. + +## Development + +### Running Tests + +```bash +# Test sandbox provider +uv run pytest src/tests/unit/agent/test_docker_sandbox.py -v +uv run pytest src/tests/unit/agent/test_port_manager.py -v +uv run pytest src/tests/unit/agent/test_orphan_cleanup.py -v +``` + +### Extending the Sandbox Image + +Create a custom Dockerfile based on `e2b.Dockerfile`: + +```dockerfile +FROM ii-agent-sandbox:latest + +# Add your custom tools +RUN pip install your-private-package +``` + +Build and configure: +```bash +docker build -t ii-agent-sandbox-custom:latest -f Dockerfile.custom . +SANDBOX_DOCKER_IMAGE=ii-agent-sandbox-custom:latest +``` + +## Contributing + +This Docker sandbox provider is designed as an extensible alternative to E2B. 
Contributions welcome: + +- Performance improvements +- Additional isolation options (gVisor, Kata containers) +- Kubernetes provider for scalable deployments +- Better resource management and pooling diff --git a/docs/docs/required-environment-variables/index.md b/docs/docs/required-environment-variables/index.md new file mode 100644 index 000000000..6b3144259 --- /dev/null +++ b/docs/docs/required-environment-variables/index.md @@ -0,0 +1,123 @@ +--- +id: required-environment-variables +title: Required Environment Variables +slug: /required-environment-variables +sidebar_label: Required Environment Variables +sidebar_position: 3 +description: Definitive checklist for required stack env keys, including local-mode env file naming. +--- + +# Required Environment Variables + +The Docker stack only works when **every** mandatory variable in the correct env file is populated. + +- Full stack mode uses `docker/.stack.env`. +- Local Docker sandbox mode uses `docker/.stack.env.local`. + +Use this checklist for both modes and store secrets outside Git. + +## How to read this page + +- Each section maps to a `/docs/required-environment-variables/*` deep-dive. Follow the link when you need screenshots, UI paths, or troubleshooting tips. +- Variables marked with ✅ are required; ones marked with ☑️ can be blank but should be reviewed before production demos. +- Keep secrets in a password manager or secret store—this file is intentionally gitignored. + +## Frontend build [`/docs/required-environment-variables/frontend-env`](/docs/required-environment-variables/frontend-env) + +| Variable | Status | Notes | +| --- | --- | --- | +| `FRONTEND_BUILD_MODE` | ✅ | `production` for demos; `development` only while debugging the containerized build. | +| `VITE_API_URL` | ✅ | Base URL the UI uses to hit the backend (default `http://localhost:8000`). | +| `VITE_GOOGLE_CLIENT_ID` | ☑️ | Needed when exposing Google OAuth in the browser. 
| +| `VITE_STRIPE_PUBLISHABLE_KEY` | ☑️ | Supply when billing is enabled. | +| `VITE_SENTRY_DSN` | ☑️ | Optional Sentry DSN for browser traces. | +| `VITE_DISABLE_CHAT_MODE` | ☑️ | Toggle chat UI for demo-only builds. | + +## Networking and tunnels [`/docs/required-environment-variables/networking-tunnels`](/docs/required-environment-variables/networking-tunnels) + +| Variable | Status | Notes | +| --- | --- | --- | +| `NGROK_AUTHTOKEN` | ✅ | Required to open HTTPS tunnels. | +| `NGROK_REGION` | ✅ | Choose the closest region (`us`, `eu`, `ap`, ...). | +| `NGROK_AGENT_EXTRA_ARGS` | ☑️ | Reserved domains, header rewrites, etc. Leave empty if unsure. | + +## Host paths [`/docs/required-environment-variables/host-paths`](/docs/required-environment-variables/host-paths) + +| Variable | Status | Notes | +| --- | --- | --- | +| `GOOGLE_APPLICATION_CREDENTIALS` | ✅ | Absolute path to the GCP service-account JSON mounted into containers. | + +## LLM configuration and auth [`/docs/required-environment-variables/llm-auth`](/docs/required-environment-variables/llm-auth) + +| Variable | Status | Notes | +| --- | --- | --- | +| `LLM_CONFIGS` | ✅ | JSON describing each available model (id, key, base URL, max tokens, retries). | +| `RESEARCHER_AGENT_CONFIG` | ✅ | JSON describing which models power research/report flows. | +| `GOOGLE_CLIENT_ID` | ☑️ | Backend OAuth client ID. | +| `GOOGLE_REDIRECT_URI` | ☑️ | Callback URL (keep the localhost default for dev). | +| `ACCESS_TOKEN_EXPIRE_MINUTES` | ☑️ | JWT lifetime. | +| `ENHANCE_PROMPT_OPENAI_API_KEY` | ☑️ | Dedicated key for the prompt enhancer pipeline. | + +## Inner loop controls (optional) [`/docs/getting-started`](/docs/getting-started) + +Use these only if you want to enable delegated A2A execution. If omitted, II-Agent stays on the default native loop. + +These settings are independent from `SANDBOX_PROVIDER` (local/cloud sandbox choice). 
+ +| Variable | Status | Notes | +| --- | --- | --- | +| `AGENT_INNER_LOOP_MODE` | ☑️ | `native` (default) or `a2a`. Start with `native` unless you are actively testing delegated mode. | +| `AGENT_A2A_BACKEND` | ☑️ | `copilot` (default), `claude-code`, or `codex`. Selects the A2A adapter backend when mode is `a2a`. See [Getting Started](/docs/getting-started#inner-loop-mode-client-guide) for model restrictions per backend. | +| `AGENT_A2A_AGENT_URL` | ☑️ | Base URL for the adapter when mode is `a2a` (example: `http://localhost:18100`). | +| `AGENT_A2A_TIMEOUT_SECONDS` | ☑️ | Request timeout for A2A calls. | +| `AGENT_A2A_FALLBACK_TO_NATIVE` | ☑️ | Keep `true` for safer operation; falls back to native when A2A fails. | +| `AGENT_A2A_CONTEXT_REUSE` | ☑️ | Reuses A2A context across turns for continuity. | + +## Storage [`/docs/required-environment-variables/storage`](/docs/required-environment-variables/storage) + +| Variable | Status | Notes | +| --- | --- | --- | +| `SLIDE_ASSETS_PROJECT_ID`, `SLIDE_ASSETS_BUCKET_NAME` | ✅ | Write destination for slide deck artifacts. | +| `FILE_UPLOAD_PROJECT_ID`, `FILE_UPLOAD_BUCKET_NAME` | ✅ | General-purpose uploads bucket. | +| `AVATAR_PROJECT_ID`, `AVATAR_BUCKET_NAME` | ☑️ | Avatar-specific bucket; can reuse the upload bucket in dev. | +| `CUSTOM_DOMAIN` | ☑️ | Domain used when building shareable URLs (`sfile.ii.inc` by default). | + +## Backend sandbox [`/docs/required-environment-variables/backend-sandbox`](/docs/required-environment-variables/backend-sandbox) + +| Variable | Status | Notes | +| --- | --- | --- | +| `SANDBOX_TEMPLATE_ID` | ✅ | VM or container template ID used for user sandboxes. | +| `TIME_TIL_CLEAN_UP` | ✅ | Idle timeout in seconds before sandboxes are reclaimed. 
| + +## Tool server baseline [`/docs/required-environment-variables/tool-server-baseline`](/docs/required-environment-variables/tool-server-baseline) + +| Variable | Status | Notes | +| --- | --- | --- | +| `STORAGE_CONFIG__GCS_BUCKET_NAME`, `STORAGE_CONFIG__GCS_PROJECT_ID` | ✅ | Buckets used for artifacts generated by the tool server. | + +## Sandbox server [`/docs/required-environment-variables/sandbox-server`](/docs/required-environment-variables/sandbox-server) + +| Variable | Status | Notes | +| --- | --- | --- | +| `SANDBOX_PROVIDER` | ☑️ | `e2b` (cloud, default) or `docker`/`local` (local Docker containers). | +| `E2B_API_KEY` | ☑️ | API key issued by e2b (not needed for local Docker mode). | +| `E2B_TEMPLATE_ID` | ☑️ | Template ID for e2b sandbox provisioning (not needed for local Docker mode). | +| `SANDBOX_DOCKER_IMAGE` | ☑️ | Docker image for local sandboxes (default `ii-agent-sandbox:latest`). | +| `LOCAL_MODE` | ☑️ | Enable local-mode features such as orphan cleanup. | + +## Core infrastructure [`/docs/required-environment-variables/core-infra`](/docs/required-environment-variables/core-infra) + +| Variable | Status | Notes | +| --- | --- | --- | +| `POSTGRES_USER`, `POSTGRES_PASSWORD`, `POSTGRES_DB`, `POSTGRES_PORT` | ✅ | Local Postgres credentials and host port mapping. | +| `DATABASE_URL` | ✅ | Async connection string consumed by the backend. | +| `SANDBOX_DB_NAME`, `SANDBOX_DATABASE_URL` | ☑️ | Needed when the sandbox service uses a dedicated database. | +| `REDIS_PORT` | ✅ | Host port for Redis; change if it conflicts with another service. | +| `BACKEND_PORT`, `FRONTEND_PORT`, `SANDBOX_SERVER_PORT`, `TOOL_SERVER_PORT`, `NGROK_METRICS_PORT`, `MCP_PORT` | ✅ | Host ports for every HTTP-facing service and dashboards. | + +## Validation checklist + +1. Run `./scripts/run_stack.sh --build`. If Docker reports a missing environment variable, fix it before proceeding. +2. Visit `http://localhost:<FRONTEND_PORT>` and complete a request.
Watch backend logs for auth/model errors. +3. Inspect `http://localhost:<NGROK_METRICS_PORT>` to confirm the tunnels are connected. +4. Store the final env file (`docker/.stack.env` or `docker/.stack.env.local`) in your personal secret store. Never commit it to Git. diff --git a/docs/docs/required-environment-variables/llm-auth.md b/docs/docs/required-environment-variables/llm-auth.md new file mode 100644 index 000000000..0fc8fb212 --- /dev/null +++ b/docs/docs/required-environment-variables/llm-auth.md @@ -0,0 +1,70 @@ +--- +id: llm-auth +title: LLM and Authentication Variables +slug: /required-environment-variables/llm-auth +sidebar_position: 13 +--- + +The backend relies on these secrets to talk to model providers, orchestrate researcher/report agents, and enable OAuth flows. + +## Optional inner loop mode controls + +These settings are optional and are intended for teams evaluating delegated A2A execution. For normal onboarding, keep the default `native` mode. + +```bash +AGENT_INNER_LOOP_MODE=native +AGENT_A2A_AGENT_URL=http://localhost:18100 +AGENT_A2A_TIMEOUT_SECONDS=30 +AGENT_A2A_FALLBACK_TO_NATIVE=true +AGENT_A2A_CONTEXT_REUSE=true +``` + +### Practical guidance + +- Use `native` as your baseline for production onboarding. +- Use `a2a` when you want to test delegated Copilot-style inner-loop behavior. +- Keep fallback enabled to preserve reliability if the adapter is unavailable. +- If your deployment uses Copilot-backed delegated inference, it is often significantly cheaper than direct API-key-only native inference. +- If delegated mode is configured as BYOK passthrough, cost follows your provider billing plan. + +### What still stays native in `a2a` mode + +Even when delegated mode is enabled, II-Agent intentionally keeps some request categories on the native path: + +- Slides workflows. +- Storybook generation. +- Media generation. +- Connector-backed operations. +- Planning/milestone workflows. +- Dev infrastructure operations. +- Safety/compliance/capability exceptions. 
+ +This preserves platform behavior while allowing delegated routing for eligible requests. + +## `LLM_CONFIGS` + +1. Decide which providers you want to use (OpenAI-compatible, Anthropic, Gemini, etc.). +2. For each provider, collect the API key and base URL if the provider requires a custom endpoint. +3. Build a JSON array describing each model, e.g.: + ```json + [ + { + "provider": "openai", + "model": "gpt-4o-mini", + "apiKey": "sk-your-key", + "baseUrl": "https://api.openai.com/v1", + "maxRetries": 3 + } + ] + ``` +4. Paste the serialized JSON blob into `LLM_CONFIGS` (wrap the value in single quotes inside `.stack.env` so special characters survive). + +### Supported Anthropic models + +The frontend model selector includes: + +- `claude-sonnet-4-5` / `claude-sonnet-4-6` +- `claude-opus-4-5` / `claude-opus-4-6` + +When extended thinking is enabled (`thinking_tokens >= 1024`), the Anthropic provider automatically sets `max_tokens = thinking_tokens + 8192` to leave room for both reasoning and the final response. + diff --git a/docs/docs/required-environment-variables/sandbox-server.md b/docs/docs/required-environment-variables/sandbox-server.md new file mode 100644 index 000000000..31486992d --- /dev/null +++ b/docs/docs/required-environment-variables/sandbox-server.md @@ -0,0 +1,79 @@ +--- +id: sandbox-server +title: Sandbox Server Integration +slug: /required-environment-variables/sandbox-server +sidebar_position: 17 +--- + +These variables configure the sandbox provider that powers interactive coding environments. II-Agent supports two providers: **E2B** (cloud) and **Docker** (local). + +## Choosing a provider + +Set `SANDBOX_PROVIDER` in the env file for your selected mode: + +- `docker/.stack.env` for full stack mode. +- `docker/.stack.env.local` for local Docker mode. + +| Value | Description | +|-------|-------------| +| `e2b` | Cloud sandboxes via [e2b.dev](https://e2b.dev/). Requires `E2B_API_KEY`. | +| `docker` or `local` | Local Docker containers. 
No cloud account needed. | + +For local-only deployments see the [Local Docker Sandbox](../local-docker-sandbox.md) guide. + +## E2B cloud mode + +### `E2B_API_KEY` + +1. Log into the [e2b dashboard](https://e2b.dev/) (or your equivalent provider). +2. Navigate to **API Keys** and create a new key scoped for development use. +3. Copy the key (looks like `e2b_live_...`) and paste it into your active env file (`docker/.stack.env` or `docker/.stack.env.local`). +4. Rotate the key if you suspect compromise -- do not commit it to Git. + +### `E2B_TEMPLATE_ID` + +1. Open the sandbox provisioning portal or service you use for backend execution (internal tool, provider dashboard, etc.). +2. Locate the template/image you want the stack to spawn (for example "ii-backend-dev"). +3. Copy its unique identifier and place it in your active env file (`docker/.stack.env` or `docker/.stack.env.local`) as `E2B_TEMPLATE_ID`. + +## Docker local mode + +When `SANDBOX_PROVIDER=docker` (or `local`), the backend creates ephemeral Docker containers on the host. No cloud account or API key is needed. + +### Key variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `SANDBOX_DOCKER_IMAGE` | `ii-agent-sandbox:latest` | Docker image to spawn for each sandbox. | +| `SANDBOX_DOCKER_NETWORK` | `ii-agent-local_ii-network` | Docker network sandboxes attach to. | +| `SANDBOX_DOCKER_HOST` | `localhost` | Hostname in sandbox URLs returned to browser. Set to LAN IP when browser is on another machine. | +| `SANDBOX_PORT_RANGE_START` | `30000` | Start of host port range for sandbox port mappings. | +| `SANDBOX_PORT_RANGE_END` | `30999` | End of host port range. | +| `SANDBOX_LOCAL_MODE` | `false` | Enable local-mode features (port scanning, orphan cleanup). | +| `SANDBOX_ORPHAN_CLEANUP_ENABLED` | `true` | Auto-remove sandboxes whose sessions no longer exist. | +| `SANDBOX_ORPHAN_CLEANUP_INTERVAL_SECONDS` | `60` | How often (seconds) to check for orphans. 
| +| `SANDBOX_BACKEND_URL` | `http://backend:8000` | Backend URL for session verification during cleanup. | +| `SANDBOX_MCP_SERVER_PORT` | `6060` | MCP server port inside sandbox containers. | +| `SANDBOX_CODE_SERVER_PORT` | `9000` | code-server port inside sandbox containers. | +| `SANDBOX_NOVNC_PORT` | `6080` | noVNC port inside sandbox containers. | +| `SANDBOX_TIMEOUT_SECONDS` | `7200` | Idle timeout (seconds) before sandbox auto-pauses. | + +### Container services + +Each Docker sandbox container runs: + +| Service | Container port | Description | +|---------|---------------|-------------| +| MCP Server | 6060 | Tool calls from the agent | +| code-server | 9000 | VS Code in the browser | +| noVNC | 6080 | Browser-based VNC for user handoff (CAPTCHAs, login) | +| Xvfb + x11vnc | :99 / 5900 | Virtual display for headed Chromium | + +Ports are dynamically mapped to the host from pool 30000-30999 using ring-buffer allocation (6 ports per sandbox, ~166 concurrent sandboxes). + +## `SANDBOX_TIMEOUT_SECONDS` + +- Specifies how long (in seconds) an idle sandbox lives before auto-pause. +- Default: `7200` (2 hours). Paused containers can be restarted when the user revisits the session. +- Choose a value that balances resource usage and usability. + diff --git a/docs/migration-knowledge.md b/docs/migration-knowledge.md new file mode 100644 index 000000000..9d2bb96d2 --- /dev/null +++ b/docs/migration-knowledge.md @@ -0,0 +1,170 @@ +# Migration Knowledge: Old System → Local Docker Stack + +## Overview +Migration of ii-agent from E2B cloud sandboxes + GCS storage to local Docker sandboxes + MinIO storage. +All data lives on a single Linux host accessed from a Windows PC browser via LAN IP. 
+ +--- + +## Database Migration + +### Source & Target +- **Backup DB**: `iiagentdev_backup` (old E2B-based system) +- **Target DB**: `iiagentdev` (new Docker-based system) +- **PostgreSQL**: Port 5433, user=iiagent + +### Tables Migrated +| Table | Records | Notes | +|-------|---------|-------| +| `sessions` | 65 | All reassigned from `admin@ii.inc` → `dev@localhost` (eac4f4fd) | +| `chat_messages` | 317 | JSONB content column | +| `agent_sandboxes` | 38 | `provider_sandbox_id` updated to Docker container IDs (12 records) | +| `application_events` | 8,328 | Migrated via `scripts/local/migrate_events.py`; 16 event type mappings (old → new dotted names) | +| `run_tasks` | 270 | From `agent_run_tasks` → `run_tasks` with `task_type='agent_run'` | +| `chat_provider_files` | 2 | From `provider_files` | +| `chat_provider_vector_stores` | 1 | From `provider_vector_stores` | +| `slide_contents` | Multiple | Image URLs rewritten (see below) | +| `user_assets` / `session_assets` | 226 | Reassigned user ownership | +| `credit_balances` | 1 | 995k credits transferred | + +### Event Type Mappings +Old event names (e.g., `user_message`, `tool_call`, `agent_message`) were mapped to new dotted format +(e.g., `agent.user.message`, `agent.tool.call`, `agent.message`). See `scripts/local/migrate_events.py`. + +### Session app_kind Classification +- **`app_kind='agent'`**: Frontend loads from `application_events` table +- **`app_kind='chat'`**: Frontend loads from `chat_messages` table +- **Misclassification bug**: 16 sessions had `app_kind='agent'` but only `chat_messages` (0 events) → showed as empty +- **Fix**: Changed to `app_kind='chat'` so they render via the chat pipeline + +### Key Gotcha: User Reassignment +All data was owned by `admin@ii.inc` (bace0701) in the backup. Had to UPDATE all FK references +(`user_id`) across sessions, assets, credits to `dev@localhost` (eac4f4fd). 
+ +--- + +## URL Rewriting + +### Problem: localhost URLs +`DockerSandbox.expose_port()` hardcoded `http://localhost:{port}` — inaccessible from a remote browser. + +### URL Categories Found in Stored Data +| Pattern | Count | Source | Fixable? | +|---------|-------|--------|----------| +| `http://localhost:8000/files/...` | ~130 events | Backend file/slide asset URLs | ✅ Rewrite to LAN IP | +| `http://localhost:30xxx/...` | ~400 events | Sandbox exposed port URLs (`expose_port()`) | ✅ Rewrite (works when sandbox running) | +| `http://localhost:4000/...` | 4 events | Sandbox app port | ✅ Rewrite | +| `http://localhost:1236/storage/image_search/...` | 67 events | Old E2B sandbox internal file server | ❌ Dead links — service doesn't exist in Docker | + +### Fix Applied +- **Script**: `scripts/local/rewrite_localhost_urls.py` +- **SQL**: `replace(content::text, 'http://localhost:', 'http://{host}:')` on: + - `application_events.content` (JSONB) — 606 rows + - `slide_contents.slide_content` (varchar) — 1 row + - `chat_messages.content` (JSONB) — 5 rows +- **Code fix**: Added `SANDBOX_DOCKER_HOST` setting to `SandboxSettings`, used in `expose_port()` instead of hardcoded `localhost` +- **Frontend fix**: Applied `rewriteLocalhostUrl()` to all `setBrowserUrl` / `resultUrl` / `pipUrl` paths that previously used raw URLs from tool results + +### Column Type Gotcha +- `application_events.content` → JSONB → use `replace(content::text, ...)::jsonb` +- `chat_messages.content` → JSONB → same cast +- `slide_contents.slide_content` → **varchar** → NO cast needed, just `replace(slide_content, ...)` +- Casting varchar HTML to `::jsonb` causes `InvalidTextRepresentationError` + +--- + +## Image/File Serving + +### Slide Assets +- **Old**: Images stored in E2B sandbox filesystem, served via sandbox's code-server (port 1236) +- **New**: Images extracted from Docker sandbox containers → uploaded to MinIO → served via `/files/slides/assets/{hash}.{ext}` +- **Endpoint**: 
`src/ii_agent/files/slide_assets_router.py` — public, no auth +- **MinIO path**: `content/slides/{filename}` +- **Upload script**: `scripts/local/upload_slide_assets.py` +- **12 of 13 images recovered**; 1 image from E2B session (9ca66417) unrecoverable + +### Session Attachments +- Served via `/v1/assets/{asset_id}/download` (JWT required) +- Storage: MinIO bucket `ii-agent`, paths like `users/{uid}/media/{fid}.{ext}` +- Signed URLs generated on-demand + +### Sandbox File Preview +- Router `/sandbox-files/{session_id}/preview` was **orphaned** (not registered in `app/routers.py`) +- **Fixed**: Registered at root level (frontend calls without `/v1/` prefix) +- Only works for RUNNING sandboxes — dead sandboxes return 503 + +### File Accessibility Rules +1. **Live sandbox files**: Accessible via Socket.IO `file_content` command or `/sandbox-files/.../preview` +2. **Uploaded files**: Persisted in MinIO, accessible via signed URLs +3. **Slide images**: Persisted in MinIO, accessible via `/files/slides/assets/` +4. **Dead sandbox files**: LOST unless explicitly uploaded to storage before sandbox died +5. **E2B sandbox files**: Gone forever — E2B sandboxes are ephemeral cloud instances + +--- + +## Sandbox Architecture + +### Port Mapping +- Docker sandboxes expose ports 30000-30999 on the host +- Well-known ports: 6060 (MCP), 9000 (code-server), 6080 (noVNC), 3000/5173/8080 (dev servers) +- `SANDBOX_DOCKER_HOST` env var controls the hostname in exposed URLs (default: `localhost`) +- **Ring-buffer allocation:** `PortPoolManager` advances a cursor through the range, wrapping around. Released ports are not reused until the cursor cycles back, preventing conflicts when restarting stopped containers that still hold their original port mappings. 
+ +### Container Lifecycle +- Running containers: discoverable via Docker labels +- Exited containers: still exist with their filesystems (can be restarted) +- Removed containers: data lost +- Port 1236: Was E2B's internal file server, doesn't exist in Docker sandbox + +### Sandbox Restart on Session Load +When a user navigates to a session, the frontend sends a `sandbox_status` Socket.IO command. +The backend calls `SandboxService.get_sandbox_for_session()` → `DockerSandbox.connect()`, which: +1. Looks up the container by `provider_sandbox_id` (Docker container ID) or by label fallback +2. If container is `paused` → `unpause()` +3. If container is `exited`/`created` → `start()` + `_wait_for_ready()` (MCP health check) +4. Extracts port mappings from the running container +5. Returns the connected sandbox instance + +The "Awake Sandbox" button on the frontend fires `awake_sandbox`, which follows the same path. + +--- + +## Scripts Reference + +| Script | Purpose | Idempotent? | +|--------|---------|-------------| +| `scripts/local/migrate_events.py` | Migrate events from backup DB | No (check target first) | +| `scripts/local/migrate_remaining_data.py` | Migrate run_tasks, provider_files, vector_stores | No | +| `scripts/local/upload_slide_assets.py` | Extract images from sandbox containers → MinIO | Yes (skips existing) | +| `scripts/local/rewrite_localhost_urls.py` | Replace `localhost:` → `{host}:` in DB | Yes (no-op if already done) | + +--- + +## Environment Configuration + +### Key Settings for Remote Access +```env +# In docker/.stack.env.local: +VITE_API_URL=http://<LAN_IP>:8000 # Frontend API base URL +LOCAL_STORAGE_URL_BASE=http://<LAN_IP>:8000/files # Storage URL for images +SANDBOX_DOCKER_HOST=<LAN_IP> # Sandbox port URLs +``` + +### Docker Compose +- File: `docker/docker-compose.local.yaml` +- Project: `ii-agent-local` +- Services: postgres (5433), redis (6379), minio (9000/9001), frontend (1420), backend (8000) +- Backend mounts Docker socket for spawning sandbox 
containers + +--- + +## Common Pitfalls + +1. **Transaction rollback**: If a multi-table UPDATE script errors on one table, ALL changes roll back (even previously "successful" ones within the same transaction) +2. **JSONB vs varchar**: Always check column types before writing UPDATE statements with casts +3. **app_kind determines rendering**: Agent sessions that only have chat_messages appear empty — must be classified as `app_kind='chat'` +4. **E2B sandbox data is unrecoverable**: Any files/images that existed only in E2B sandboxes are permanently lost +5. **Frontend axios baseURL**: Set to `VITE_API_URL` — all relative paths resolve against this +6. **MinIO bucket is not auto-created**: Create the `ii-agent` bucket manually on first setup +7. **Alembic migrations**: Run at startup unless `II_AGENT_SKIP_MIGRATIONS=true` +8. **Frontend URL rewriting**: `rewriteLocalhostUrl()` must be applied to ALL sandbox URLs displayed to users, not just `vscodeUrl` diff --git a/docs/rebase-analysis/01-path-mapping.md b/docs/rebase-analysis/01-path-mapping.md new file mode 100644 index 000000000..eb4276611 --- /dev/null +++ b/docs/rebase-analysis/01-path-mapping.md @@ -0,0 +1,130 @@ +# Path Mapping: develop → origin/main (DDD Restructure) + +## Package-Level Restructuring + +### src/ii_agent/ (Backend - MASSIVE restructure in #851) + +| Old Path (develop/topic) | New Path (origin/main) | Notes | +|---|---|---| +| `src/ii_agent/server/` | **REMOVED** - split into domain modules | Server monolith decomposed | +| `src/ii_agent/server/api/` | Domain-specific `api/router.py` per module | e.g., `chat/api/`, `files/router.py` | +| `src/ii_agent/server/app.py` | `src/ii_agent/app/` | App lifecycle extracted | +| `src/ii_agent/server/socket/` | `src/ii_agent/realtime/` | WebSocket/SocketIO handlers | +| `src/ii_agent/server/socket/command/query_handler.py` | `src/ii_agent/realtime/handlers/query.py` | | +| `src/ii_agent/server/socket/command/awake_sandbox_handler.py` | 
`src/ii_agent/realtime/handlers/awake_sandbox.py` | | +| `src/ii_agent/server/socket/command/sandbox_status_handler.py` | `src/ii_agent/realtime/handlers/sandbox_status.py` | | +| `src/ii_agent/server/socket/chat_session.py` | `src/ii_agent/realtime/chat_session.py` | | +| `src/ii_agent/server/socket/socketio.py` | `src/ii_agent/realtime/manager.py` | | +| `src/ii_agent/server/chat/` | `src/ii_agent/chat/` | Chat domain extracted | +| `src/ii_agent/server/chat/service.py` | `src/ii_agent/chat/application/chat_service.py` | | +| `src/ii_agent/server/chat/context_manager.py` | `src/ii_agent/chat/application/context_service.py` | | +| `src/ii_agent/server/chat/llm/anthropic/provider.py` | `src/ii_agent/chat/llm/anthropic/provider.py` | Similar path, different root | +| `src/ii_agent/server/chat/llm/openai.py` | `src/ii_agent/chat/llm/openai.py` | | +| `src/ii_agent/server/chat/router.py` | `src/ii_agent/chat/api/router.py` | | +| `src/ii_agent/server/chat/tools/file_search.py` | `src/ii_agent/chat/application/tool_service.py` | Likely merged | +| `src/ii_agent/server/api/files.py` | `src/ii_agent/files/router.py` | Files domain extracted | +| `src/ii_agent/server/api/auth.py` | `src/ii_agent/auth/` | Auth domain extracted | +| `src/ii_agent/server/api/sessions.py` | `src/ii_agent/sessions/` | Sessions domain extracted | +| `src/ii_agent/server/services/agent_service.py` | `src/ii_agent/agents/` (application layer) | Agent domain extracted | +| `src/ii_agent/server/services/file_service.py` | `src/ii_agent/files/service.py` | | +| `src/ii_agent/server/services/sandbox_service.py` | `src/ii_agent/agents/sandboxes/service.py` | | +| `src/ii_agent/server/llm_settings/` | `src/ii_agent/settings/llm/` | Settings domain | +| `src/ii_agent/server/llm_settings/models.py` | `src/ii_agent/settings/llm/models.py` | | +| `src/ii_agent/server/llm_settings/service.py` | `src/ii_agent/settings/llm/service.py` | | +| `src/ii_agent/server/messages/` | `src/ii_agent/agents/hooks/` | 
Hooks pattern | +| `src/ii_agent/server/models/messages.py` | Various domain schemas | Split per domain | +| `src/ii_agent/server/slides/` | `src/ii_agent/content/` | Content domain | +| `src/ii_agent/server/vectordb/` | **Needs investigation** | | +| `src/ii_agent/controller/` | `src/ii_agent/agents/` | Agent runtime | +| `src/ii_agent/controller/agent_controller.py` | `src/ii_agent/agents/agent.py` | Core agent loop | +| `src/ii_agent/controller/state.py` | `src/ii_agent/agents/` area | State mgmt | +| `src/ii_agent/controller/tool_manager.py` | `src/ii_agent/agents/factory/tool_manager.py` | | +| `src/ii_agent/adapters/` | **REMOVED** | Absorbed into domain modules | +| `src/ii_agent/adapters/sandbox_adapter.py` | `src/ii_agent/agents/sandboxes/` | | +| `src/ii_agent/llm/` | `src/ii_agent/agents/models/` | LLM providers | +| `src/ii_agent/llm/anthropic.py` | `src/ii_agent/agents/models/anthropic/claude.py` | | +| `src/ii_agent/llm/openai.py` | `src/ii_agent/agents/models/openai/completions.py` | | +| `src/ii_agent/prompts/` | `src/ii_agent/agents/prompts/` | | +| `src/ii_agent/prompts/agent_prompts.py` | `src/ii_agent/agents/prompts/agent_prompts.py` | | +| `src/ii_agent/prompts/system_prompt.py` | `src/ii_agent/agents/prompts/system_prompt.py` | | +| `src/ii_agent/sandbox/ii_sandbox.py` | `src/ii_agent/agents/sandboxes/` | | +| `src/ii_agent/storage/` | `src/ii_agent/core/storage/` | | +| `src/ii_agent/storage/base.py` | `src/ii_agent/core/storage/providers/base.py` | | +| `src/ii_agent/storage/factory.py` | `src/ii_agent/core/storage/` | | +| `src/ii_agent/storage/gcs.py` | `src/ii_agent/core/storage/providers/gcs.py` | | +| `src/ii_agent/storage/local.py` | `src/ii_agent/core/storage/providers/local.py` | **EXISTS in main!** | +| `src/ii_agent/sub_agent/` | `src/ii_agent/agents/` | Merged into agents | +| `src/ii_agent/core/config/ii_agent_config.py` | `src/ii_agent/core/config/settings.py` | Renamed | +| `src/ii_agent/core/config/llm_config.py` | 
`src/ii_agent/core/config/llm_config.py` | Same path | +| `src/ii_agent/core/event.py` | `src/ii_agent/realtime/events/` | Event system | +| `src/ii_agent/core/client_host.py` | **NEW - no equivalent** | Topic-branch-only | +| `src/ii_agent/db/manager.py` | `src/ii_agent/core/db/` | | +| `src/ii_agent/utils/constants.py` | `src/ii_agent/core/` area | | +| `src/ii_agent/cron/` | `src/ii_agent/workers/cron/` | | + +### src/ii_tool/ → src/ii_server/ (Tool Server renamed) + +| Old Path (develop/topic) | New Path (origin/main) | Notes | +|---|---|---| +| `src/ii_tool/` | `src/ii_server/` | Package renamed | +| `src/ii_tool/browser/` | `src/ii_server/browser/` ? OR `src/ii_agent/agents/tools/browser/` | Split | +| `src/ii_tool/integrations/` | Absorbed into `src/ii_agent/` domains | | +| `src/ii_tool/integrations/image_generation/` | `src/ii_agent/content/media/` | | +| `src/ii_tool/integrations/storage/` | `src/ii_agent/core/storage/` | | +| `src/ii_tool/integrations/video_generation/` | `src/ii_agent/content/media/` | | +| `src/ii_tool/interfaces/sandbox.py` | `src/ii_server/interfaces/sandbox.py` | | +| `src/ii_tool/tools/dev/register_port.py` | `src/ii_agent/agents/tools/sandbox/register_port.py` | | +| `src/ii_tool/tools/file_system/utils.py` | `src/ii_server/tools/` area | | +| `src/ii_tool/tools/mcp_tool.py` | `src/ii_server/mcp/` | | +| `src/ii_tool/tools/shell/shell_init.py` | `src/ii_server/tools/shell/` | | +| `src/ii_tool/utils.py` | `src/ii_server/utils.py` | | + +### src/ii_sandbox_server/ → REMOVED (absorbed into ii_agent) + +| Old Path (develop/topic) | New Path (origin/main) | Notes | +|---|---|---| +| `src/ii_sandbox_server/` | **REMOVED entirely** | Absorbed into `src/ii_agent/agents/sandboxes/` | +| `src/ii_sandbox_server/sandboxes/base.py` | `src/ii_agent/agents/sandboxes/base.py` | | +| `src/ii_sandbox_server/sandboxes/e2b.py` | `src/ii_agent/agents/sandboxes/e2b.py` | | +| `src/ii_sandbox_server/sandboxes/docker.py` | **DOES NOT EXIST in main** | 
Topic-branch-only | +| `src/ii_sandbox_server/sandboxes/port_manager.py` | **DOES NOT EXIST in main** | Topic-branch-only | +| `src/ii_sandbox_server/sandboxes/sandbox_factory.py` | **DOES NOT EXIST in main** | | +| `src/ii_sandbox_server/lifecycle/sandbox_controller.py` | `src/ii_agent/agents/sandboxes/service.py` | Likely merged | +| `src/ii_sandbox_server/client/client.py` | **Absorbed** | | +| `src/ii_sandbox_server/config.py` | `src/ii_agent/core/config/sandbox.py` | | +| `src/ii_sandbox_server/db/manager.py` | `src/ii_agent/core/db/` | | +| `src/ii_sandbox_server/main.py` | **No separate process** | Integrated | +| `src/ii_sandbox_server/models/payload.py` | `src/ii_agent/agents/sandboxes/models.py` | | + +### Tests → src/tests/ + +| Old Path (develop/topic) | New Path (origin/main) | Notes | +|---|---|---| +| `tests/` | `src/tests/` | Moved into src | +| `tests/conftest.py` | `src/tests/conftest.py` | | +| `tests/sandbox/` | `src/tests/unit/engine/` (sandbox tests) | | +| `tests/storage/` | `src/tests/unit/` area | | +| `tests/llm/` | `src/tests/unit/` area | | +| `tests/test_ii_tool/` | `src/tests/unit/` area | | +| `tests/tools/` | `src/tests/unit/` area | | + +### Docker/Config (mostly same paths) + +| Old Path | New Path | Notes | +|---|---|---| +| `docker/docker-compose.stack.yaml` | Same | Modified in both | +| `docker/docker-compose.local-only.yaml` | **NEW** | Topic-branch-only | +| `docker/docker-compose.local.yaml` | **NEW** | Topic-branch-only | +| `docker/.stack.env.local.example` | `docker/.stack.env.example` | Main has different example | +| `docker/backend/Dockerfile` | Same | Modified in both | +| `scripts/run_stack.sh` | `scripts/run_stack.sh` | Topic branch deleted, replaced with stack_control.sh | +| `scripts/stack_control.sh` | **NEW** | Topic-branch-only | + +## Key Observations + +1. **Main has a LocalStorage provider already**: `src/ii_agent/core/storage/providers/local.py` exists in main +2. 
**Sandbox server absorbed**: The entire `ii_sandbox_server` package no longer exists separately +3. **Tool server renamed**: `ii_tool` → `ii_server` +4. **Shell/sandbox execution refactored** in #865 with new architecture +5. **DDD structure**: Domain-Driven Design with proper bounded contexts +6. **Tests relocated**: All tests now under `src/tests/` diff --git a/docs/rebase-analysis/02-baseline-changes.md b/docs/rebase-analysis/02-baseline-changes.md new file mode 100644 index 000000000..441382038 --- /dev/null +++ b/docs/rebase-analysis/02-baseline-changes.md @@ -0,0 +1,140 @@ +# Baseline Changes Analysis: develop → origin/main + +## Executive Summary + +153 commits, 2,500 files changed, +501,149/-75,606 lines. +This represents a **massive architectural overhaul** from a monolithic server design to a Domain-Driven Design (DDD) structure. + +## Major Architectural Changes + +### 1. DDD Restructure (#851) — 1,483 files changed +The single largest commit. Completely reorganized `src/ii_agent/` from a monolithic `server/` package into bounded domain contexts: + +**Old (develop):** +``` +src/ii_agent/ +├── server/ # Monolithic server +│ ├── api/ # All HTTP endpoints +│ ├── chat/ # Chat service +│ ├── socket/ # WebSocket handlers +│ ├── services/ # Business logic +│ ├── models/ # Data models +│ └── slides/ # Slide processing +├── controller/ # Agent controller +├── llm/ # LLM providers +├── prompts/ # System prompts +├── storage/ # Storage backends +├── sandbox/ # Sandbox abstraction +├── sub_agent/ # Sub-agent tools +└── adapters/ # Adapter layer +``` + +**New (main):** +``` +src/ii_agent/ +├── agents/ # Agent runtime (replaces controller/, llm/, prompts/, sub_agent/, adapters/) +│ ├── models/ # LLM providers (replaces llm/) +│ ├── prompts/ # System prompts +│ ├── sandboxes/ # Sandbox management (replaces sandbox/, sandbox_server) +│ ├── tools/ # Agent-side tools +│ ├── factory/ # Agent/tool creation +│ ├── hooks/ # Agent hooks (replaces messages/) +│ ├── skills/ # 
Agent skills +│ └── sessions/ # Session management +├── app/ # FastAPI app lifecycle (replaces server/app.py) +├── auth/ # Authentication domain (replaces server/api/auth.py) +├── billing/ # Billing domain +├── chat/ # Chat domain (replaces server/chat/) +│ ├── api/ # Chat HTTP endpoints +│ ├── application/ # Chat business logic +│ └── llm/ # Chat LLM providers +├── content/ # Content domain (replaces server/slides/) +│ └── media/ # Media generation (replaces ii_tool/integrations/) +├── core/ # Shared infrastructure +│ ├── config/ # All configuration (settings.py replaces ii_agent_config.py) +│ ├── db/ # Database (replaces db/) +│ ├── storage/ # Storage providers (replaces storage/) +│ │ └── providers/ # gcs.py, local.py, minio.py +│ └── secrets/ # Secret management +├── credits/ # Credits domain +├── files/ # File management domain (replaces server/api/files.py) +├── integrations/ # External integrations +├── projects/ # Projects domain +├── realtime/ # WebSocket/SocketIO (replaces server/socket/) +│ ├── handlers/ # Socket command handlers +│ └── events/ # Event system +├── sessions/ # Sessions domain (replaces server/api/sessions.py) +├── settings/ # Settings domain (replaces server/llm_settings/) +│ ├── llm/ # LLM settings +│ └── mcp/ # MCP settings +├── tasks/ # Background tasks +├── users/ # User domain +└── workers/ # Background workers (replaces cron/) +``` + +### 2. Package Renames +- `src/ii_tool/` → `src/ii_server/` (tool server renamed) +- `src/ii_sandbox_server/` → **REMOVED** (absorbed into `src/ii_agent/agents/sandboxes/`) +- `tests/` → `src/tests/` (tests moved into src) + +### 3. Shell and Sandbox Execution Refactor (#865) +- New `src/ii_agent/agents/sandboxes/shell.py` — shell abstraction +- E2B-specific shell: `e2b_shell.py` +- Live terminal service: `live_terminal_service.py` +- Sandbox router: `router.py` +- Shell tools restructured: `src/ii_agent/agents/tools/shell/` + +### 4. 
Workspace Manager Removal (#825) +- `workspace_manager.py` completely removed +- Connector tools restructured + +### 5. A2A and MCP SSE Removal (#842) +- Agent-to-Agent protocol removed +- MCP SSE transport removed +- Simplification of integration layer + +### 6. Dev Tool → Skill Migration (#848) +- Development tools migrated from imperative tools to declarative skills +- `ii-app` skill created under `settings/skills/builtin/ii-app/` +- Template processor for project scaffolding + +### 7. Pricing/UUID Consolidation (#862) +- `uuid.UUID` types enforced across all API contracts +- Pricing consolidated into billing domain +- Chat API contracts refactored + +### 8. Media Path Refactor (#860) +- Media generation moved to `content/media/` +- Unified file asset handling + +### 9. Code Viewer with Watcher (#855) +- File tree, code viewer components added +- Sandbox file explorer capability + +## Features Already Present in Main That Topic Branch Also Implemented + +| Feature | Main Implementation | Topic Branch Implementation | Status | +|---|---|---|---| +| **Local Storage Provider** | `core/storage/providers/local.py` | `storage/local.py` + `ii_tool/integrations/storage/local.py` | **MAIN HAS IT** | +| **Storage Config with local** | `core/config/storage.py` (supports gcs/local/minio) | Modified `storage/` and config | **MAIN HAS IT** | +| **Docker enum in SandboxProviderType** | `agents/sandboxes/types.py` has `DOCKER = "docker"` | Added to sandbox factory | **MAIN HAS IT (enum only)** | +| **Sandbox Settings with docker** | `core/config/sandbox.py` has `docker` in Literal | Added docker config | **MAIN HAS IT (config only)** | +| **Sandbox Service with Docker reference** | `agents/sandboxes/service.py` references Docker | Built docker factory | **MAIN STUBS IT** | + +## Features NOT in Main That Topic Branch Provides + +| Feature | Description | Required Integration Point | +|---|---|---| +| **DockerSandbox Implementation** | Full Docker container lifecycle (974 lines) 
| `src/ii_agent/agents/sandboxes/docker.py` | +| **PortPoolManager** | Port 30000-30999 allocation for Docker containers | New file in `agents/sandboxes/` | +| **Orphan Container Cleanup** | Background cleanup loop for abandoned containers | Extend `agents/sandboxes/service.py` | +| **docker-compose.local-only.yaml** | Air-gapped Docker Compose stack | `docker/` | +| **docker-compose.local.yaml** | Hybrid compose file | `docker/` | +| **stack_control.sh** | Stack management script | `scripts/` | +| **Tool Execution Timeouts** | Timeout enforcement for tool calls | Agent runtime | +| **Mid-Tool Interruption** | Cancel running tools mid-execution | Agent runtime | +| **Agent-Human-Agent Handoff** | noVNC browser handoff mechanism | Agent + realtime | +| **Dynamic Token Budget** | Extended token budget for Claude 4.5 | Config/constants | +| **Various Bug Fixes** | WebSocket, image handling, slides, etc. | Various domains | +| **Comprehensive Test Suite** | 80+ test files | `src/tests/` | +| **Documentation** | Architecture, feature analysis, user guide | `docs/` | diff --git a/docs/rebase-analysis/03-three-way-assessment.md b/docs/rebase-analysis/03-three-way-assessment.md new file mode 100644 index 000000000..5a8c3ff0c --- /dev/null +++ b/docs/rebase-analysis/03-three-way-assessment.md @@ -0,0 +1,219 @@ +# Three-Way Diff Analysis & Change Assessment + +## Methodology +For each topic branch change, we assess: +1. **What changed** in the topic branch (from develop) +2. **What changed** in main (from develop) for the same area +3. **Whether the topic change still makes sense** given the new baseline + +## Tier 0: Configuration & Constants (Foundation) + +### TOKEN_BUDGET_EXTENDED = 800,000 (ii_agent_config.py / llm_config.py) +- **Topic**: Added `TOKEN_BUDGET_EXTENDED = 800_000` for Claude 4.5 +- **Main**: `ii_agent_config.py` → `core/config/settings.py` — completely restructured with pydantic-settings +- **Assessment**: Check if main already has extended token budget. 
If not, add to `core/config/settings.py` +- **Verdict**: **NEEDS PORTING** — check if already addressed in main's config + +### Default storage provider change (gcs → local) +- **Topic**: Changed default from `"gcs"` to `"local"` in storage config +- **Main**: `core/config/storage.py` already supports `local` but defaults to `"gcs"` +- **Assessment**: For local-only mode, this should be set in env vars, not hardcoded +- **Verdict**: **DROP** — main handles this correctly via env config + +### Sandbox config additions (provider_type, docker_image, docker_network, etc.) +- **Topic**: Added multiple sandbox config options: `provider_type`, `docker_image`, `docker_network`, `local_mode`, `orphan_cleanup_*`, `backend_url` +- **Main**: `core/config/sandbox.py` already has `SandboxSettings` with pydantic-settings, supports `docker` provider enum +- **Assessment**: Port Docker-specific settings (docker_image, docker_network, port range) into existing `SandboxSettings` +- **Verdict**: **NEEDS PORTING** — extend `SandboxSettings` with Docker-specific fields + +### expose_port() — external parameter +- **Topic**: Added `external` parameter to `expose_port()` method in sandbox base +- **Main**: `agents/sandboxes/base.py` does not have this parameter +- **Assessment**: This is needed for local Docker mode where port mapping differs +- **Verdict**: **NEEDS PORTING** — add to new base class + +## Tier 1: Infrastructure Components + +### PortPoolManager (port_manager.py — 480 lines, NEW) +- **Topic**: Created `src/ii_sandbox_server/sandboxes/port_manager.py` +- **Main**: No equivalent exists. Port management not implemented. +- **Assessment**: Core infrastructure for Docker sandbox. 
Needs new location: `src/ii_agent/agents/sandboxes/port_manager.py` +- **Verdict**: **PORT DIRECTLY** — new file, no conflicts + +### LocalStorage (backend side — storage/local.py) +- **Topic**: Created `src/ii_agent/storage/local.py` with path traversal protection, .meta sidecar files, URL download +- **Main**: Already has `src/ii_agent/core/storage/providers/local.py` with `LocalProvider` class +- **Assessment**: Main's LocalProvider uses pathlib, topic branch uses os.path. Main's implementation is cleaner but may be missing some features (e.g., .meta sidecar, content-type tracking). Need to compare feature sets. +- **Verdict**: **MERGE/EXTEND** — preserve main's implementation, add any missing features + +### LocalStorage (tool-server side — ii_tool/integrations/storage/local.py) +- **Topic**: Created `src/ii_tool/integrations/storage/local.py` — duplicate of backend local storage +- **Main**: `ii_tool` no longer exists; integrations absorbed into `ii_agent` domains +- **Assessment**: The tool-server storage is now handled by main's unified storage. This file is irrelevant. +- **Verdict**: **DROP** — main has unified storage + +### Storage Factory (storage/factory.py) +- **Topic**: Modified to route to LocalStorage based on config +- **Main**: Storage factory is likely in `core/storage/` — already supports local routing +- **Assessment**: Main already handles local storage factory routing +- **Verdict**: **DROP** — main covers this + +## Tier 2: Docker Sandbox Implementation + +### DockerSandbox (docker.py — 974 lines, NEW) +- **Topic**: Created `src/ii_sandbox_server/sandboxes/docker.py` — full Docker container lifecycle +- **Main**: `agents/sandboxes/service.py` has `SandboxProviderType.DOCKER` enum but raises `SandboxCreationError("Unsupported provider: docker")` +- **Assessment**: Core feature. 
Must be ported to `src/ii_agent/agents/sandboxes/docker.py`, implementing the new `Sandbox` base class API from main +- **Verdict**: **NEEDS MAJOR REWORK** — rewrite to implement main's `Sandbox` ABC with Shell, LiveTerminal, and file explorer APIs + +### sandbox_factory.py +- **Topic**: Created factory for e2b/docker sandbox creation +- **Main**: Factory logic is in `agents/sandboxes/service.py._create_provider()`. Just add Docker branch. +- **Assessment**: Add Docker provider creation to existing `_create_provider` and `_connect_provider` +- **Verdict**: **MERGE INTO service.py** — simple addition + +## Tier 3: Orchestration + +### Sandbox Controller Orphan Cleanup (~120 lines) +- **Topic**: Added to `src/ii_sandbox_server/lifecycle/sandbox_controller.py` +- **Main**: `ii_sandbox_server` no longer exists. Sandbox service is in `agents/sandboxes/service.py` +- **Assessment**: Port orphan cleanup as a method/background task in `SandboxService` or as a worker in `workers/cron/` +- **Verdict**: **NEEDS PORTING** — adapt to main's architecture, likely in workers/cron/ + +### client/client.py changes +- **Topic**: Modified sandbox client for Docker support +- **Main**: Client/server split removed — sandbox is in-process now +- **Assessment**: The client abstraction is gone. Docker sandbox is called directly. +- **Verdict**: **DROP** — architecture changed + +## Tier 4: API/Integration Layer + +### File upload endpoints (server/api/files.py) +- **Topic**: Added `PUT /files/upload/{path}`, `GET /files/{path}` with token auth +- **Main**: `files/router.py` handles file endpoints. Completely restructured. 
+- **Assessment**: Check if main's file router supports the upload/serve endpoints needed for local mode +- **Verdict**: **CHECK AND PORT** — may need to add local file serving endpoint + +### Backend server/app.py changes +- **Topic**: Various startup modifications for local mode +- **Main**: `app/__init__.py`, `app/lifespan.py` — completely different +- **Assessment**: Local mode startup needs to be adapted to new app lifecycle +- **Verdict**: **NEEDS REWORK** — adapt to new lifespan hooks + +### chat/context_manager.py, chat/service.py, chat/router.py changes +- **Topic**: Various fixes for chat in local mode +- **Main**: Complete restructure — `chat/application/chat_service.py`, `chat/api/router.py` +- **Assessment**: The specific fixes need to be evaluated against new code +- **Verdict**: **NEEDS INDIVIDUAL EVALUATION** in new codebase + +### WebSocket handlers (socket/ → realtime/) +- **Topic**: Modified query_handler, awake_sandbox_handler, sandbox_status_handler, socketio +- **Main**: All renamed and restructured under `realtime/handlers/` +- **Assessment**: Changes need individual evaluation. The event system is completely different. +- **Verdict**: **NEEDS REWORK** — adapt changes to new event system + +### LLM provider changes (llm/anthropic.py, llm/openai.py) +- **Topic**: Streaming timeout fixes, safety net improvements +- **Main**: `agents/models/anthropic/claude.py`, `agents/models/openai/completions.py` — rewritten +- **Assessment**: Check if streaming timeout issues exist in main's implementations +- **Verdict**: **CHECK AND PORT** — may already be fixed differently + +### Sub-agent changes (sub_agent/ → agents/) +- **Topic**: Added interrupt events, task_agent_tool, design_document_agent modifications +- **Main**: Sub-agents restructured. 
`agents/factory/agent.py` builds sub-agents differently +- **Assessment**: Interrupt events may map to main's cancellation system +- **Verdict**: **NEEDS EVALUATION** — check if interrupts are handled by Redis cancel + +## Tier 5: Frontend + +### Frontend component changes +- **Topic**: Modified 16 frontend files for sandbox status, agent UI, websocket +- **Main**: Modified same 16 files with various refactors +- **Assessment**: Frontend mostly kept same paths. Need three-way merge for each file. +- **Verdict**: **NEEDS THREE-WAY MERGE** — file by file + +### Frontend test files (NEW) +- **Topic**: Created `frontend/src/lib/__tests__/utils.test.ts` and `agent-sandbox-status.test.ts` +- **Main**: These specific test files don't exist in main +- **Assessment**: Tests are additive but may need updating for changed APIs +- **Verdict**: **PORT AND UPDATE** — update test imports/APIs + +## Tier 6: Docker/Compose/Scripts + +### docker-compose.local-only.yaml (NEW) +- **Topic**: Complete air-gapped compose file, 194 lines +- **Main**: Main has docker-compose.stack.yaml (updated) and docker-compose.dev.yaml (new) +- **Assessment**: Local-only compose needs updating for new service structure (no more sandbox-server/tool-server as separate services) +- **Verdict**: **NEEDS MAJOR REWORK** — adapt to main's compose structure + +### docker-compose.local.yaml (NEW) +- **Topic**: Hybrid compose overlay +- **Main**: No equivalent +- **Assessment**: Same as above — needs adapting +- **Verdict**: **NEEDS REWORK** — adapt to main's structure + +### stack_control.sh (NEW) +- **Topic**: Created comprehensive stack management script +- **Main**: `scripts/run_stack.sh` exists but is simpler +- **Assessment**: Standalone script, mostly portable. Update compose file references. 
+- **Verdict**: **PORT AND UPDATE** — update paths/references + +### docker/backend/Dockerfile changes +- **Topic**: Modified for local mode build args +- **Main**: Modified for new package structure +- **Assessment**: Need three-way merge +- **Verdict**: **NEEDS THREE-WAY MERGE** + +### e2b.Dockerfile changes +- **Topic**: Updated sandbox image +- **Main**: Also updated sandbox image +- **Assessment**: Three-way merge +- **Verdict**: **NEEDS THREE-WAY MERGE** + +## Tier 7: Tests + +### Comprehensive test suite (~80 files) +- **Topic**: Created under `tests/` — sandbox, storage, LLM, tool tests +- **Main**: Tests moved to `src/tests/` — completely different structure +- **Assessment**: All test files need relocation to `src/tests/unit/` and import path updates +- **Verdict**: **PORT ALL** — update paths, imports, and assertions for new APIs + +## Tier 8: Documentation + +### Existing topic branch docs +- architecture-local-to-cloud.md — Architecture evolution guide +- feature-branch-analysis.md — Feature specification +- local-docker-sandbox.md — User guide +- **Assessment**: All documentation remains relevant. Update for new paths/structure. +- **Verdict**: **PORT AND UPDATE** — update all paths/references + +## Summary: Change Categories + +### Directly Portable (New files, no conflicts) +1. PortPoolManager → `agents/sandboxes/port_manager.py` +2. html_to_pdf.py (script) +3. stack_control.sh (with path updates) +4. admin_credits.sh (script) +5. Documentation files (with content updates) +6. docker/.stack.env.local.example (with updates) + +### Needs Major Rework (Architecture changed) +1. DockerSandbox → rewrite for new Sandbox ABC +2. docker-compose.local-only.yaml → adapt for new compose structure +3. Orphan cleanup → move to workers/cron +4. Frontend changes → three-way merge each file + +### Check and Port (May already be fixed in main) +1. Image compression → main has `compress_image_for_provider` +2. Streaming timeouts → check new LLM providers +3. 
Failed tool lookup handling → check new tool system +4. ThinkingBlock trailing fix → check new model response handling +5. WebSocket session priority → check new realtime system + +### Drop (Superseded by main) +1. LocalStorage backend (main has LocalProvider) +2. LocalStorage tool-server (ii_tool doesn't exist) +3. Storage factory changes (main has unified storage) +4. Client/client.py changes (client/server split removed) +5. Default storage=local (use env vars instead) +6. ii_sandbox_server scaffolding (absorbed into ii_agent) diff --git a/docs/rebase-analysis/04-rebase-plan.md b/docs/rebase-analysis/04-rebase-plan.md new file mode 100644 index 000000000..e78726900 --- /dev/null +++ b/docs/rebase-analysis/04-rebase-plan.md @@ -0,0 +1,211 @@ +# Detailed Rebase Plan: feat/local-docker-sandbox onto origin/main + +## Strategy: Manual Cherry-Pick Rebase + +Instead of `git rebase`, we will: +1. Create a new branch `rebase/local-docker-sandbox` from `origin/main` +2. Manually port changes from the topic branch, adapted to the new architecture +3. Commit in logical groups (leaf-to-root dependency tiers) +4. 
Validate each commit builds and tests pass + +## Pre-Rebase Checklist + +- [x] Topic branch squashed to single commit (b93a325) +- [x] Path mapping documented (01-path-mapping.md) +- [x] Baseline changes documented (02-baseline-changes.md) +- [x] Three-way assessment completed (03-three-way-assessment.md) +- [ ] New branch created from origin/main +- [ ] Rebase commits executed + +--- + +## Commit Plan (7 Commits, Leaf-to-Root) + +### Commit 1: Configuration & Constants +**Files to create/modify:** +- `src/ii_agent/core/config/sandbox.py` — Add Docker-specific settings: + - `docker_image: str = "ii-agent-sandbox:latest"` + - `docker_network: str = "ii-agent-local_ii-network"` + - `port_range_start: int = 30000` + - `port_range_end: int = 30999` + - `orphan_cleanup_enabled: bool = True` + - `orphan_cleanup_interval_seconds: int = 60` + - `backend_url: str = "http://backend:8000"` + - `local_mode: bool = False` + +**Status:** NEW WORK — extend existing pydantic-settings class + +### Commit 2: Port Pool Manager (Infrastructure) +**Files to create:** +- `src/ii_agent/agents/sandboxes/port_manager.py` — Port from topic branch + - Update imports from `ii_sandbox_server` → `ii_agent.agents.sandboxes` + - Update config access to use `Settings.sandbox.*` instead of env vars directly + - Keep core logic intact (thread-safe allocation, startup scanning, background cleanup) + +**Tests to create:** +- `src/tests/unit/agent/test_port_manager.py` — Port from `tests/sandbox/test_port_manager.py` + - Update imports + - Update class references + +**Status:** MOSTLY PORTABLE — import/config updates only + +### Commit 3: Docker Sandbox Provider (Core Feature) +**Files to create:** +- `src/ii_agent/agents/sandboxes/docker.py` — **MAJOR REWORK** required + - Must implement main's `Sandbox` ABC (from `agents/sandboxes/base.py`) + - Required methods: `get_info()`, `get_status()`, `get_provider_id()`, `upload_path`, + `create()`, `run_command()`, `upload()`, `download()`, `expose_port()`, 
`kill()`, + `get_file_tree()`, `get_file_content()`, `write_file()`, `delete_file()` + - Must support main's `Shell` abstraction (`agents/sandboxes/shell.py`) + - Must support `LiveTerminalHandle` for terminal streaming + - Must integrate with `PortPoolManager` for port allocation + - Class: `DockerSandbox(Sandbox)` with `PROVIDER = SandboxProviderType.DOCKER` + +**Files to modify:** +- `src/ii_agent/agents/sandboxes/service.py` — Add Docker to `_create_provider()` and `_connect_provider()` + - Add: `from ii_agent.agents.sandboxes.docker import DockerSandbox` + - Add Docker case in `_create_provider()`: Return `DockerSandbox.create(...)` + - Add Docker case in `_connect_provider()`: Return `DockerSandbox.connect(...)` + +**Tests to create:** +- `src/tests/unit/agent/test_docker_sandbox.py` — Rewrite from `tests/sandbox/test_docker_sandbox.py` +- `src/tests/unit/agent/test_sandbox_factory.py` — Rewrite from `tests/sandbox/test_sandbox_factory.py` + +**Status:** MAJOR REWORK — new base class API, shell/terminal integration + +### Commit 4: Orphan Cleanup & Lifecycle (Orchestration) +**Files to create/modify:** +- `src/ii_agent/workers/cron/jobs/orphan_cleanup.py` — New file + - Port orphan cleanup logic from `ii_sandbox_server/lifecycle/sandbox_controller.py` + - Use `SandboxService` and `SandboxRepository` instead of direct DB queries + - Register as a cron job in main's worker system + +- OR integrate into `src/ii_agent/agents/sandboxes/service.py` as: + - `async def cleanup_orphan_sandboxes(self, grace_period_seconds: int = 300) -> int` + - Background task started in app lifespan + +**Tests:** +- `src/tests/unit/agent/test_orphan_cleanup.py` + +**Status:** MODERATE REWORK — use main's DB/service patterns + +### Commit 5: Docker Compose & Deployment Scripts +**Files to create:** +- `docker/docker-compose.local.yaml` — Docker Compose overlay for local Docker sandbox mode + - Adapt from topic branch's local-only.yaml + - **Critical:** No separate sandbox-server or 
tool-server services (absorbed into backend) + - Add minio service (main uses minio for local storage instead of filesystem) + - Keep: postgres, redis, frontend, backend services + - Ensure backend has Docker socket mount for spawning sandbox containers + - Add sandbox Docker network configuration + +- `docker/.stack.env.local.example` — Local mode env example + - Update for new env var names (SANDBOX_PROVIDER, STORAGE_PROVIDER, etc.) + +- `scripts/stack_control.sh` — Port with updates + - Update compose file references + - Update service names for new architecture + +**Files to modify:** +- `docker/docker-compose.stack.yaml` — Add Docker socket mount option for backend + - Add conditional volume mount for `/var/run/docker.sock` + +**Status:** MODERATE REWORK — new compose structure, no separate sandbox-server + +### Commit 6: Frontend Changes (Three-Way Merge) +**Files to evaluate and selectively port:** +- `frontend/src/typings/agent.ts` — Check if `'stopped'` maps to `CANCELLED` or `SYSTEM_INTERRUPTED` in main +- `frontend/src/state/slice/agent.ts` — Sandbox status tracking changes +- `frontend/src/contexts/websocket-context.tsx` — Session priority changes +- `frontend/src/hooks/use-app-events.tsx` — Event handler updates +- `frontend/src/hooks/use-session-manager.tsx` — Session management +- `frontend/src/components/agent/agent-result.tsx` — Result display +- `frontend/src/components/agent/subagent-container.tsx` — Subagent UI +- `frontend/src/app/routes/agent.tsx` — Route changes + +**For each file:** +1. Read main's version +2. Read topic branch's version +3. Identify topic-branch-only functional changes +4. Apply only those changes to main's version +5. 
Skip cosmetic/structural changes that conflict with main's refactoring + +**New tests to port:** +- `frontend/src/lib/__tests__/utils.test.ts` +- `frontend/src/state/__tests__/agent-sandbox-status.test.ts` — update for new types + +**Status:** CAREFUL THREE-WAY MERGE — per-file evaluation needed + +### Commit 7: Documentation & Remaining Files +**Files to create/update:** +- `docs/docs/architecture-local-to-cloud.md` — Update all paths for new structure +- `docs/docs/local-docker-sandbox.md` — Update for new compose, env vars, paths +- `docs/docs/feature-branch-analysis.md` — Update with new architecture mapping +- `scripts/html_to_pdf.py` — Port directly (standalone script) +- `scripts/admin_credits.sh` — Port directly (standalone script) +- `.github/copilot-instructions.md` — Port directly + +**Status:** MOSTLY PORTABLE — content updates for new paths + +--- + +## Changes to DROP (Superseded by Main) + +| Change | Reason | +|---|---| +| `src/ii_agent/storage/local.py` | Main has `core/storage/providers/local.py` | +| `src/ii_agent/storage/factory.py` mods | Main has unified storage factory | +| `src/ii_agent/storage/base.py` mods | Main has `core/storage/providers/base.py` | +| `src/ii_agent/storage/gcs.py` mods | Main has `core/storage/providers/gcs.py` | +| `src/ii_agent/storage/__init__.py` mods | Main has `core/storage/__init__.py` | +| `src/ii_tool/integrations/storage/*` | `ii_tool` no longer exists | +| `src/ii_tool/integrations/image_generation/*` | Moved to `content/media/` | +| `src/ii_tool/integrations/video_generation/*` | Moved to `content/media/` | +| `src/ii_sandbox_server/*` (scaffolding) | Absorbed into `ii_agent/agents/sandboxes/` | +| `src/ii_agent/server/*` modifications | Server monolith decomposed into domains | +| Image compression in agent_controller | Main has `compress_image_for_provider` | +| `requests` → `httpx` migration | Main already uses httpx | +| Default storage=local | Use env vars | +| `client/client.py` changes | No more 
client/server split | +| `scripts/run_stack.sh` replacement | Bring stack_control.sh alongside, don't delete run_stack.sh | + +## Changes to VERIFY Before Porting + +| Change | Check | +|---|---| +| ThinkingBlock trailing fix | Does main's `agents/agent.py` handle this? | +| Failed tool lookup handling | Does main's tool system handle missing tools? | +| WebSocket session priority | Does main's realtime system handle priority? | +| Streaming timeout fixes | Does main's anthropic provider have timeouts? | +| Subagent interrupt events | Does main's cancellation cover this? | + +--- + +## Execution Order + +1. **Create branch** `rebase/local-docker-sandbox` from `origin/main` +2. **Commit 1**: Config changes (smallest, foundation) +3. **Commit 2**: Port manager (leaf dependency, self-contained) +4. **Commit 3**: Docker sandbox (depends on 1 & 2) +5. **Commit 4**: Orphan cleanup (depends on 3) +6. **Commit 5**: Compose & scripts (depends on 1-4) +7. **Commit 6**: Frontend (can be parallel with 5, done after for testing) +8. **Commit 7**: Documentation (last, references everything) + +## Validation After Each Commit + +1. `python -c "import ii_agent"` — basic import check +2. `pytest src/tests/ -x --tb=short` — run existing tests +3. `pytest src/tests/unit/agent/test_port_manager.py` (after commit 2) +4. `pytest src/tests/unit/agent/test_docker_sandbox.py` (after commit 3) +5. 
Full test suite after commit 7 + +## Risk Assessment + +| Risk | Severity | Mitigation | +|---|---|---| +| Docker sandbox doesn't implement full Sandbox ABC | HIGH | Implement all abstract methods, stub if needed | +| Shell abstraction incompatible with Docker exec | MEDIUM | Implement DockerShell similar to E2BShell | +| Compose file doesn't match new service structure | MEDIUM | Test with `docker compose config` | +| Frontend event changes break UI | LOW | Test manually after merge | +| Test import paths broken | LOW | Systematic find-and-replace | diff --git a/docs/rebase-analysis/05-post-rebase-audit.md b/docs/rebase-analysis/05-post-rebase-audit.md new file mode 100644 index 000000000..cfbe7682b --- /dev/null +++ b/docs/rebase-analysis/05-post-rebase-audit.md @@ -0,0 +1,239 @@ +# Post-Rebase Audit: `rebase/local-docker-sandbox` + +## Executive Summary + +The 7-commit rebase onto `origin/main` successfully ported the core Docker sandbox functionality. **39 files** were changed (from 155 in the original topic branch). The 116 unported files were analyzed — most are correctly unported (old module structure that was rewritten by DDD restructure #851 on main). 
However, the audit identified: + +- **3 critical architectural issues** in the ported code +- **4 high-priority issues** needing attention +- **3 missing features** that should be ported +- **2 regressions** to fix before merge +- **Several nice-to-have improvements** from the original branch that were not Docker-specific + +--- + +## Part 1: Completeness — What Was Missed + +### 1.1 Correctly Unported (No Action Needed) + +| Category | Files | Reason | +|----------|-------|--------| +| `src/ii_sandbox_server/` | 8 | Absorbed into `agents/sandboxes/` on main | +| `src/ii_tool/` (most files) | ~12 | Now `ii_server/` on main | +| `src/ii_agent/server/` | 26 | DDD restructure rewrote all | +| `src/ii_agent/controller/`, `llm/`, `sub_agent/`, `storage/` | ~20 | Completely rewritten on main | +| Old `tests/` structure | 40+ | Moved to `src/tests/` | +| `uv.lock` | 1 | Auto-generated | +| `frontend/pnpm-lock.yaml` | 1 | Auto-generated (but see §2.1) | + +### 1.2 Features That SHOULD Be Ported + +#### A. VNC Services in Sandbox Image (BLOCKING for human-in-the-loop) +**Original files:** `e2b.Dockerfile`, `docker/sandbox/start-services.sh` +**What's missing:** +- `e2b.Dockerfile`: Missing `x11vnc` and `novnc` package installs +- `start-services.sh`: Missing Xvfb display setup, x11vnc server startup, noVNC websockify startup, health checks for VNC processes, `/workspace` ownership fix (`chown -R pn:pn`) +- The sandbox code allocates `NOVNC_PORT = 6080` but nothing actually starts on that port + +**Impact:** Human-in-the-loop sandbox access (browser VNC) will not work. + +#### B. Client Host URL Rewriting (BLOCKING for remote access) +**Original file:** `src/ii_agent/core/client_host.py` +**What's missing:** A `ContextVar` that stores the connecting browser's hostname. `DockerSandbox.expose_port()` returns hardcoded `http://localhost:{port}` — this breaks when the browser is on a different machine than the Docker host.
+ +**Impact:** Docker sandbox URLs won't work from any machine other than localhost. + +#### C. `docker` Python Package Dependency (BLOCKING for fresh installs) +**Original file:** `pyproject.toml` +**What's missing:** `docker>=7.0.0` is not in `pyproject.toml` dependencies. It happens to be installed in the current environment (`7.1.0`) but `uv sync` on a fresh clone will not install it. + +**Impact:** `import docker` in `docker.py` will fail on fresh installs. + +### 1.3 Nice-to-Have Features Not Ported (Non-Docker-Specific) + +These were co-developed on the topic branch but are general improvements: + +| Feature | Original Files | Status on Main | +|---------|---------------|----------------| +| DALL-E 3 image generation client | `ii_tool/integrations/image_generation/openai_dalle.py` + factory | Missing — generic video gen framework exists but no DALL-E 3 | +| Sora video generation | `ii_tool/integrations/video_generation/` (5 files) | Missing — can be added later | +| Browser tab limit (MAX_TABS=50) | `ii_tool/browser/browser.py` | Missing — resource exhaustion protection | +| Shell session limit (MAX_SHELL_SESSIONS=10) | `ii_tool/tools/shell/shell_init.py` | Missing — tmux session leak protection | +| Tool server local file serving | `ii_tool/integrations/app/main.py` `/storage/` endpoint | Missing — needed for local-mode file access | +| MCP tool image bridging | `ii_tool/tools/mcp_tool.py` `_process_image_inputs()` | Missing — external MCP servers can't read sandbox files | +| Dynamic token budget | `core/config/llm_config.py` `get_max_context_tokens()` | Missing — uses static config on main | + +### 1.4 Already Exists on Main (Verified) + +| Feature | Status | +|---------|--------| +| Image compression (5MB Anthropic limit) | ✅ `chat/application/file_processor.py` | +| ThinkingBlock sanitization | ✅ `chat/llm/anthropic/provider.py` + tests | +| Failed tool lookup error handling | ✅ Error `ToolResult` on unknown tool | +| Frontend sessionId priority (URL > 
Redux) | ✅ `websocket-context.tsx` | +| Orphan cleanup (no HTTP endpoint needed) | ✅ Uses Docker API directly | + +--- + +## Part 2: Regressions + +### 2.1 pnpm-lock.yaml Not Updated for vitest +**File:** `frontend/package.json` lists `"vitest": "^3.2.1"` in devDependencies and has test scripts. +**Problem:** `frontend/pnpm-lock.yaml` has 0 occurrences of "vitest" — it was never regenerated. +**Impact:** `pnpm install --frozen-lockfile` in CI will fail. Frontend tests ("vitest run") will fail. +**Fix:** Run `cd frontend && pnpm install` to regenerate lockfile. + +### 2.2 Backend `/auth/dev/login` Endpoint Does Not Exist +**File:** `frontend/src/app/routes/login.tsx` adds DevLoginButton that calls `/auth/dev/login`. +**Problem:** No backend endpoint exists at that path. The button is safely hidden (returns null when endpoint returns non-200), but the feature is dead code. +**Impact:** Local-mode dev login doesn't work. Not blocking (button hidden gracefully), but a missing feature. + +--- + +## Part 3: Architectural Issues + +### 3.1 CRITICAL + +#### A. Exception Hierarchy Violation +**File:** `src/ii_agent/agents/sandboxes/exceptions.py` +**Problem:** `SandboxException` inherits from `Exception` instead of `IIAgentError`. +**Impact:** Global error handler (`ii_agent_error_handler`) won't catch sandbox exceptions. Error responses bypass schema validation. HTTP status codes may be wrong. +**Fix:** +```python +from ii_agent.core.exceptions import IIAgentError + +class SandboxException(IIAgentError): + pass +``` + +#### B. PortPoolManager Uses threading.Lock (Blocks Event Loop) +**File:** `src/ii_agent/agents/sandboxes/port_manager.py` +**Problem:** `self._port_lock = threading.Lock()` — when `DockerSandbox.create()` awaits `allocate_ports()`, the blocking lock freezes the entire asyncio event loop. +**Impact:** Under concurrent sandbox creation, the server becomes unresponsive. +**Fix:** Convert to `asyncio.Lock` or use `asyncio.to_thread()` wrapper. + +#### C. 
Orphan Cleanup Bypasses Service Layer +**File:** `src/ii_agent/agents/sandboxes/orphan_cleanup.py` +**Problem:** Creates `DockerSandbox` directly and calls `kill()` instead of going through `SandboxService`. Also uses `get_db_session_local()` directly instead of DI. +**Impact:** DB state sync issues if `SandboxService.pause_sandbox()` is called concurrently. Pattern violation. +**Fix:** Use `SandboxService` for sandbox lifecycle operations. + +### 3.2 HIGH PRIORITY + +#### D. Docker Client Singleton Race Condition +**File:** `src/ii_agent/agents/sandboxes/docker.py` (lines ~151-154) +**Problem:** `_get_docker_client()` uses a `None` check without locking — two concurrent calls can create two clients. +**Fix:** Use double-checked locking or `asyncio.Lock`. + +#### E. Port Constants Hardcoded +**File:** `src/ii_agent/agents/sandboxes/docker.py` (lines 58-72) +**Problem:** `MCP_SERVER_PORT = 6060`, `CODE_SERVER_PORT = 9000`, `NOVNC_PORT = 6080` are module constants instead of settings. +**Fix:** Move to `SandboxSettings` with configurable defaults. + +#### F. scan_existing_containers() Never Called at Startup +**File:** `src/ii_agent/agents/sandboxes/port_manager.py` +**Problem:** `PortPoolManager.scan_existing_containers()` exists (~70 lines) but is never called during lifespan startup. If the server restarts, previously allocated ports won't be tracked. +**Fix:** Call `scan_existing_containers()` from the `app/lifespan.py` startup sequence. + +#### G. DANGEROUS_PATTERNS Regex Defined But Unused +**File:** `src/ii_agent/agents/sandboxes/docker.py` (lines 75-80) +**Problem:** Security regex for strict command validation exists but is never called. +**Fix:** Either integrate into `run_command()` or remove dead code.
+ +### 3.3 MEDIUM + +| Issue | File | Description | +|-------|------|-------------| +| Resource cleanup lacks exception safety | docker.py `kill()` | Port release can leak if container removal fails | +| Global task tracking race | orphan_cleanup.py | `start_orphan_cleanup()` could create duplicate tasks | +| Logging inconsistency | port_manager.py | Uses stdlib logging; main may use structlog | + +--- + +## Part 4: Frontend Analysis + +### 4.1 Verified Clean ✅ + +| Item | Status | +|------|--------| +| `isDesignModeAvailable` uses `isSandboxLink()` | ✅ Correctly migrated | +| `isE2bLink` → `isSandboxLink` migration complete | ✅ No stale references in production code | +| `sandboxStatus` state initialized and cleared | ✅ Proper Redux lifecycle | +| `rewriteLocalhostUrl()` edge cases | ✅ Handles null, same-host, portless URLs | +| Model entries (claude-opus-4-6, claude-sonnet-4-6) | ✅ Follow existing pattern | +| DevLoginButton security | ✅ Hidden by default, backend-gated | +| Sub-agent STOPPED status | ✅ Consistent with backend RunStatus enum | + +### 4.2 Issues + +| Issue | Severity | Description | +|-------|----------|-------------| +| vitest not in lockfile | ⚠️ Regression | `pnpm install` needed | +| DevLoginButton dead code | ℹ️ Info | Backend endpoint missing | + +--- + +## Part 5: Test Coverage Assessment + +### 5.1 Existing Tests + +| Test File | Lines | Coverage | +|-----------|-------|----------| +| `test_docker_sandbox.py` | 446 | Path validation (20+ cases), create/kill, port mapping | +| `test_port_manager.py` | 837 | Allocation, deallocation, range bounds | +| `test_orphan_cleanup.py` | 122 | Grace period, cleanup loop | +| `utils.test.ts` | ~100 | rewriteLocalhostUrl, isSandboxLink, isE2bLink | +| `agent-sandbox-status.test.ts` | ~80 | sandboxStatus reducer | + +### 5.2 Missing Test Coverage + +| Gap | Impact | +|-----|--------| +| No async lock contention test | Won't catch event loop blocking | +| No port exhaustion test | Error path untested | +| 
No scan_existing_containers integration test | Startup recovery untested | +| No end-to-end create→verify→kill test | Integration gaps | +| orphan_cleanup tests don't verify DB state | State sync untested | + +--- + +## Part 6: Recommendations + +### Before Merge (Mandatory) + +1. **Fix exception hierarchy** — `SandboxException(IIAgentError)` (15 min) +2. **Add `docker>=7.0.0`** to `pyproject.toml` dependencies (5 min) +3. **Regenerate `pnpm-lock.yaml`** with vitest (5 min) +4. **Convert PortPoolManager to asyncio.Lock** (1-2 hr) + +### Before Docker Sandbox is Production-Ready + +5. **Add VNC services** to `e2b.Dockerfile` and `start-services.sh` +6. **Implement client host URL rewriting** for remote access +7. **Add `scan_existing_containers()` to lifespan startup** +8. **Implement `/auth/dev/login`** backend endpoint +9. **Add exception safety** to `kill()` cleanup +10. **Wire orphan cleanup through SandboxService** + +### Future Improvements (Separate PRs) + +11. Port browser tab limit (MAX_TABS=50) +12. Port shell session limit (MAX_SHELL_SESSIONS=10) +13. Port tool server local file serving +14. Port DALL-E 3 / Sora clients (if needed) +15. Port MCP tool image bridging +16. Move hardcoded port constants to SandboxSettings + +--- + +## Appendix: File Classification Summary + +| Classification | Count | Description | +|---------------|-------|-------------| +| ALREADY_HANDLED | ~12 | Ported to new locations | +| MAIN_REWROTE | ~55 | Old modules completely rewritten by main | +| SHOULD_CHECK | ~30 | Investigated — most are main-equivalent or nice-to-have | +| COSMETIC | ~6 | Typo fixes, debug logs, import fixes | +| MISSED | 7 | VNC services (packages + startup), client_host, docker dep, lockfile, dev login endpoint, DALL-E 3, Sora | + +Of the 7 MISSED items: 3 are Docker-blocking (VNC, client_host, docker dep), 2 are regressions (lockfile, dead DevLogin), 2 are separate features (DALL-E 3, Sora).
diff --git a/docs/rebase-analysis/06-full-feature-audit.md b/docs/rebase-analysis/06-full-feature-audit.md new file mode 100644 index 000000000..c5713d25b --- /dev/null +++ b/docs/rebase-analysis/06-full-feature-audit.md @@ -0,0 +1,315 @@ +# Full Feature Audit: `rebase/local-docker-sandbox` vs `origin/main` + +**Date:** 2026-04-02 +**Branch:** `rebase/local-docker-sandbox` (7 commits on `fdbc0a5`/`origin/main`) +**Scope:** 39 files changed, +5,778 / −33 lines + +--- + +## 1. Changed Files Inventory + +### Backend — Core Docker Sandbox (NEW files) + +| File | Lines | Purpose | +|------|-------|---------| +| `src/ii_agent/agents/sandboxes/docker.py` | 962 | Full `DockerSandbox` provider — all 26 abstract methods + 3 extras | +| `src/ii_agent/agents/sandboxes/port_manager.py` | 583 | `PortPoolManager` — port allocation, container scanning, thread safety | +| `src/ii_agent/agents/sandboxes/orphan_cleanup.py` | 168 | Background loop to remove orphaned Docker containers | + +### Backend — Integration Points (MODIFIED files) + +| File | Change | Assessment | +|------|--------|------------| +| `agents/sandboxes/__init__.py` | +2 lines: export `DockerSandbox` | ✅ Correct | +| `agents/sandboxes/base.py` | `expose_port` gains `external` kwarg | ✅ Backward-compatible (default=True) | +| `agents/sandboxes/e2b.py` | Signature update only | ✅ Minimal, correct | +| `agents/sandboxes/service.py` | +12 lines: Docker provider in `_create_provider`/`_connect_provider` | ✅ Correct pattern | +| `core/config/sandbox.py` | +42 lines: Docker config fields | ✅ All have defaults, non-breaking | +| `app/lifespan.py` | +26 lines: port scan + orphan cleanup at startup/shutdown | ✅ Guarded by `local_mode` flag | +| `auth/router.py` | +38 lines: `/dev/login` endpoint | ✅ Guarded by `local_mode` flag | + +### Frontend (MODIFIED files) + +| File | Change | Assessment | +|------|--------|------------| +| `lib/utils.ts` | `isSandboxLink()` replaces hardcoded E2B check; `rewriteLocalhostUrl()` for LAN 
access | ✅ Correct, backward-compatible | +| `lib/__tests__/utils.test.ts` | New test file for `isSandboxLink` + `rewriteLocalhostUrl` | ✅ Good | +| `state/slice/agent.ts` | New `sandboxStatus` state + selector | ✅ Additive | +| `state/__tests__/agent-sandbox-status.test.ts` | Tests for new state | ✅ Good | +| `hooks/use-app-events.tsx` | Dispatches `setSandboxStatus`, rewrites localhost URLs | ✅ Correct | +| `hooks/use-navigation-leave-session.tsx` | Resets `sandboxStatus` on leave | ✅ Correct | +| `components/agent/agent-result.tsx` | Uses `sandboxStatus === 'paused'` instead of `isE2bLink()` for awake screen; moves null-check after awake screen | ✅ Better UX for Docker | +| `components/agent/agent-task.tsx` | Stops auto-promoting tasks when agent is stopped | ✅ UX fix | +| `components/agent/subagent-container.tsx` | Adds `stopped` status | ✅ Additive | +| `components/share-agent-content.tsx` | `isSandboxLink` for vscodeUrl; normalizes `chat` agent_type | ✅ Correct | +| `typings/agent.ts` | Adds `'stopped'` to `AgentContext.status` union | ✅ Additive | +| `constants/models.tsx` | Adds `claude-opus-4-6` and `claude-sonnet-4-6` | ✅ (Unrelated to sandbox, useful) | +| `app/routes/agent.tsx` | Redirects `chat` type sessions to `/chat` | ✅ UX fix | +| `app/routes/login.tsx` | `DevLoginButton` component | ✅ Guarded by backend availability check | +| `package.json` | Adds `vitest` + test scripts | ✅ Good | + +### Infrastructure & Docs + +| File | Assessment | +|------|------------| +| `docker/docker-compose.local.yaml` | ✅ Full local stack (postgres, redis, minio, backend, frontend) | +| `docker/.stack.env.local.example` | ✅ Template for local env | +| `scripts/stack_control.sh` | ✅ Stack management (start, stop, rebuild, logs) | +| `scripts/html_to_pdf.py` | ✅ Utility script | +| `.github/copilot-instructions.md` | ✅ Agent instructions | +| `docs/docs/*.md` (6 files) | ✅ Comprehensive documentation | + +### Tests (NEW files) + +| File | Tests | Assessment | 
+|------|-------|------------| +| `test_docker_sandbox.py` | 100+ | ✅ Thorough coverage | +| `test_port_manager.py` | 48 | ✅ Exhaustive | +| `test_orphan_cleanup.py` | 24+ | ✅ Good | + +--- + +## 2. Feature Porting Assessment + +### ✅ Fully Ported Features + +| Feature | Original Location | New Location | Status | +|---------|-------------------|--------------|--------| +| Docker container sandbox lifecycle | `ii_sandbox_server/sandboxes/docker.py` | `agents/sandboxes/docker.py` | Complete — integrated directly as `Sandbox` subclass | +| Port pool management | `ii_sandbox_server/sandboxes/port_manager.py` | `agents/sandboxes/port_manager.py` | Complete — enhanced with thread safety, container scanning | +| Orphan container cleanup | `ii_sandbox_server/lifecycle/sandbox_controller.py` | `agents/sandboxes/orphan_cleanup.py` | Complete — extracted to dedicated module | +| SandboxService Docker routing | `server/services/sandbox_service.py` | `agents/sandboxes/service.py` | Complete — `_create_provider`/`_connect_provider` dispatch | +| Config: Docker-specific settings | `ii_sandbox_server/config.py` | `core/config/sandbox.py` | Complete — `docker_image`, `docker_network`, `port_range_*`, `local_mode`, etc. 
| +| Dev login (no-OAuth local mode) | `server/api/auth.py` | `auth/router.py` | Complete — `/dev/login` endpoint | +| Frontend: sandbox URL detection | `lib/utils.ts` | `lib/utils.ts` | Complete — `isSandboxLink()` handles both E2B and Docker | +| Frontend: localhost URL rewriting | (new) | `lib/utils.ts` | Complete — LAN access support | +| Frontend: sandbox status tracking | (new) | `state/slice/agent.ts` | Complete — `sandboxStatus` state | +| Frontend: stopped agent UX | (new) | Multiple components | Complete — task display, subagent container | +| Frontend: chat routing fix | (new) | `routes/agent.tsx`, `share-agent-content.tsx` | Complete | +| Lifespan: Docker startup/shutdown | `sandbox_controller.py` | `app/lifespan.py` | Complete — container scan + orphan cleanup | +| Docker compose: full local stack | `docker-compose.local-only.yaml` | `docker/docker-compose.local.yaml` | Complete | + +### ✅ Correctly NOT Ported (obsolete/replaced by main) + +| Original Feature | Why Not Ported | +|------------------|---------------| +| `ii_sandbox_server/` (entire package) | **Eliminated by architecture change.** Main's `SandboxService` + provider pattern replaces the separate sandbox server. Docker operations now happen in-process via Docker SDK instead of through HTTP to a separate server. This is a **design improvement**. | +| `ii_sandbox_server/client/client.py` | HTTP client to sandbox server — unnecessary when Docker SDK calls are in-process. | +| `ii_sandbox_server/lifecycle/queue.py` | Redis queue scheduler for sandbox operations — replaced by direct async calls in the service layer. | +| `ii_sandbox_server/db/manager.py` | Separate sandbox DB — replaced by `AgentSandbox` model in main's unified DB. | +| `src/ii_agent/adapters/sandbox_adapter.py` | Adapter between old `IISandbox` and `ii_tool.SandboxInterface` — both gone on main. | +| `src/ii_agent/sandbox/ii_sandbox.py` | Old sandbox client — replaced by `Sandbox` abstract class + `DockerSandbox`. 
| +| `src/ii_agent/server/*` (60+ files) | Entire old server package restructured into domain modules on main. | +| `src/ii_agent/controller/*` | Old controller pattern — replaced by agent runtime + handler pattern. | +| `src/ii_tool/*` changes | Tool changes were for old `SandboxInterface` bridge — main's tools call `Sandbox` directly. | +| `start_sandbox_server.sh` | No longer needed — no separate sandbox server process. | +| `scripts/run_stack.sh` | Replaced by `scripts/stack_control.sh`. | + +--- + +## 3. Gap Analysis: Missing Features + +### Gap 1: Shell (PTY) Backend — SIGNIFICANT + +**Status:** Missing +**Impact:** Medium-High + +E2BSandbox exposes a `shell` property returning `E2BShell` — a full persistent terminal backend implementing the `Shell` abstract class (18 abstract methods). `SandboxService` uses this for `create_shell_session`, `run_shell_command`, `kill_shell_command`, `list_shell_sessions`, etc. + +**DockerSandbox has no `shell` property.** It has `run_command()` (synchronous exec) and `create_live_terminal()` (WebSocket terminal), but no `Shell` subclass for persistent PTY session management. + +**Consequence:** Shell-based tools (`persistent_shell`) will raise `ShellOperationError("Persistent shell sessions are not supported by sandbox ...")` for Docker sandboxes. + +**Remediation options:** +1. **DockerShell implementation** — Create `docker_shell.py` implementing `Shell` using Docker exec + tmux/screen for session persistence (similar to how `E2BShell` uses E2B's PTY API). The Docker sandbox already has `create_live_terminal()` which creates terminals; a `DockerShell` could build on `exec_run` with tmux session management. +2. **Alternative design:** Use the existing `create_live_terminal()` WebSocket approach as the primary interactive shell, with `run_command()` as the fallback for non-interactive use. Most agent tool calls use `run_command()` already. 
+ +**Assessment:** This gap is real but **mitigated** because: +- Most agent tool execution uses `run_command()` (synchronous exec), not persistent shells +- The persistent shell feature is primarily UI-facing (terminal tabs in the frontend) +- `run_command()` works correctly for all tool-driven command execution + +### Gap 2: Sandbox Pause/Resume — PARTIAL + +**Status:** Partially implemented +**Impact:** Low + +`DockerSandbox.pause()` calls `container.pause()` (Docker native pause). However: +- Docker pause freezes processes in-place (SIGSTOP) — different from E2B's snapshot-and-destroy model +- No explicit `resume()` / `unpause()` method (Docker API has `container.unpause()`) +- The `awake_sandbox` Socket.IO handler calls `init_sandbox()` which reconnects via `connect()` — this works for Docker since the container is still alive when paused + +**Assessment:** Functionally adequate. Docker's pause/unpause is simpler and more reliable than E2B's snapshot model. A minor enhancement would be to add an explicit `unpause()` path in `connect()`. + +### Gap 3: Extended Timeout / Auto-Pause — COSMETIC + +**Status:** Config exists but unused for Docker +**Impact:** Low + +`SandboxSettings.extended_timeout_seconds` and `auto_pause` are E2B-specific. Docker sandbox timeout is managed by `set_timeout()` which kills the container. No auto-pause-on-inactivity logic exists for Docker. + +**Assessment:** Docker containers persist until explicitly killed or timeout expires. This is actually better for local use — no unexpected pauses. Not a real gap. + +### Gap 4: Sandbox Explorer Integration — UNTESTED + +**Status:** Implemented but untested for Docker +**Impact:** Low + +`explorer.py` provides `WorkspaceExplorerService` which calls `sandbox.list_files_with_contents()` and `sandbox.watch_dir()`. 
`DockerSandbox` implements both, but: +- `watch_dir()` raises `NotImplementedError` — it's stubbed +- `list_files_with_contents()` delegates to `list_files_recursive()` + `read_file_content()` + +**Assessment:** `watch_dir()` needs implementation for live workspace explorer. This is a pre-existing limitation (it was also missing in the old branch). + +--- + +## 4. Database Migration Path + +### Current State + +| Aspect | Existing DB | Target (New Baseline) | +|--------|-------------|----------------------| +| Tables | 21 | 40 | +| Alembic head | `f7g8h9i0j1k2` | `20260330_000000` chain | +| ID types | `VARCHAR` (string UUIDs) | `UUID` (native) | +| Session columns | `sandbox_id`, `llm_setting_id`, `status`, `agent_state_path`, `state_storage_url`, `deleted_at`, `prompt_tokens`, `completion_tokens`, `summary_message_id`, `cost` | `model_setting_id`, `app_kind`, `api_version`, `session_metadata`, `is_deleted` | +| User columns | `credits`, `bonus_credits` | `language` + credit tables | +| Table renames | `llm_settings` | `model_settings` | +| | `events` | `application_events` / `agent_event_logs` | +| | `file_uploads` | `user_assets` / `session_assets` | +| | `provider_containers` | `chat_provider_containers` | + +### Key Schema Differences + +1. **ID type change:** All PKs and FKs changed from `VARCHAR` to `UUID(as_uuid=True)`. The existing data uses string-formatted UUIDs, so the values are compatible — but the column types must be `ALTER`ed. + +2. **Table renames:** + - `llm_settings` → `model_settings` + - `events` → split into `application_events` + `agent_event_logs` + - `file_uploads` → `user_assets` / `session_assets` + - `provider_containers` → `chat_provider_containers` + - `provider_files` → `chat_provider_files` + - `provider_vector_stores` → `chat_provider_vector_stores` + - `agent_run_tasks` → `agent_run_messages` (with structural changes) + +3. 
**Session table restructure:** + - Removed: `sandbox_id`, `agent_state_path`, `state_storage_url`, `prompt_tokens`, `completion_tokens`, `summary_message_id`, `cost` + - Renamed: `llm_setting_id` → `model_setting_id`, `deleted_at` → `is_deleted` (timestamp converted to a boolean flag) + - Added: `app_kind`, `api_version`, `session_metadata` + +4. **New tables (19):** `agent_event_logs`, `agent_run_messages`, `agent_sandboxes`, `apple_credentials`, `chat_provider_*`, `chat_summaries`, `composio_profiles`, `credit_balances`, `credit_transactions`, `media_templates`, `model_settings`, `project_custom_domains`, `project_databases`, `run_tasks`, `session_assets`, `session_pins`, `session_summaries`, `skills`, `slide_versions`, `storybook*`, `task_logs`, `user_assets` + +5. **Tables to remove:** `session_metrics` (not in target) + +### Migration Strategy + +The schema differences are extensive enough that an incremental Alembic migration would be fragile. Recommended approach: + +#### Option A: Data-Preserving Fresh Start (RECOMMENDED) + +1. **Export critical data** from existing DB: + ```bash + # Export users, sessions, chat messages, wishlists, and agent run tasks + docker exec ii-agent-local-postgres-1 pg_dump -U iiagent -d iiagentdev \ + --data-only -t users -t sessions -t chat_messages -t session_wishlists \ + -t agent_run_tasks > /tmp/old_data.sql + ``` + +2. **Reset DB with new schema:** + ```bash + docker exec ii-agent-local-postgres-1 psql -U iiagent -d postgres -c "DROP DATABASE iiagentdev;" + docker exec ii-agent-local-postgres-1 psql -U iiagent -d postgres -c "CREATE DATABASE iiagentdev;" + ``` + +3. **Run Alembic migrations** (the app does this on startup): + ```bash + # Start the app with migrations enabled so it applies them on startup: + II_AGENT_SKIP_MIGRATIONS=false ./scripts/start.sh + ``` + +4. 
**Transform and import data** via a migration script that: + - Converts `VARCHAR` IDs to `UUID` type + - Maps `users.id` (VARCHAR) → `users.id` (UUID) + - Maps `sessions.llm_setting_id` → `sessions.model_setting_id` + - Maps `sessions.deleted_at IS NOT NULL` → `sessions.is_deleted = true` + - Sets `sessions.app_kind = 'agent'` (or `'chat'` based on `agent_type`) + - Drops columns that no longer exist (`sandbox_id`, `agent_state_path`, etc.) + - Creates `agent_sandboxes` records from `sessions.sandbox_id` where non-null + - Imports `chat_messages` with UUID conversion on `session_id` + +#### Option B: In-Place Alembic Migration + +Write a custom Alembic migration that: +1. Renames tables (`llm_settings` → `model_settings`, etc.) +2. `ALTER COLUMN` to change `VARCHAR` → `UUID USING id::uuid` +3. Adds new columns with defaults +4. Drops deprecated columns +5. Creates new tables +6. Updates `alembic_version` to the new head + +This is more complex but avoids data round-tripping. The main risk is the `VARCHAR` → `UUID` type change on columns with foreign key constraints (requires dropping and re-creating FKs). + +### Recommended Migration Script Outline + +```python +"""migrate_existing_data.py — Run after new schema is in place. Expects the old data to be available in the separate database named in OLD_DB_URL (e.g. restored from the exported dump).""" + +import asyncio +import uuid +from sqlalchemy import text +from ii_agent.core.db.base import get_engine + +OLD_DB_URL = "postgresql://iiagent:...@localhost:5432/iiagentdev_old" +NEW_DB_URL = "postgresql://iiagent:...@localhost:5432/iiagentdev" + +async def migrate(): + # 1. Read from old DB + # 2. Transform records + # 3. Insert into new DB + + # Users: VARCHAR id → UUID + # Sessions: rename columns, set defaults for new fields + # ChatMessages: keep content/role/usage, convert session_id + # AgentRunTasks → agent_run_messages: structural transform + pass +``` + +### Data Preservation Summary + +| Table | Records | Preservable? | Notes | +|-------|---------|--------------|-------| +| `users` | 1 | ✅ Yes | ID type conversion needed. 
`credits`/`bonus_credits` → `credit_balances` table | +| `sessions` | 22 active | ✅ Yes | Column mapping needed (see above). Active sessions will continue. | +| `chat_messages` | 317 | ✅ Yes | `session_id` VARCHAR→UUID. Schema mostly compatible. | +| `agent_run_tasks` | 270 | ⚠️ Partial | Structure differs from `agent_run_messages`. Core fields preservable. | +| `session_wishlists` | ? | ✅ Yes | Direct migration, ID conversion only | +| `llm_settings` | ? | ✅ Yes | Rename to `model_settings`, ID conversion | +| `mcp_settings` | ? | ✅ Yes | ID conversion only | +| `slide_contents` | ? | ✅ Yes | ID conversion | +| `slide_templates` | ? | ✅ Yes | ID conversion (seeded data may be re-created) | +| `session_metrics` | ? | ❌ No | Table removed in new schema | +| `connectors` | ? | ✅ Yes | Likely empty, ID conversion | + +--- + +## 5. Summary & Recommendations + +### Porting Quality: EXCELLENT + +The rebase correctly identified that the old `ii_sandbox_server` intermediary pattern was eliminated by main's direct-provider architecture, and rebuilt the Docker sandbox as a first-class `Sandbox` subclass. All 26 abstract methods are implemented. The integration with `SandboxService`, lifespan, and config is clean and follows main's established patterns. + +### Action Items + +| Priority | Item | Effort | +|----------|------|--------| +| **P1** | Write data migration script for existing sessions | Medium | +| **P2** | Implement `DockerShell` for persistent PTY sessions | Medium | +| **P3** | Implement `watch_dir()` for workspace explorer | Low | +| **P4** | Add `unpause()` call path in `connect()` for paused Docker containers | Low | + +### Risk Assessment + +- **No regressions to E2B:** All E2B changes are signature-only (`external` kwarg with default). Zero functional impact. +- **No regressions to main features:** All changes are additive or guarded by `local_mode` flag. +- **Frontend changes are backward-compatible:** `isSandboxLink()` is a superset of `isE2bLink()`. 
New state fields have empty defaults. +- **Database migration is feasible** but requires a dedicated script due to the VARCHAR→UUID type change and column restructuring. diff --git a/e2b.Dockerfile b/e2b.Dockerfile index be04871bf..12fe4283d 100644 --- a/e2b.Dockerfile +++ b/e2b.Dockerfile @@ -57,6 +57,10 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ unzip \ libmagic1 \ xvfb \ + x11vnc \ + novnc \ + websockify \ + fluxbox \ pandoc \ weasyprint \ libpq-dev \ @@ -82,6 +86,16 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ # Optimization: Combine all curl installs and npm installs into fewer layers RUN curl -fsSL https://code-server.dev/install.sh | sh +# GitHub CLI (gh) — required by the Copilot A2A backend (`gh copilot agent`) +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ + --mount=type=cache,target=/var/lib/apt,sharing=locked \ + curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \ + -o /usr/share/keyrings/githubcli-archive-keyring.gpg && \ + echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \ + > /etc/apt/sources.list.d/github-cli.list && \ + apt-get update && apt-get install -y gh && \ + rm -rf /var/lib/apt/lists/* + # Optimization: Use npm cache mount and install playwright package and system deps as root RUN --mount=type=cache,target=/root/.npm \ npm install -g agent-browser @intelligent-internet/codex @ast-grep/cli @anthropic-ai/claude-code @@ -144,6 +158,12 @@ RUN --mount=type=cache,target=/root/.cache/uv \ COPY src/ii_server /app/ii_sandbox/src/ii_server COPY src/ii_agent_tools /app/ii_sandbox/src/ii_agent_tools +# Copy the A2A adapter subtree + minimal parent __init__.py files so +# `python -m ii_agent.integrations.a2a.adapter_server` resolves inside the sandbox. 
+COPY src/ii_agent/__init__.py /app/ii_sandbox/src/ii_agent/__init__.py +COPY src/ii_agent/integrations/__init__.py /app/ii_sandbox/src/ii_agent/integrations/__init__.py +COPY src/ii_agent/integrations/a2a /app/ii_sandbox/src/ii_agent/integrations/a2a + # Optimization: Copy from cached location in codex-builder COPY --from=codex-builder /sse-http-server /usr/local/bin/sse-http-server @@ -185,10 +205,21 @@ ENV PATH="/home/user/.bun/bin:/app/ii_sandbox/.venv/bin:$PATH" USER user -# Install Playwright browser binaries +# Install Playwright browser binaries and create system symlinks RUN playwright install chromium +USER root +RUN CHROME_BIN=$(find /home/user/.cache/ms-playwright -name chrome -path '*/chrome-linux/*' | head -1) && \ + ln -sf "$CHROME_BIN" /usr/local/bin/chromium-browser && \ + ln -sf "$CHROME_BIN" /usr/local/bin/chromium && \ + ln -sf "$CHROME_BIN" /usr/local/bin/google-chrome +USER user WORKDIR /home/user +# A2A adapter port — served by ii_agent.integrations.a2a.adapter_server +# (launched by start-services.sh; default 18100 is in the control-plane range 18000-18999) +ENV SANDBOX_ADAPTER_PORT=18100 +EXPOSE 18100 + ENTRYPOINT ["/app/entrypoint.sh"] CMD ["bash", "/app/start-services.sh"] diff --git a/frontend/package.json b/frontend/package.json index cbb3d71a3..8968e730b 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -15,7 +15,9 @@ "tauri": "tauri", "prepare": "husky", "lint": "eslint . --report-unused-disable-directives --max-warnings 0", - "format": "prettier --write ." 
+ "format": "prettier --write .", + "test": "vitest run", + "test:watch": "vitest" }, "lint-staged": { "**/*": "prettier --write --ignore-unknown" @@ -128,6 +130,7 @@ "typescript": "^5.8.3", "typescript-eslint": "^8.31.1", "vite": "^6.3.4", - "vite-plugin-svgr": "^4.3.0" + "vite-plugin-svgr": "^4.3.0", + "vitest": "^3.2.1" } } diff --git a/frontend/pnpm-lock.yaml b/frontend/pnpm-lock.yaml index 0bf002b7f..acf4a603b 100644 --- a/frontend/pnpm-lock.yaml +++ b/frontend/pnpm-lock.yaml @@ -327,6 +327,9 @@ importers: vite-plugin-svgr: specifier: ^4.3.0 version: 4.3.0(rollup@4.46.2)(typescript@5.9.2)(vite@6.3.5(@types/node@22.17.2)(jiti@2.5.1)(lightningcss@1.30.1)(terser@5.43.1)(yaml@2.8.1)) + vitest: + specifier: ^3.2.1 + version: 3.2.4(@types/debug@4.1.12)(@types/node@22.17.2)(jiti@2.5.1)(lightningcss@1.30.1)(terser@5.43.1)(yaml@2.8.1) packages: @@ -1315,56 +1318,67 @@ packages: resolution: {integrity: sha512-EtP8aquZ0xQg0ETFcxUbU71MZlHaw9MChwrQzatiE8U/bvi5uv/oChExXC4mWhjiqK7azGJBqU0tt5H123SzVA==} cpu: [arm] os: [linux] + libc: [glibc] '@rollup/rollup-linux-arm-musleabihf@4.46.2': resolution: {integrity: sha512-qO7F7U3u1nfxYRPM8HqFtLd+raev2K137dsV08q/LRKRLEc7RsiDWihUnrINdsWQxPR9jqZ8DIIZ1zJJAm5PjQ==} cpu: [arm] os: [linux] + libc: [musl] '@rollup/rollup-linux-arm64-gnu@4.46.2': resolution: {integrity: sha512-3dRaqLfcOXYsfvw5xMrxAk9Lb1f395gkoBYzSFcc/scgRFptRXL9DOaDpMiehf9CO8ZDRJW2z45b6fpU5nwjng==} cpu: [arm64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-arm64-musl@4.46.2': resolution: {integrity: sha512-fhHFTutA7SM+IrR6lIfiHskxmpmPTJUXpWIsBXpeEwNgZzZZSg/q4i6FU4J8qOGyJ0TR+wXBwx/L7Ho9z0+uDg==} cpu: [arm64] os: [linux] + libc: [musl] '@rollup/rollup-linux-loongarch64-gnu@4.46.2': resolution: {integrity: sha512-i7wfGFXu8x4+FRqPymzjD+Hyav8l95UIZ773j7J7zRYc3Xsxy2wIn4x+llpunexXe6laaO72iEjeeGyUFmjKeA==} cpu: [loong64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-ppc64-gnu@4.46.2': resolution: {integrity: 
sha512-B/l0dFcHVUnqcGZWKcWBSV2PF01YUt0Rvlurci5P+neqY/yMKchGU8ullZvIv5e8Y1C6wOn+U03mrDylP5q9Yw==} cpu: [ppc64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-riscv64-gnu@4.46.2': resolution: {integrity: sha512-32k4ENb5ygtkMwPMucAb8MtV8olkPT03oiTxJbgkJa7lJ7dZMr0GCFJlyvy+K8iq7F/iuOr41ZdUHaOiqyR3iQ==} cpu: [riscv64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-riscv64-musl@4.46.2': resolution: {integrity: sha512-t5B2loThlFEauloaQkZg9gxV05BYeITLvLkWOkRXogP4qHXLkWSbSHKM9S6H1schf/0YGP/qNKtiISlxvfmmZw==} cpu: [riscv64] os: [linux] + libc: [musl] '@rollup/rollup-linux-s390x-gnu@4.46.2': resolution: {integrity: sha512-YKjekwTEKgbB7n17gmODSmJVUIvj8CX7q5442/CK80L8nqOUbMtf8b01QkG3jOqyr1rotrAnW6B/qiHwfcuWQA==} cpu: [s390x] os: [linux] + libc: [glibc] '@rollup/rollup-linux-x64-gnu@4.46.2': resolution: {integrity: sha512-Jj5a9RUoe5ra+MEyERkDKLwTXVu6s3aACP51nkfnK9wJTraCC8IMe3snOfALkrjTYd2G1ViE1hICj0fZ7ALBPA==} cpu: [x64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-x64-musl@4.46.2': resolution: {integrity: sha512-7kX69DIrBeD7yNp4A5b81izs8BqoZkCIaxQaOpumcJ1S/kmqNFjPhDu1LHeVXv0SexfHQv5cqHsxLOjETuqDuA==} cpu: [x64] os: [linux] + libc: [musl] '@rollup/rollup-win32-arm64-msvc@4.46.2': resolution: {integrity: sha512-wiJWMIpeaak/jsbaq2HMh/rzZxHVW1rU6coyeNNpMwk5isiPjSTx0a4YLSlYDwBH/WBvLz+EtsNqQScZTLJy3g==} @@ -1615,24 +1629,28 @@ packages: engines: {node: '>= 10'} cpu: [arm64] os: [linux] + libc: [glibc] '@tailwindcss/oxide-linux-arm64-musl@4.1.12': resolution: {integrity: sha512-V8pAM3s8gsrXcCv6kCHSuwyb/gPsd863iT+v1PGXC4fSL/OJqsKhfK//v8P+w9ThKIoqNbEnsZqNy+WDnwQqCA==} engines: {node: '>= 10'} cpu: [arm64] os: [linux] + libc: [musl] '@tailwindcss/oxide-linux-x64-gnu@4.1.12': resolution: {integrity: sha512-xYfqYLjvm2UQ3TZggTGrwxjYaLB62b1Wiysw/YE3Yqbh86sOMoTn0feF98PonP7LtjsWOWcXEbGqDL7zv0uW8Q==} engines: {node: '>= 10'} cpu: [x64] os: [linux] + libc: [glibc] '@tailwindcss/oxide-linux-x64-musl@4.1.12': resolution: {integrity: 
sha512-ha0pHPamN+fWZY7GCzz5rKunlv9L5R8kdh+YNvP5awe3LtuXb5nRi/H27GeL2U+TdhDOptU7T6Is7mdwh5Ar3A==} engines: {node: '>= 10'} cpu: [x64] os: [linux] + libc: [musl] '@tailwindcss/oxide-wasm32-wasi@4.1.12': resolution: {integrity: sha512-4tSyu3dW+ktzdEpuk6g49KdEangu3eCYoqPhWNsZgUhyegEda3M9rG0/j1GV/JjVVsj+lG7jWAyrTlLzd/WEBg==} @@ -1704,30 +1722,35 @@ packages: engines: {node: '>= 10'} cpu: [arm64] os: [linux] + libc: [glibc] '@tauri-apps/cli-linux-arm64-musl@2.7.1': resolution: {integrity: sha512-/HXY0t4FHkpFzjeYS5c16mlA6z0kzn5uKLWptTLTdFSnYpr8FCnOP4Sdkvm2TDQPF2ERxXtNCd+WR/jQugbGnA==} engines: {node: '>= 10'} cpu: [arm64] os: [linux] + libc: [musl] '@tauri-apps/cli-linux-riscv64-gnu@2.7.1': resolution: {integrity: sha512-GeW5lVI2GhhnaYckiDzstG2j2Jwlud5d2XefRGwlOK+C/bVGLT1le8MNPYK8wgRlpeK8fG1WnJJYD6Ke7YQ8bg==} engines: {node: '>= 10'} cpu: [riscv64] os: [linux] + libc: [glibc] '@tauri-apps/cli-linux-x64-gnu@2.7.1': resolution: {integrity: sha512-DprxKQkPxIPYwUgg+cscpv2lcIUhn2nxEPlk0UeaiV9vATxCXyytxr1gLcj3xgjGyNPlM0MlJyYaPy1JmRg1cA==} engines: {node: '>= 10'} cpu: [x64] os: [linux] + libc: [glibc] '@tauri-apps/cli-linux-x64-musl@2.7.1': resolution: {integrity: sha512-KLlq3kOK7OUyDR757c0zQjPULpGZpLhNB0lZmZpHXvoOUcqZoCXJHh4dT/mryWZJp5ilrem5l8o9ngrDo0X1AA==} engines: {node: '>= 10'} cpu: [x64] os: [linux] + libc: [musl] '@tauri-apps/cli-win32-arm64-msvc@2.7.1': resolution: {integrity: sha512-dH7KUjKkSypCeWPiainHyXoES3obS+JIZVoSwSZfKq2gWgs48FY3oT0hQNYrWveE+VR4VoR3b/F3CPGbgFvksA==} @@ -1782,6 +1805,9 @@ packages: '@types/babel__traverse@7.28.0': resolution: {integrity: sha512-8PvcXf70gTDZBgt9ptxJ8elBeBjcLOAcOtoO/mPJjtji1+CdGbHgm77om1GrsPxsiE+uXIpNSK64UYaIwQXd4Q==} + '@types/chai@5.2.3': + resolution: {integrity: sha512-Mw558oeA9fFbv65/y4mHtXDs9bPnFMZAL/jxdPFUpOHHIXX91mcgEHbS5Lahr+pwZFR8A7GQleRWeI6cGFC2UA==} + '@types/d3-array@3.2.2': resolution: {integrity: sha512-hOLWVbm7uRza0BYXpIIW5pxfrKe0W+D5lrFiAEYR+pb6w3N2SwSMaJbXdUfSEv+dT4MfHBLtn5js0LAWaO6otw==} @@ -1878,6 +1904,9 @@ 
packages: '@types/debug@4.1.12': resolution: {integrity: sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==} + '@types/deep-eql@4.0.2': + resolution: {integrity: sha512-c9h9dVVMigMPc4bwTvC5dxqtqJZwQPePsWjPlpSOnojbor6pGqdk541lfA7AqFQr5pB1BRdq0juY9db81BwyFw==} + '@types/estree-jsx@1.0.5': resolution: {integrity: sha512-52CcUVNFyfb1A2ALocQw/Dd1BQFNmSdkuC3BkZ6iqhdMfQz7JWOFRuJFloOzjk+6WijU56m9oKXFAXc7o3Towg==} @@ -2013,6 +2042,35 @@ packages: peerDependencies: vite: ^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0 + '@vitest/expect@3.2.4': + resolution: {integrity: sha512-Io0yyORnB6sikFlt8QW5K7slY4OjqNX9jmJQ02QDda8lyM6B5oNgVWoSoKPac8/kgnCUzuHQKrSLtu/uOqqrig==} + + '@vitest/mocker@3.2.4': + resolution: {integrity: sha512-46ryTE9RZO/rfDd7pEqFl7etuyzekzEhUbTW3BvmeO/BcCMEgq59BKhek3dXDWgAj4oMK6OZi+vRr1wPW6qjEQ==} + peerDependencies: + msw: ^2.4.9 + vite: ^5.0.0 || ^6.0.0 || ^7.0.0-0 + peerDependenciesMeta: + msw: + optional: true + vite: + optional: true + + '@vitest/pretty-format@3.2.4': + resolution: {integrity: sha512-IVNZik8IVRJRTr9fxlitMKeJeXFFFN0JaB9PHPGQ8NKQbGpfjlTx9zO4RefN8gp7eqjNy8nyK3NZmBzOPeIxtA==} + + '@vitest/runner@3.2.4': + resolution: {integrity: sha512-oukfKT9Mk41LreEW09vt45f8wx7DordoWUZMYdY/cyAk7w5TWkTRCNZYF7sX7n2wB7jyGAl74OxgwhPgKaqDMQ==} + + '@vitest/snapshot@3.2.4': + resolution: {integrity: sha512-dEYtS7qQP2CjU27QBC5oUOxLE/v5eLkGqPE0ZKEIDGMs4vKWe7IjgLOeauHsR0D5YuuycGRO5oSRXnwnmA78fQ==} + + '@vitest/spy@3.2.4': + resolution: {integrity: sha512-vAfasCOe6AIK70iP5UD11Ac4siNUNJ9i/9PZ3NKx07sG6sUxeag1LWdNrMWeKKYBLlzuK+Gn65Yd5nyL6ds+nw==} + + '@vitest/utils@3.2.4': + resolution: {integrity: sha512-fB2V0JFrQSMsCo9HiSq3Ezpdv4iYaXRG1Sx8edX3MwxfyNn83mKiGzOcH+Fkxt4MHxr3y42fQi1oeAInqgX2QA==} + '@xterm/addon-fit@0.10.0': resolution: {integrity: sha512-UFYkDm4HUahf2lnEyHvio51TNGiLK66mqP2JoATy7hRZeXaGMRDr00JiSF7m63vR5WKATF605yEggJKsw0JpMQ==} peerDependencies: @@ -2108,6 +2166,10 @@ packages: resolution: {integrity: 
sha512-BNoCY6SXXPQ7gF2opIP4GBE+Xw7U+pHMYKuzjgCN3GwiaIR09UUeKfheyIry77QtrCBlC0KK0q5/TER/tYh3PQ==} engines: {node: '>= 0.4'} + assertion-error@2.0.1: + resolution: {integrity: sha512-Izi8RQcffqCeNVgFigKli1ssklIbpHnCYc6AknXGYoB6grJqyeby7jv12JUQgmTAnIDnbck1uxksT4dzN3PWBA==} + engines: {node: '>=12'} + async-function@1.0.0: resolution: {integrity: sha512-hsU18Ae8CDTR6Kgu9DYf0EbCr/a5iGL0rytQDobUcdpYOKokk8LEjVphnXkDkgpi0wYVsqrXuP0bZxJaTqdgoA==} engines: {node: '>= 0.4'} @@ -2154,6 +2216,10 @@ packages: buffer-from@1.1.2: resolution: {integrity: sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==} + cac@6.7.14: + resolution: {integrity: sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ==} + engines: {node: '>=8'} + call-bind-apply-helpers@1.0.2: resolution: {integrity: sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==} engines: {node: '>= 0.4'} @@ -2184,6 +2250,10 @@ packages: ccount@2.0.1: resolution: {integrity: sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==} + chai@5.3.3: + resolution: {integrity: sha512-4zNhdJD/iOjSH0A05ea+Ke6MU5mmpQcbQsSOkgdaUMJ9zTlDTD/GYlwohmIE2u0gaxHYiVHEn1Fw9mZ/ktJWgw==} + engines: {node: '>=18'} + chalk@4.1.2: resolution: {integrity: sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==} engines: {node: '>=10'} @@ -2204,6 +2274,10 @@ packages: character-reference-invalid@2.0.1: resolution: {integrity: sha512-iBZ4F4wRbyORVsu0jPV7gXkOsGYjGHPmAyv+HiHG8gi5PtC9KI2j1+v8/tlibRvjoWX027ypmG/n0HtO5t7unw==} + check-error@2.1.3: + resolution: {integrity: sha512-PAJdDJusoxnwm1VwW07VWwUN1sl7smmC3OKggvndJFadxxDRyFJBX/ggnu/KE4kQAB7a3Dp8f/YXC1FlUprWmA==} + engines: {node: '>= 16'} + chevrotain-allstar@0.3.1: resolution: {integrity: sha512-b7g+y9A0v4mxCW1qUhf3BSVPg+/NvGErk/dOkrDaHA0nQIQGAtrOjlX//9OQtRlSCy+x9rfB5N8yC71lH1nvMw==} 
peerDependencies: @@ -2518,6 +2592,10 @@ packages: decode-named-character-reference@1.2.0: resolution: {integrity: sha512-c6fcElNV6ShtZXmsgNgFFV5tVX2PaV4g+MOAkb8eXHvn6sryJBrZa9r0zV6+dtTyoCKxtDy5tyQ5ZwQuidtd+Q==} + deep-eql@5.0.2: + resolution: {integrity: sha512-h5k/5U50IJJFpzfL6nO9jaaumfjO/f2NjK/oYB2Djzm4p9L+3T9qWpZqZ2hAbLPuuYq9wrU08WQyBTL5GbPk5Q==} + engines: {node: '>=6'} + deep-is@0.1.4: resolution: {integrity: sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==} @@ -2629,6 +2707,9 @@ packages: resolution: {integrity: sha512-uDn+FE1yrDzyC0pCo961B2IHbdM8y/ACZsKD4dG6WqrjV53BADjwa7D+1aom2rsNVfLyDgU/eigvlJGJ08OQ4w==} engines: {node: '>= 0.4'} + es-module-lexer@1.7.0: + resolution: {integrity: sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA==} + es-object-atoms@1.1.1: resolution: {integrity: sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==} engines: {node: '>= 0.4'} @@ -2718,6 +2799,9 @@ packages: estree-walker@2.0.2: resolution: {integrity: sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==} + estree-walker@3.0.3: + resolution: {integrity: sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==} + esutils@2.0.3: resolution: {integrity: sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==} engines: {node: '>=0.10.0'} @@ -2733,6 +2817,10 @@ packages: resolution: {integrity: sha512-VyhnebXciFV2DESc+p6B+y0LjSm0krU4OgJN44qFAhBY0TJ+1V61tYD2+wHusZ6F9n5K+vl8k0sTy7PEfV4qpg==} engines: {node: '>=16.17'} + expect-type@1.3.0: + resolution: {integrity: sha512-knvyeauYhqjOYvQ66MznSMs83wmHrCycNEN6Ao+2AeYEfxUIkuiVxdEa1qlGEPK+We3n0THiDciYSsCcgW/DoA==} + engines: {node: '>=12.0.0'} + exsolve@1.0.7: resolution: {integrity: sha512-VO5fQUzZtI6C+vx4w/4BWJpg3s/5l+6pRQEHzFRM8WFi4XffSP1Z+4qi7GbjWbvRQEbdIco5mIMq+zX4rPuLrw==} @@ 
-3229,6 +3317,9 @@ packages: js-tokens@4.0.0: resolution: {integrity: sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==} + js-tokens@9.0.1: + resolution: {integrity: sha512-mxa9E9ITFOt0ban3j6L5MpjwegGz6lBQmM1IJkWeBZGcMxto50+eWdjC/52xDbS2vy0k7vIMK0Fe2wfL9OQSpQ==} + js-yaml@4.1.0: resolution: {integrity: sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==} hasBin: true @@ -3327,24 +3418,28 @@ packages: engines: {node: '>= 12.0.0'} cpu: [arm64] os: [linux] + libc: [glibc] lightningcss-linux-arm64-musl@1.30.1: resolution: {integrity: sha512-jmUQVx4331m6LIX+0wUhBbmMX7TCfjF5FoOH6SD1CttzuYlGNVpA7QnrmLxrsub43ClTINfGSYyHe2HWeLl5CQ==} engines: {node: '>= 12.0.0'} cpu: [arm64] os: [linux] + libc: [musl] lightningcss-linux-x64-gnu@1.30.1: resolution: {integrity: sha512-piWx3z4wN8J8z3+O5kO74+yr6ze/dKmPnI7vLqfSqI8bccaTGY5xiSGVIJBDd5K5BHlvVLpUB3S2YCfelyJ1bw==} engines: {node: '>= 12.0.0'} cpu: [x64] os: [linux] + libc: [glibc] lightningcss-linux-x64-musl@1.30.1: resolution: {integrity: sha512-rRomAK7eIkL+tHY0YPxbc5Dra2gXlI63HL+v1Pdi1a3sC+tJTcFrHX+E86sulgAXeI7rSzDYhPSeHHjqFhqfeQ==} engines: {node: '>= 12.0.0'} cpu: [x64] os: [linux] + libc: [musl] lightningcss-win32-arm64-msvc@1.30.1: resolution: {integrity: sha512-mSL4rqPi4iXq5YVqzSsJgMVFENoa4nGTT/GjO2c0Yl9OuQfPsIfncvLrEW6RbbB24WtZ3xP/2CCmI3tNkNV4oA==} @@ -3415,6 +3510,9 @@ packages: lottie-web@5.13.0: resolution: {integrity: sha512-+gfBXl6sxXMPe8tKQm7qzLnUy5DUPJPKIyRHwtpCpyUEYjHYRJC/5gjUvdkuO2c3JllrPtHXH5UJJK8LRYl5yQ==} + loupe@3.2.1: + resolution: {integrity: sha512-CdzqowRJCeLU72bHvWqwRBBlLcMEtIvGrlvef74kMnV2AolS9Y8xUv1I0U/MNAWMhBlKIoyuEgoJ0t/bbwHbLQ==} + lower-case@2.0.2: resolution: {integrity: sha512-7fm3l3NAF9WfN6W3JOmf5drwpVqX78JtoGJ3A6W0a6ZnldM41w2fV5D490psKFTpMds8TJse/eHLFFsNHHjHgg==} @@ -3865,6 +3963,10 @@ packages: pathe@2.0.3: resolution: {integrity: 
sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==} + pathval@2.0.1: + resolution: {integrity: sha512-//nshmD55c46FuFw26xV/xFAaB5HF9Xdap7HJBBnrKdAd6/GxDBaNA1870O79+9ueg61cZLSVc+OaFlfmObYVQ==} + engines: {node: '>= 14.16'} + performance-now@2.1.0: resolution: {integrity: sha512-7EAHlyLHI56VEIdK57uwHdHKIaAGbnXPiw0yWbarQZOKaKpvUIgW0jWRVLiatnM+XXlSwsanIBH/hzGMJulMow==} @@ -4278,6 +4380,9 @@ packages: resolution: {integrity: sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==} engines: {node: '>= 0.4'} + siginfo@2.0.0: + resolution: {integrity: sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==} + signal-exit@4.1.0: resolution: {integrity: sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==} engines: {node: '>=14'} @@ -4321,6 +4426,9 @@ packages: space-separated-tokens@2.0.2: resolution: {integrity: sha512-PEGlAwrG8yXGXRjW32fGbg66JAlOAwbObuqVoJpv/mRgoWDQfgH1wDPvtzWyUSNAXBGSk8h755YDbbcEy3SH2Q==} + stackback@0.0.2: + resolution: {integrity: sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw==} + stackblur-canvas@2.7.0: resolution: {integrity: sha512-yf7OENo23AGJhBriGx0QivY5JP6Y1HbrrDI6WLt6C5auYZXlQrheoY8hD4ibekFKz1HOfE48Ww8kMWMnJD/zcQ==} engines: {node: '>=0.1.14'} @@ -4328,6 +4436,9 @@ packages: state-local@1.0.7: resolution: {integrity: sha512-HTEHMNieakEnoe33shBYcZ7NX83ACUjCu8c40iOGEZsngj9zRnkqS9j1pqQPXwobB0ZcVTk27REb7COQ0UR59w==} + std-env@3.10.0: + resolution: {integrity: sha512-5GS12FdOZNliM5mAOxFRg7Ir0pWz8MdpYm6AY6VPkGpbA7ZzmbzNcBJQ0GPvvyWgcY7QAhCgf9Uy89I03faLkg==} + stop-iteration-iterator@1.1.0: resolution: {integrity: sha512-eLoXW/DHyl62zxY4SCaIgnRhuMr6ri4juEYARS8E6sCEqzKpOiE521Ucofdx+KnDZl5xmvGYaaKCk5FEOxJCoQ==} engines: {node: '>= 0.4'} @@ -4382,6 +4493,9 @@ packages: resolution: {integrity: 
sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==} engines: {node: '>=8'} + strip-literal@3.1.0: + resolution: {integrity: sha512-8r3mkIM/2+PpjHoOtiAW8Rg3jJLHaV7xPwG+YRGrv6FP0wwk/toTpATxWYOW0BKdWwl82VT2tFYi5DlROa0Mxg==} + style-to-js@1.1.17: resolution: {integrity: sha512-xQcBGDxJb6jjFCTzvQtfiPn6YvvP2O8U1MDIPNfJQlWMYfktPy+iGsHE7cssjs7y84d9fQaK4UF3RIJaAHSoYA==} @@ -4433,6 +4547,12 @@ packages: text-segmentation@1.0.3: resolution: {integrity: sha512-iOiPUo/BGnZ6+54OsWxZidGCsdU8YbE4PSpdPinp7DeMtUJNJBoJ/ouUSTJjHkh1KntHaltHl/gDs2FC4i5+Nw==} + tinybench@2.9.0: + resolution: {integrity: sha512-0+DUvqWMValLmha6lr4kD8iAMK1HzV0/aKnCtWb9v9641TnP/MFb7Pc2bxoxQjTXAErryXVgUOfv2YqNllqGeg==} + + tinyexec@0.3.2: + resolution: {integrity: sha512-KQQR9yN7R5+OSwaK0XQoj22pwHoTlgYqmUscPYoknOoWCWfj/5/ABTMRi69FrKU5ffPVh5QcFikpWJI/P1ocHA==} + tinyexec@1.0.1: resolution: {integrity: sha512-5uC6DDlmeqiOwCPmK9jMSdOuZTh8bU39Ys6yidB+UTt5hfZUPGAypSgFRiEp+jbi9qH40BLDvy85jIU88wKSqw==} @@ -4440,6 +4560,18 @@ packages: resolution: {integrity: sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ==} engines: {node: '>=12.0.0'} + tinypool@1.1.1: + resolution: {integrity: sha512-Zba82s87IFq9A9XmjiX5uZA/ARWDrB03OHlq+Vw1fSdt0I+4/Kutwy8BP4Y/y/aORMo61FQ0vIb5j44vSo5Pkg==} + engines: {node: ^18.0.0 || >=20.0.0} + + tinyrainbow@2.0.0: + resolution: {integrity: sha512-op4nsTR47R6p0vMUUoYl/a+ljLFVtlfaXkLQmqfLR1qHma1h/ysYk4hEXZ880bf2CYgTskvTa/e196Vd5dDQXw==} + engines: {node: '>=14.0.0'} + + tinyspy@4.0.4: + resolution: {integrity: sha512-azl+t0z7pw/z958Gy9svOTuzqIk6xq+NSheJzn5MMWtWTFywIacg2wUlzKFGtt3cthx0r2SxMK0yzJOR0IES7Q==} + engines: {node: '>=14.0.0'} + to-regex-range@5.0.1: resolution: {integrity: sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==} engines: {node: '>=8.0'} @@ -4604,6 +4736,11 @@ packages: vfile@6.0.3: resolution: {integrity: 
sha512-KzIbH/9tXat2u30jf+smMwFCsno4wHVdNmzFyL+T/L3UGqqk6JKfVqOFOZEpZSHADH1k40ab6NUIXZq422ov3Q==} + vite-node@3.2.4: + resolution: {integrity: sha512-EbKSKh+bh1E1IFxeO0pg1n4dvoOTt0UDiXMd/qn++r98+jPO1xtJilvXldeuQ8giIB5IkpjCgMleHMNEsGH6pg==} + engines: {node: ^18.0.0 || ^20.0.0 || >=22.0.0} + hasBin: true + vite-plugin-svgr@4.3.0: resolution: {integrity: sha512-Jy9qLB2/PyWklpYy0xk0UU3TlU0t2UMpJXZvf+hWII1lAmRHrOUKi11Uw8N3rxoNk7atZNYO3pR3vI1f7oi+6w==} peerDependencies: @@ -4649,6 +4786,34 @@ packages: yaml: optional: true + vitest@3.2.4: + resolution: {integrity: sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==} + engines: {node: ^18.0.0 || ^20.0.0 || >=22.0.0} + hasBin: true + peerDependencies: + '@edge-runtime/vm': '*' + '@types/debug': ^4.1.12 + '@types/node': ^18.0.0 || ^20.0.0 || >=22.0.0 + '@vitest/browser': 3.2.4 + '@vitest/ui': 3.2.4 + happy-dom: '*' + jsdom: '*' + peerDependenciesMeta: + '@edge-runtime/vm': + optional: true + '@types/debug': + optional: true + '@types/node': + optional: true + '@vitest/browser': + optional: true + '@vitest/ui': + optional: true + happy-dom: + optional: true + jsdom: + optional: true + void-elements@3.1.0: resolution: {integrity: sha512-Dhxzh5HZuiHQhbvTW9AMetFfBHDMYpo23Uo9btPXgdYP+3T5S+p+jgNy7spra+veYhBP2dCSgxR/i2Y02h5/6w==} engines: {node: '>=0.10.0'} @@ -4710,6 +4875,11 @@ packages: engines: {node: '>= 8'} hasBin: true + why-is-node-running@2.3.0: + resolution: {integrity: sha512-hUrmaWBdVDcxvYqnyh09zunKzROWjbZTiNy8dBEjkS7ehEDQibXJ7XvlmtbwuTclUiIyN+CyXQD4Vmko8fNm8w==} + engines: {node: '>=8'} + hasBin: true + word-wrap@1.2.5: resolution: {integrity: sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==} engines: {node: '>=0.10.0'} @@ -6153,6 +6323,11 @@ snapshots: dependencies: '@babel/types': 7.28.2 + '@types/chai@5.2.3': + dependencies: + '@types/deep-eql': 4.0.2 + assertion-error: 2.0.1 + '@types/d3-array@3.2.2': {} 
'@types/d3-axis@3.0.6': @@ -6274,6 +6449,8 @@ snapshots: dependencies: '@types/ms': 2.1.0 + '@types/deep-eql@4.0.2': {} + '@types/estree-jsx@1.0.5': dependencies: '@types/estree': 1.0.8 @@ -6447,6 +6624,48 @@ snapshots: transitivePeerDependencies: - supports-color + '@vitest/expect@3.2.4': + dependencies: + '@types/chai': 5.2.3 + '@vitest/spy': 3.2.4 + '@vitest/utils': 3.2.4 + chai: 5.3.3 + tinyrainbow: 2.0.0 + + '@vitest/mocker@3.2.4(vite@6.3.5(@types/node@22.17.2)(jiti@2.5.1)(lightningcss@1.30.1)(terser@5.43.1)(yaml@2.8.1))': + dependencies: + '@vitest/spy': 3.2.4 + estree-walker: 3.0.3 + magic-string: 0.30.17 + optionalDependencies: + vite: 6.3.5(@types/node@22.17.2)(jiti@2.5.1)(lightningcss@1.30.1)(terser@5.43.1)(yaml@2.8.1) + + '@vitest/pretty-format@3.2.4': + dependencies: + tinyrainbow: 2.0.0 + + '@vitest/runner@3.2.4': + dependencies: + '@vitest/utils': 3.2.4 + pathe: 2.0.3 + strip-literal: 3.1.0 + + '@vitest/snapshot@3.2.4': + dependencies: + '@vitest/pretty-format': 3.2.4 + magic-string: 0.30.17 + pathe: 2.0.3 + + '@vitest/spy@3.2.4': + dependencies: + tinyspy: 4.0.4 + + '@vitest/utils@3.2.4': + dependencies: + '@vitest/pretty-format': 3.2.4 + loupe: 3.2.1 + tinyrainbow: 2.0.0 + '@xterm/addon-fit@0.10.0(@xterm/xterm@5.5.0)': dependencies: '@xterm/xterm': 5.5.0 @@ -6583,6 +6802,8 @@ snapshots: get-intrinsic: 1.3.0 is-array-buffer: 3.0.5 + assertion-error@2.0.1: {} + async-function@1.0.0: {} asynckit@0.4.0: {} @@ -6630,6 +6851,8 @@ snapshots: buffer-from@1.1.2: {} + cac@6.7.14: {} + call-bind-apply-helpers@1.0.2: dependencies: es-errors: 1.3.0 @@ -6667,6 +6890,14 @@ snapshots: ccount@2.0.1: {} + chai@5.3.3: + dependencies: + assertion-error: 2.0.1 + check-error: 2.1.3 + deep-eql: 5.0.2 + loupe: 3.2.1 + pathval: 2.0.1 + chalk@4.1.2: dependencies: ansi-styles: 4.3.0 @@ -6682,6 +6913,8 @@ snapshots: character-reference-invalid@2.0.1: {} + check-error@2.1.3: {} + chevrotain-allstar@0.3.1(chevrotain@11.0.3): dependencies: chevrotain: 11.0.3 @@ -7024,6 +7257,8 @@ 
snapshots: dependencies: character-entities: 2.0.2 + deep-eql@5.0.2: {} + deep-is@0.1.4: {} define-data-property@1.1.4: @@ -7200,6 +7435,8 @@ snapshots: iterator.prototype: 1.1.5 safe-array-concat: 1.1.3 + es-module-lexer@1.7.0: {} + es-object-atoms@1.1.1: dependencies: es-errors: 1.3.0 @@ -7353,6 +7590,10 @@ snapshots: estree-walker@2.0.2: {} + estree-walker@3.0.3: + dependencies: + '@types/estree': 1.0.8 + esutils@2.0.3: {} eventemitter3@5.0.1: {} @@ -7371,6 +7612,8 @@ snapshots: signal-exit: 4.1.0 strip-final-newline: 3.0.0 + expect-type@1.3.0: {} + exsolve@1.0.7: {} extend@3.0.2: {} @@ -7908,6 +8151,8 @@ snapshots: js-tokens@4.0.0: {} + js-tokens@9.0.1: {} + js-yaml@4.1.0: dependencies: argparse: 2.0.1 @@ -8095,6 +8340,8 @@ snapshots: lottie-web@5.13.0: {} + loupe@3.2.1: {} + lower-case@2.0.2: dependencies: tslib: 2.8.1 @@ -8781,6 +9028,8 @@ snapshots: pathe@2.0.3: {} + pathval@2.0.1: {} + performance-now@2.1.0: optional: true @@ -9276,6 +9525,8 @@ snapshots: side-channel-map: 1.0.1 side-channel-weakmap: 1.0.2 + siginfo@2.0.0: {} + signal-exit@4.1.0: {} slice-ansi@5.0.0: @@ -9327,11 +9578,15 @@ snapshots: space-separated-tokens@2.0.2: {} + stackback@0.0.2: {} + stackblur-canvas@2.7.0: optional: true state-local@1.0.7: {} + std-env@3.10.0: {} + stop-iteration-iterator@1.1.0: dependencies: es-errors: 1.3.0 @@ -9432,6 +9687,10 @@ snapshots: strip-json-comments@3.1.1: {} + strip-literal@3.1.0: + dependencies: + js-tokens: 9.0.1 + style-to-js@1.1.17: dependencies: style-to-object: 1.0.9 @@ -9484,6 +9743,10 @@ snapshots: utrie: 1.0.2 optional: true + tinybench@2.9.0: {} + + tinyexec@0.3.2: {} + tinyexec@1.0.1: {} tinyglobby@0.2.14: @@ -9491,6 +9754,12 @@ snapshots: fdir: 6.5.0(picomatch@4.0.3) picomatch: 4.0.3 + tinypool@1.1.1: {} + + tinyrainbow@2.0.0: {} + + tinyspy@4.0.4: {} + to-regex-range@5.0.1: dependencies: is-number: 7.0.0 @@ -9690,6 +9959,27 @@ snapshots: '@types/unist': 3.0.3 vfile-message: 4.0.3 + 
vite-node@3.2.4(@types/node@22.17.2)(jiti@2.5.1)(lightningcss@1.30.1)(terser@5.43.1)(yaml@2.8.1): + dependencies: + cac: 6.7.14 + debug: 4.4.1 + es-module-lexer: 1.7.0 + pathe: 2.0.3 + vite: 6.3.5(@types/node@22.17.2)(jiti@2.5.1)(lightningcss@1.30.1)(terser@5.43.1)(yaml@2.8.1) + transitivePeerDependencies: + - '@types/node' + - jiti + - less + - lightningcss + - sass + - sass-embedded + - stylus + - sugarss + - supports-color + - terser + - tsx + - yaml + vite-plugin-svgr@4.3.0(rollup@4.46.2)(typescript@5.9.2)(vite@6.3.5(@types/node@22.17.2)(jiti@2.5.1)(lightningcss@1.30.1)(terser@5.43.1)(yaml@2.8.1)): dependencies: '@rollup/pluginutils': 5.2.0(rollup@4.46.2) @@ -9717,6 +10007,48 @@ snapshots: terser: 5.43.1 yaml: 2.8.1 + vitest@3.2.4(@types/debug@4.1.12)(@types/node@22.17.2)(jiti@2.5.1)(lightningcss@1.30.1)(terser@5.43.1)(yaml@2.8.1): + dependencies: + '@types/chai': 5.2.3 + '@vitest/expect': 3.2.4 + '@vitest/mocker': 3.2.4(vite@6.3.5(@types/node@22.17.2)(jiti@2.5.1)(lightningcss@1.30.1)(terser@5.43.1)(yaml@2.8.1)) + '@vitest/pretty-format': 3.2.4 + '@vitest/runner': 3.2.4 + '@vitest/snapshot': 3.2.4 + '@vitest/spy': 3.2.4 + '@vitest/utils': 3.2.4 + chai: 5.3.3 + debug: 4.4.1 + expect-type: 1.3.0 + magic-string: 0.30.17 + pathe: 2.0.3 + picomatch: 4.0.3 + std-env: 3.10.0 + tinybench: 2.9.0 + tinyexec: 0.3.2 + tinyglobby: 0.2.14 + tinypool: 1.1.1 + tinyrainbow: 2.0.0 + vite: 6.3.5(@types/node@22.17.2)(jiti@2.5.1)(lightningcss@1.30.1)(terser@5.43.1)(yaml@2.8.1) + vite-node: 3.2.4(@types/node@22.17.2)(jiti@2.5.1)(lightningcss@1.30.1)(terser@5.43.1)(yaml@2.8.1) + why-is-node-running: 2.3.0 + optionalDependencies: + '@types/debug': 4.1.12 + '@types/node': 22.17.2 + transitivePeerDependencies: + - jiti + - less + - lightningcss + - msw + - sass + - sass-embedded + - stylus + - sugarss + - supports-color + - terser + - tsx + - yaml + void-elements@3.1.0: {} vscode-jsonrpc@8.2.0: {} @@ -9794,6 +10126,11 @@ snapshots: dependencies: isexe: 2.0.0 + why-is-node-running@2.3.0: 
+ dependencies: + siginfo: 2.0.0 + stackback: 0.0.2 + word-wrap@1.2.5: {} wrap-ansi@9.0.0: diff --git a/frontend/src/app/routes/agent.tsx b/frontend/src/app/routes/agent.tsx index cc236a2e2..a5caf7c34 100644 --- a/frontend/src/app/routes/agent.tsx +++ b/frontend/src/app/routes/agent.tsx @@ -13,6 +13,7 @@ import AgentTasks from '@/components/agent/agent-task' import ChatBox from '@/components/agent/chat-box' import AgentHeader from '@/components/header' import RightSidebar from '@/components/right-sidebar' +import { rewriteLocalhostUrl } from '@/lib/utils' import { sessionService } from '@/services/session.service' import { selectActiveTab, @@ -91,7 +92,7 @@ function AgentPageContent() { ) // PiP preview URL (mobile takes priority over fullstack) - const pipUrl = mobileWebPreviewUrl || previewUrl + const pipUrl = rewriteLocalhostUrl(mobileWebPreviewUrl || previewUrl) const showPiP = !isMobile && activeTab !== TAB.RESULT && @@ -160,6 +161,11 @@ function AgentPageContent() { fetchSession() }, 5000) } else { + // Redirect chat sessions to the chat page + if (data.agent_type === 'chat') { + navigate(`/chat?id=${sessionId}`, { replace: true }) + return + } dispatch(setSelectedFeature(data.agent_type ?? null)) dispatch(setProjectId(data.project_id ?? 
null)) setSessionData(data) diff --git a/frontend/src/app/routes/dashboard.tsx b/frontend/src/app/routes/dashboard.tsx index 01cefd65a..4901a122b 100644 --- a/frontend/src/app/routes/dashboard.tsx +++ b/frontend/src/app/routes/dashboard.tsx @@ -45,9 +45,11 @@ import { import { wishlistService } from '@/services/wishlist.service' import { sessionService } from '@/services/session.service' import { ISession } from '@/typings/agent' -import { deleteSession } from '@/state/slice/sessions' +import { deleteSession, selectActiveSessionId } from '@/state/slice/sessions' import { clearSessionState } from '@/state/slice/session-state' import { removePin } from '@/state/slice/pins' +import { setRunStatus } from '@/state/slice/agent' +import { setLoading } from '@/state' enum TAB { ALL = 'all', @@ -74,6 +76,7 @@ export function DashboardPage() { const currentPage = useAppSelector(selectSessionsPage) const limit = useAppSelector(selectSessionsLimit) const favoriteSessionIds = useAppSelector(selectFavoriteSessionIds) + const activeSessionId = useAppSelector(selectActiveSessionId) const handleBack = () => { navigate(-1) @@ -117,6 +120,10 @@ export function DashboardPage() { await dispatch(deleteSession(deleteSessionId)).unwrap() dispatch(clearSessionState(deleteSessionId)) dispatch(removePin(deleteSessionId)) + if (deleteSessionId === activeSessionId) { + dispatch(setRunStatus(null)) + dispatch(setLoading(false)) + } setIsDeleteDialogOpen(false) setDeleteSessionId(null) } catch (error) { diff --git a/frontend/src/app/routes/login.tsx b/frontend/src/app/routes/login.tsx index 8b278afef..427ad861a 100644 --- a/frontend/src/app/routes/login.tsx +++ b/frontend/src/app/routes/login.tsx @@ -1,5 +1,5 @@ import { useGoogleLogin } from '@react-oauth/google' -import { useCallback, useEffect, useMemo, useRef } from 'react' +import { useCallback, useEffect, useMemo, useRef, useState } from 'react' import { Link, useNavigate } from 'react-router' import { useForm } from 'react-hook-form' 
import { z } from 'zod' @@ -344,6 +344,10 @@ export function LoginPage() { /> {t('auth.continueWithII')} +

{t('auth.privacyNotice')}{' '}

@@ -359,4 +363,53 @@ export function LoginPage() { ) } +/** + * Dev login button - only shows if DEV_AUTH_ENABLED is set on backend + */ +function DevLoginButton({ + apiBaseUrl, + onSuccess +}: { + apiBaseUrl: string + onSuccess: (payload: IiAuthPayload | null | undefined) => Promise +}) { + const [isAvailable, setIsAvailable] = useState(null) + + useEffect(() => { + // Check if dev login is available + fetch(`${apiBaseUrl}/auth/dev/login`) + .then((res) => { + setIsAvailable(res.ok) + }) + .catch(() => setIsAvailable(false)) + }, [apiBaseUrl]) + + const handleDevLogin = async () => { + try { + const res = await fetch(`${apiBaseUrl}/auth/dev/login`) + if (!res.ok) { + throw new Error('Dev login failed') + } + const data = await res.json() + await onSuccess(data) + } catch (error) { + console.error('Dev login failed:', error) + } + } + + if (isAvailable !== true) { + return null + } + + return ( + + ) +} + export const Component = LoginPage diff --git a/frontend/src/components/agent/agent-result.tsx b/frontend/src/components/agent/agent-result.tsx index 55317f22b..6549281cd 100644 --- a/frontend/src/components/agent/agent-result.tsx +++ b/frontend/src/components/agent/agent-result.tsx @@ -7,6 +7,7 @@ import { selectIsLoading, selectIsSandboxIframeAwake, selectMessages, + selectSandboxStatus, useAppSelector } from '@/state' import { CommandType, TAB, TOOL } from '@/typings/agent' @@ -15,7 +16,7 @@ import MobileResult from './mobile-result' import { Icon } from '../ui/icon' import AwakeMeUpScreen from './awake-me-up-screen' import { useLocation, useParams } from 'react-router' -import { cn, isE2bLink } from '@/lib/utils' +import { cn, isSandboxLink, rewriteLocalhostUrl } from '@/lib/utils' import { DesignModeWrapper } from '@/components/design-mode' import { useTranslation } from 'react-i18next' import { @@ -45,6 +46,7 @@ const AgentResult = ({ className }: AgentResultProps) => { const activeTab = useAppSelector(selectActiveTab) const isSandboxIframeAwake = 
useAppSelector(selectIsSandboxIframeAwake) + const sandboxStatus = useAppSelector(selectSandboxStatus) const messages = useAppSelector(selectMessages) const isRunning = useAppSelector(selectIsLoading) const isShareMode = useMemo( @@ -89,7 +91,7 @@ const AgentResult = ({ className }: AgentResultProps) => { mobileAppResult as { web_preview_url?: string } ).web_preview_url if (webPreviewUrl) { - return webPreviewUrl + return rewriteLocalhostUrl(webPreviewUrl) } } @@ -106,7 +108,7 @@ const AgentResult = ({ className }: AgentResultProps) => { if (result && typeof result === 'object') { const previewUrl = (result as { preview_url?: string }).preview_url if (previewUrl) { - return previewUrl + return rewriteLocalhostUrl(previewUrl) } } return '' @@ -256,12 +258,12 @@ const AgentResult = ({ className }: AgentResultProps) => { const shouldShowAwakeScreen = useMemo(() => { return ( - isE2bLink(resultUrl) && + sandboxStatus === 'paused' && !isSandboxIframeAwake && !isRunning && !isShareMode ) - }, [resultUrl, isSandboxIframeAwake, isRunning, isShareMode]) + }, [sandboxStatus, isSandboxIframeAwake, isRunning, isShareMode]) // Extract slide data from SlideWrite and SlideEdit messages const slideContent = useMemo(() => { @@ -323,7 +325,7 @@ const AgentResult = ({ className }: AgentResultProps) => { // Check if design mode should be available (only for e2b sandbox websites) const isDesignModeAvailable = useMemo(() => { if (!resultUrl) return false - if (!isE2bLink(resultUrl)) return false + if (!isSandboxLink(resultUrl)) return false if (detectUrlType(resultUrl) !== 'website') return false if (isShareMode) return false return true @@ -338,8 +340,6 @@ const AgentResult = ({ className }: AgentResultProps) => { ) } - if (!resultUrl && !mobileAppUrl) return null - if (shouldShowAwakeScreen) return ( { /> ) + if (!resultUrl && !mobileAppUrl) return null + if (hasMobileAppTools && activeTab === TAB.RESULT) { return ( { const activeTab = useAppSelector(selectActiveTab) const vscodeUrl = 
useAppSelector(selectVscodeUrl) + const vncUrl = useAppSelector(selectVncUrl) const isShareMode = useMemo( () => location.pathname.includes('/share/'), @@ -44,6 +46,15 @@ const AgentTabs = ({ sessionId, projectId, agentType }: AgentTabsProps) => { window.open(vscodeUrl, '_blank') } + const handleOpenVNC = () => { + if (!vncUrl) { + toast.error(t('agentTab.errors.vncUrlMissing', 'noVNC URL not available')) + return + } + + window.open(vncUrl, '_blank') + } + const shouldShowProjectTab = useMemo(() => { if (isShareMode) { return false @@ -114,6 +125,15 @@ const AgentTabs = ({ sessionId, projectId, agentType }: AgentTabsProps) => { {t('agentTab.openInVSCode')} )} + {vncUrl && !isShareMode && ( + + )} {agentType === AGENT_TYPE.MOBILE_APP ? ( { const { t } = useTranslation() const messages = useAppSelector(selectMessages) + const isStopped = useAppSelector(selectIsStopped) const dispatch = useAppDispatch() const [plans, setPlans] = useState([]) @@ -28,6 +29,9 @@ const AgentTasks = ({ className }: AgentTasksProps) => { }, [messages]) useEffect(() => { + // Don't auto-promote tasks if the agent is stopped + if (isStopped) return + if (Array.isArray(plans)) { // Check if there are no in_progress tasks const hasInProgress = plans.some( @@ -50,11 +54,11 @@ const AgentTasks = ({ className }: AgentTasksProps) => { } } } - }, [plans, dispatch]) + }, [plans, dispatch, isStopped]) const inProgressPlans = useMemo( - () => countBy(plans, 'status').in_progress || 0, - [plans] + () => isStopped ? 0 : (countBy(plans, 'status').in_progress || 0), + [plans, isStopped] ) const completedPlans = useMemo( @@ -69,7 +73,7 @@ const AgentTasks = ({ className }: AgentTasksProps) => { className={`flex flex-col items-center justify-center w-full ${className}`} >

- {t('agent.tasks.inProgress')} + {isStopped ? t('agent.tasks.stopped', 'Stopped') : t('agent.tasks.inProgress')}

diff --git a/frontend/src/components/agent/subagent-container.tsx b/frontend/src/components/agent/subagent-container.tsx index f88149ba2..27f107240 100644 --- a/frontend/src/components/agent/subagent-container.tsx +++ b/frontend/src/components/agent/subagent-container.tsx @@ -7,12 +7,14 @@ import { CheckCircle2, XCircle, Loader2, - Clock + Clock, + StopCircle } from 'lucide-react' import { useState, useMemo } from 'react' import { useTranslation } from 'react-i18next' import { AgentContext, Message } from '@/typings/agent' import { formatDuration } from '@/lib/utils' +import { useAppSelector, selectIsStopped, selectIsLoading } from '@/state' interface SubagentContainerProps { agentContext: AgentContext @@ -23,7 +25,8 @@ interface SubagentContainerProps { enum SubAgentStatus { RUNNING = 'running', COMPLETED = 'completed', - FAILED = 'failed' + FAILED = 'failed', + STOPPED = 'stopped' } const SubagentContainer = ({ @@ -33,6 +36,8 @@ const SubagentContainer = ({ }: SubagentContainerProps) => { const { t } = useTranslation() const [isExpanded, setIsExpanded] = useState(true) + const isStopped = useAppSelector(selectIsStopped) + const isLoading = useAppSelector(selectIsLoading) // Calculate execution time const executionTime = useMemo(() => { @@ -51,6 +56,7 @@ const SubagentContainer = ({ }, [messages]) // Determine actual status - explicit failed status takes precedence over endTime + // Also check global isStopped/isLoading state to determine subagent status const actualStatus = useMemo(() => { if (agentContext.status === SubAgentStatus.FAILED) { return SubAgentStatus.FAILED @@ -58,14 +64,25 @@ const SubagentContainer = ({ if (agentContext.endTime) { return SubAgentStatus.COMPLETED } - return agentContext.status || SubAgentStatus.RUNNING - }, [agentContext.status, agentContext.endTime]) + const contextStatus = agentContext.status || SubAgentStatus.RUNNING + // If global agent is stopped and this subagent was still running, show as stopped + if (isStopped && 
contextStatus === SubAgentStatus.RUNNING) { + return SubAgentStatus.STOPPED + } + // If main agent is done (not loading, not stopped) and subagent is still "running", + // it means the subagent completed but wasn't marked - show as completed + if (!isLoading && !isStopped && contextStatus === SubAgentStatus.RUNNING) { + return SubAgentStatus.COMPLETED + } + return contextStatus + }, [agentContext.status, agentContext.endTime, isStopped, isLoading]) const statusLabel = useMemo(() => { const keyMap: Record = { [SubAgentStatus.RUNNING]: 'agent.subagent.status.running', [SubAgentStatus.COMPLETED]: 'agent.subagent.status.completed', - [SubAgentStatus.FAILED]: 'agent.subagent.status.failed' + [SubAgentStatus.FAILED]: 'agent.subagent.status.failed', + [SubAgentStatus.STOPPED]: 'agent.subagent.status.stopped' } return t(keyMap[actualStatus] || 'agent.subagent.status.running') }, [actualStatus, t]) @@ -77,6 +94,8 @@ const SubagentContainer = ({ return case SubAgentStatus.FAILED: return + case SubAgentStatus.STOPPED: + return case SubAgentStatus.RUNNING: return default: @@ -152,6 +171,7 @@ const SubagentContainer = ({ ${actualStatus === SubAgentStatus.COMPLETED ? 'bg-green-500/20 text-green-400' : ''} ${actualStatus === SubAgentStatus.RUNNING ? 'bg-blue-500/20 text-blue-400' : ''} ${actualStatus === SubAgentStatus.FAILED ? 'bg-red-500/20 text-red-400' : ''} + ${actualStatus === SubAgentStatus.STOPPED ? 
'bg-yellow-500/20 text-yellow-400' : ''} `} > {statusLabel} diff --git a/frontend/src/components/chat-header-mobile.tsx b/frontend/src/components/chat-header-mobile.tsx index 27aff14cc..2cf4ce074 100644 --- a/frontend/src/components/chat-header-mobile.tsx +++ b/frontend/src/components/chat-header-mobile.tsx @@ -14,6 +14,7 @@ import { } from '@/state' import { deleteSession } from '@/state/slice/sessions' import { clearSessionState } from '@/state/slice/session-state' +import { setRunStatus } from '@/state/slice/agent' import { type ISession } from '@/typings/agent' import HeaderDropdownMenu from '@/components/header-dropdown-menu' import ShareConversation from '@/components/agent/share-conversation' @@ -74,6 +75,7 @@ const ChatHeaderMobile = ({ try { await dispatch(deleteSession(sessionId)).unwrap() dispatch(clearSessionState(sessionId)) + dispatch(setRunStatus(null)) setIsDeleteDialogOpen(false) navigate('/') } catch (error) { diff --git a/frontend/src/components/chat-header.tsx b/frontend/src/components/chat-header.tsx index 921b2c581..9abac8bbe 100644 --- a/frontend/src/components/chat-header.tsx +++ b/frontend/src/components/chat-header.tsx @@ -28,6 +28,7 @@ import { useSearchParams } from 'react-router' import { useNavigate } from 'react-router' import { deleteSession } from '@/state/slice/sessions' import { clearSessionState } from '@/state/slice/session-state' +import { setRunStatus } from '@/state/slice/agent' import ShareConversation from '@/components/agent/share-conversation' import { AlertDialog, @@ -126,6 +127,10 @@ const ChatHeader = ({ try { await dispatch(deleteSession(sessionId)).unwrap() dispatch(clearSessionState(sessionId)) + resetSessionState() + resetConversationState() + setSessionId(null) + dispatch(setRunStatus(null)) setIsDeleteDialogOpen(false) navigate('/') } catch (error) { diff --git a/frontend/src/components/header.tsx b/frontend/src/components/header.tsx index ec9b3e736..00396c0d8 100644 --- a/frontend/src/components/header.tsx +++ 
b/frontend/src/components/header.tsx @@ -20,6 +20,7 @@ import { } from '@/state' import { deleteSession } from '@/state/slice/sessions' import { clearSessionState } from '@/state/slice/session-state' +import { setRunStatus } from '@/state/slice/agent' import { ISession } from '@/typings' import { AlertDialog, @@ -90,6 +91,7 @@ const AgentHeader = ({ sessionData, isChatPage }: AgentHeaderProps) => { await dispatch(deleteSession(sessionId)).unwrap() // Clear cached session state to free up localStorage dispatch(clearSessionState(sessionId)) + dispatch(setRunStatus(null)) setIsDeleteDialogOpen(false) // Navigate to home page after deletion navigate('/') diff --git a/frontend/src/components/project-list.tsx b/frontend/src/components/project-list.tsx index 6464211fc..d5afc292e 100644 --- a/frontend/src/components/project-list.tsx +++ b/frontend/src/components/project-list.tsx @@ -45,6 +45,9 @@ import { hasSessionDisplayTitle } from '@/utils/session-title' interface ProjectListProps { workspaceInfo?: string isLoading: boolean + loadingMore: boolean + hasMore: boolean + onLoadMore: () => void handleResetState: () => void handleNewProject: () => void } @@ -52,6 +55,9 @@ interface ProjectListProps { const ProjectList = ({ workspaceInfo, isLoading, + loadingMore, + hasMore, + onLoadMore, handleResetState, handleNewProject }: ProjectListProps) => { @@ -322,6 +328,25 @@ const ProjectList = ({ {t('sidebar.seeMore')} )} + {loadingMore && ( +
+ {t('common.loadingMore')} +
+ )} + {!loadingMore && hasMore && showAllProjects && ( + + )}
{ e.preventDefault() e.stopPropagation() + setIsDropdownOpen(false) setIsDeleteDialogOpen(true) } @@ -105,6 +106,10 @@ const SessionItem = ({ await dispatch(deleteSession(session.id)).unwrap() dispatch(clearSessionState(session.id)) dispatch(removePin(session.id)) + if (isActive) { + dispatch(setRunStatus(null)) + dispatch(setLoading(false)) + } setIsDeleteDialogOpen(false) } catch (error) { console.error('Failed to delete session:', error) diff --git a/frontend/src/components/share-agent-content.tsx b/frontend/src/components/share-agent-content.tsx index b36a59d5d..e872bac26 100644 --- a/frontend/src/components/share-agent-content.tsx +++ b/frontend/src/components/share-agent-content.tsx @@ -28,7 +28,7 @@ import { import { BUILD_STEP, ISession, TAB } from '@/typings/agent' import AgentResult from '@/components/agent/agent-result' import AgentPopoverDone from '@/components/agent/agent-popover-done' -import { isE2bLink } from '@/lib/utils' +import { isSandboxLink } from '@/lib/utils' import { SidebarProvider } from '@/components/ui/sidebar' import AgentTabMobile, { type ChatOption as MobileChatOption @@ -76,7 +76,9 @@ export function ShareAgentContent() { fetchSession() }, 5000) } else { - dispatch(setSelectedFeature(data.agent_type ?? null)) + // Normalize chat sessions to 'general' to prevent invalid agent_type + const agentType = data.agent_type === 'chat' ? 'general' : (data.agent_type ?? null) + dispatch(setSelectedFeature(agentType)) setSessionData(data) setSessionError(null) // Clear any previous errors } @@ -234,7 +236,7 @@ export function ShareAgentContent() {
- {vscodeUrl && isE2bLink(vscodeUrl) && ( + {vscodeUrl && isSandboxLink(vscodeUrl) && (