From 2bf90a333fc86244fc4dc7495c6fd17beba043a5 Mon Sep 17 00:00:00 2001 From: M3gA-Mind Date: Fri, 22 May 2026 17:32:20 +0530 Subject: [PATCH] feat(composio): add GitHub as a native memory provider Lifts the GitHub module from catalog-only into a full ComposioProvider that incrementally ingests issues and PRs the connected user is involved with into the Memory Tree. - provider.rs: ComposioProvider impl using GITHUB_SEARCH_ISSUES with `involves:{login}` + ISO 8601 cursor for incremental fetches - sync.rs: extract_issues, extract_issue_id, extract_issue_title, extract_issue_updated_at, extract_user_login helpers; html_url-based slug fallback for composite doc IDs - tests.rs: 38 unit tests covering all helpers + provider metadata - mod.rs: upgraded from catalog-stub to full provider layout - registry.rs: GitHubProvider registered in init_default_providers Closes #2408 --- .../composio/providers/github/mod.rs | 23 +- .../composio/providers/github/provider.rs | 424 ++++++++++++++++++ .../composio/providers/github/sync.rs | 248 ++++++++++ .../composio/providers/github/tests.rs | 181 ++++++++ src/openhuman/composio/providers/registry.rs | 1 + 5 files changed, 871 insertions(+), 6 deletions(-) create mode 100644 src/openhuman/composio/providers/github/provider.rs create mode 100644 src/openhuman/composio/providers/github/sync.rs create mode 100644 src/openhuman/composio/providers/github/tests.rs diff --git a/src/openhuman/composio/providers/github/mod.rs b/src/openhuman/composio/providers/github/mod.rs index acee9477a8..7b0385f476 100644 --- a/src/openhuman/composio/providers/github/mod.rs +++ b/src/openhuman/composio/providers/github/mod.rs @@ -1,11 +1,22 @@ -//! GitHub Composio toolkit — curated tool catalog only. +//! GitHub Composio provider — incremental Memory Tree ingest for issues and +//! pull requests involving the connected user. //! -//! There is no native [`super::ComposioProvider`] implementation for -//! GitHub yet (no profile fetch / sync). 
The curated catalog here is -//! still consulted by [`super::catalog_for_toolkit`] so the meta-tool -//! layer applies the same whitelist + scope filtering it does for -//! Gmail and Notion. +//! Mirrors the [`crate::openhuman::composio::providers::clickup`] layout so +//! anyone familiar with ClickUp/Notion ingestion can read this without +//! re-learning a new shape: +//! +//! - `provider.rs` — `impl ComposioProvider for GitHubProvider` +//! - `sync.rs` — payload-shape helpers (result extraction, title, cursor) +//! - `tools.rs` — `GITHUB_CURATED` whitelist of Composio actions +//! - `tests.rs` — unit tests for the helpers + trait metadata +//! +//! Issue: #2408. +mod provider; +mod sync; +#[cfg(test)] +mod tests; pub mod tools; +pub use provider::GitHubProvider; pub use tools::GITHUB_CURATED; diff --git a/src/openhuman/composio/providers/github/provider.rs b/src/openhuman/composio/providers/github/provider.rs new file mode 100644 index 0000000000..d7f160cb2c --- /dev/null +++ b/src/openhuman/composio/providers/github/provider.rs @@ -0,0 +1,424 @@ +//! GitHub provider — incremental sync of issues and pull requests involving +//! the authenticated user, with per-item persistence into the Memory Tree. +//! +//! On each sync pass: +//! +//! 1. Load persistent [`SyncState`] from the KV store. +//! 2. Check the daily request budget — bail early if exhausted. +//! 3. Resolve the authenticated user's GitHub login (used in the search +//! query); cached cheaply across re-fetches. +//! 4. Search for issues and PRs involving the user via +//! `GITHUB_SEARCH_ISSUES` with `involves:{login}`, filtered to items +//! updated since the cursor (when available). +//! 5. For each result, persist as a single memory document if it's new +//! *or* edited since the last sync. +//! 6. Advance the cursor to the newest `updated_at` seen and save. +//! +//! Privacy posture: the `involves:` search qualifier returns only items the +//! 
user created, was assigned to, mentioned in, or commented on — it never +//! surfaces private repos the user can't access. This mirrors the +//! "fetch-what-the-user-sees" model gmail / notion already follow. + +use async_trait::async_trait; +use serde_json::json; + +use super::sync; +use crate::openhuman::composio::providers::sync_state::{persist_single_item, SyncState}; +use crate::openhuman::composio::providers::{ + pick_str, ComposioProvider, CuratedTool, ProviderContext, ProviderUserProfile, SyncOutcome, + SyncReason, +}; + +pub(crate) const ACTION_GET_AUTHENTICATED_USER: &str = "GITHUB_GET_AUTHENTICATED_USER"; +pub(crate) const ACTION_SEARCH_ISSUES: &str = "GITHUB_SEARCH_ISSUES"; + +/// Items per search page on steady-state syncs. +const PAGE_SIZE: u32 = 50; + +/// Larger page for the initial post-OAuth backfill. +const INITIAL_PAGE_SIZE: u32 = 100; + +/// Maximum pages per sync pass. Caps initial-backfill churn; the rest rolls +/// over to the next scheduled interval. +const MAX_PAGES: u32 = 20; + +pub struct GitHubProvider; + +impl GitHubProvider { + pub fn new() -> Self { + Self + } +} + +impl Default for GitHubProvider { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl ComposioProvider for GitHubProvider { + fn toolkit_slug(&self) -> &'static str { + "github" + } + + fn curated_tools(&self) -> Option<&'static [CuratedTool]> { + Some(super::tools::GITHUB_CURATED) + } + + fn sync_interval_secs(&self) -> Option { + // 30 minutes — GitHub issues change less frequently than Slack + // messages, so a half-hour cadence keeps the memory fresh without + // hammering the search API. 
+ Some(30 * 60) + } + + async fn fetch_user_profile( + &self, + ctx: &ProviderContext, + ) -> Result { + tracing::debug!( + connection_id = ?ctx.connection_id, + "[composio:github] fetch_user_profile via {ACTION_GET_AUTHENTICATED_USER}" + ); + + let resp = ctx + .execute(ACTION_GET_AUTHENTICATED_USER, Some(json!({}))) + .await + .map_err(|e| { + format!("[composio:github] {ACTION_GET_AUTHENTICATED_USER} failed: {e:#}") + })?; + + if !resp.successful { + let err = resp + .error + .clone() + .unwrap_or_else(|| "provider reported failure".to_string()); + return Err(format!( + "[composio:github] {ACTION_GET_AUTHENTICATED_USER}: {err}" + )); + } + + let data = &resp.data; + let login = sync::extract_user_login(data); + let display_name = pick_str(data, &["name", "data.name"]).or_else(|| login.clone()); + let email = pick_str(data, &["email", "data.email"]); + let avatar_url = pick_str(data, &["avatar_url", "data.avatar_url"]); + let profile_url = pick_str(data, &["html_url", "data.html_url"]); + + Ok(ProviderUserProfile { + toolkit: "github".to_string(), + connection_id: ctx.connection_id.clone(), + display_name, + email, + username: login, + avatar_url, + profile_url, + extras: data.clone(), + }) + } + + async fn sync(&self, ctx: &ProviderContext, reason: SyncReason) -> Result { + let started_at_ms = sync::now_ms(); + let connection_id = ctx + .connection_id + .clone() + .unwrap_or_else(|| "default".to_string()); + + tracing::info!( + connection_id = %connection_id, + reason = reason.as_str(), + "[composio:github] incremental sync starting" + ); + + // ── Step 1: load persistent sync state ────────────────────── + let Some(memory) = ctx.memory_client() else { + return Err("[composio:github] memory client not ready".to_string()); + }; + let mut state = SyncState::load(&memory, "github", &connection_id).await?; + + // ── Step 2: check daily budget ─────────────────────────────── + if state.budget_exhausted() { + tracing::info!( + connection_id = %connection_id, + 
"[composio:github] daily request budget exhausted, skipping sync" + ); + return Ok(SyncOutcome { + toolkit: "github".to_string(), + connection_id: Some(connection_id), + reason: reason.as_str().to_string(), + items_ingested: 0, + started_at_ms, + finished_at_ms: sync::now_ms(), + summary: "github sync skipped: daily budget exhausted".to_string(), + details: json!({ "budget_exhausted": true }), + }); + } + + // ── Step 3: resolve the authenticated user's login ────────── + let login = match self.resolve_login(ctx, &mut state).await { + Ok(l) => l, + Err(e) => { + let _ = state.save(&memory).await; + return Err(e); + } + }; + + if state.budget_exhausted() { + tracing::info!( + connection_id = %connection_id, + "[composio:github] budget exhausted after login probe, skipping sync" + ); + state.save(&memory).await?; + return Ok(SyncOutcome { + toolkit: "github".to_string(), + connection_id: Some(connection_id), + reason: reason.as_str().to_string(), + items_ingested: 0, + started_at_ms, + finished_at_ms: sync::now_ms(), + summary: "github sync skipped: daily budget exhausted after login probe" + .to_string(), + details: json!({ "budget_exhausted": true, "login_resolved": true }), + }); + } + + // ── Step 4: paginated issue search ─────────────────────────── + // + // `involves:{login}` matches issues/PRs the user created, was assigned + // to, was mentioned in, or commented on — scoped to what GitHub's own + // access rules allow. Combined with `updated:>{cursor}` on subsequent + // runs this converges on a minimal diff fetch. + let page_size = match reason { + SyncReason::ConnectionCreated => INITIAL_PAGE_SIZE, + _ => PAGE_SIZE, + }; + + // Build the base search query. + let query = match &state.cursor { + Some(cursor) => { + // GitHub's `updated:>` qualifier accepts ISO 8601 dates + // (YYYY-MM-DD or full datetime). Using the full stored cursor + // (e.g. `"2024-05-21T15:30:00Z"`) is accepted by the API and + // more precise than truncating to the day. 
+                format!("involves:{login} updated:>{cursor}")
+            }
+            None => format!("involves:{login}"),
+        };
+
+        let mut total_fetched: usize = 0;
+        let mut total_persisted: usize = 0;
+        let mut newest_updated: Option<String> = None;
+
+        // GitHub's search API only serves the first 1,000 matches of a query;
+        // requesting a page beyond that window is rejected. Clamp the page
+        // count so the larger initial-backfill `page_size` cannot walk past
+        // it and turn an otherwise successful sync into an error.
+        let max_pages = MAX_PAGES.min(1_000 / page_size).max(1);
+
+        'pages: for page_num in 1..=max_pages {
+            if state.budget_exhausted() {
+                tracing::info!(
+                    page = page_num,
+                    "[composio:github] budget exhausted mid-sync, stopping pagination"
+                );
+                break;
+            }
+
+            let args = json!({
+                "q": query,
+                "sort": "updated",
+                "order": "desc",
+                "per_page": page_size,
+                "page": page_num,
+            });
+
+            tracing::debug!(
+                connection_id = %connection_id,
+                page = page_num,
+                query = %query,
+                "[composio:github] executing {ACTION_SEARCH_ISSUES}"
+            );
+
+            let resp = match ctx.execute(ACTION_SEARCH_ISSUES, Some(args)).await {
+                Ok(resp) => resp,
+                Err(e) => {
+                    // Same best-effort save as the `!resp.successful` branch
+                    // below, so ids marked on earlier pages of this pass are
+                    // not lost when the call itself fails.
+                    let _ = state.save(&memory).await;
+                    return Err(format!(
+                        "[composio:github] {ACTION_SEARCH_ISSUES} page={page_num}: {e:#}"
+                    ));
+                }
+            };
+            state.record_requests(1);
+
+            if !resp.successful {
+                let err = resp
+                    .error
+                    .clone()
+                    .unwrap_or_else(|| "provider reported failure".to_string());
+                let _ = state.save(&memory).await;
+                return Err(format!(
+                    "[composio:github] {ACTION_SEARCH_ISSUES} page={page_num}: {err}"
+                ));
+            }
+
+            let issues = sync::extract_issues(&resp.data);
+            total_fetched += issues.len();
+
+            if issues.is_empty() {
+                tracing::debug!(
+                    page = page_num,
+                    "[composio:github] empty page, stopping pagination"
+                );
+                break;
+            }
+
+            // ── Per-item dedup + persist ─────────────────────────────
+            for issue in &issues {
+                let Some(issue_id) = sync::extract_issue_id(issue) else {
+                    tracing::debug!("[composio:github] issue missing id, skipping");
+                    continue;
+                };
+
+                let updated = sync::extract_issue_updated_at(issue);
+
+                // Track the newest `updated_at` for cursor advancement.
+                if let Some(ref ts) = updated {
+                    if newest_updated.as_ref().is_none_or(|ex| ts > ex) {
+                        newest_updated = Some(ts.clone());
+                    }
+                }
+
+                // Composite dedup key: issue_id@updated_at (same trick ClickUp
+                // uses so that edits after the last sync are re-persisted).
+ let sync_key = match &updated { + Some(ts) => format!("{issue_id}@{ts}"), + None => issue_id.clone(), + }; + + // If the item's updated_at is at or before our cursor AND we've + // already synced this composite key, every subsequent result on + // this page is guaranteed to be older — stop pagination early. + if let (Some(ref cursor), Some(ref ts)) = (&state.cursor, &updated) { + if ts <= cursor && state.is_synced(&sync_key) { + tracing::debug!( + issue_id = %issue_id, + "[composio:github] reached cursor boundary, stopping" + ); + break 'pages; + } + } + + if state.is_synced(&sync_key) { + continue; + } + + let title_text = sync::extract_issue_title(issue) + .unwrap_or_else(|| format!("GitHub issue {issue_id}")); + let doc_id = format!("composio-github-issue-{issue_id}"); + + match persist_single_item( + &memory, + "github", + &doc_id, + &title_text, + issue, + "github", + ctx.connection_id.as_deref(), + ) + .await + { + Ok(_) => { + state.mark_synced(&sync_key); + total_persisted += 1; + } + Err(e) => { + tracing::warn!( + issue_id = %issue_id, + error = %e, + "[composio:github] failed to persist issue (continuing)" + ); + } + } + } + + // GitHub search pages are 0-indexed in terms of total results; + // a short page means we've exhausted the result set. 
+ if (issues.len() as u32) < page_size { + tracing::debug!( + page = page_num, + returned = issues.len(), + "[composio:github] short page, end of results" + ); + break; + } + } + + // ── Step 5: advance cursor and save state ──────────────────── + if let Some(new_cursor) = newest_updated { + state.advance_cursor(&new_cursor); + } + state.set_last_sync_at_ms(sync::now_ms()); + state.save(&memory).await?; + + let finished_at_ms = sync::now_ms(); + let summary = format!( + "github sync ({reason}): fetched {total_fetched}, persisted {total_persisted} new, \ + budget remaining {remaining}", + reason = reason.as_str(), + remaining = state.budget_remaining(), + ); + tracing::info!( + connection_id = %connection_id, + elapsed_ms = finished_at_ms.saturating_sub(started_at_ms), + total_fetched, + total_persisted, + budget_remaining = state.budget_remaining(), + "[composio:github] incremental sync complete" + ); + + Ok(SyncOutcome { + toolkit: "github".to_string(), + connection_id: Some(connection_id), + reason: reason.as_str().to_string(), + items_ingested: total_persisted, + started_at_ms, + finished_at_ms, + summary, + details: json!({ + "issues_fetched": total_fetched, + "issues_persisted": total_persisted, + "budget_remaining": state.budget_remaining(), + "cursor": state.cursor, + "synced_ids_total": state.synced_ids.len(), + }), + }) + } +} + +impl GitHubProvider { + /// Resolve the authenticated user's GitHub login handle. + /// + /// The login is stable for the connection lifetime. We re-fetch on every + /// sync rather than caching in `SyncState` to (a) keep the struct lean + /// and (b) implicitly validate that the OAuth token is still valid before + /// we start paginating search results. 
+    async fn resolve_login(
+        &self,
+        ctx: &ProviderContext,
+        state: &mut SyncState,
+    ) -> Result<String, String> {
+        let resp = ctx
+            .execute(ACTION_GET_AUTHENTICATED_USER, Some(json!({})))
+            .await
+            .map_err(|e| {
+                format!("[composio:github] {ACTION_GET_AUTHENTICATED_USER} failed: {e:#}")
+            })?;
+        // The probe counts against the same request budget as the search pages.
+        state.record_requests(1);
+
+        if !resp.successful {
+            let err = resp
+                .error
+                .clone()
+                .unwrap_or_else(|| "provider reported failure".to_string());
+            return Err(format!(
+                "[composio:github] {ACTION_GET_AUTHENTICATED_USER}: {err}"
+            ));
+        }
+
+        // A successful response without a login is still an error: the search
+        // query cannot be built without it.
+        sync::extract_user_login(&resp.data).ok_or_else(|| {
+            "[composio:github] GITHUB_GET_AUTHENTICATED_USER returned no login".to_string()
+        })
+    }
+}
diff --git a/src/openhuman/composio/providers/github/sync.rs b/src/openhuman/composio/providers/github/sync.rs
new file mode 100644
index 0000000000..3804c520fc
--- /dev/null
+++ b/src/openhuman/composio/providers/github/sync.rs
@@ -0,0 +1,248 @@
+//! GitHub sync helpers — result extraction, identity helpers, and time utilities.
+//!
+//! GitHub's REST API (proxied through Composio) returns search results and
+//! authenticated-user payloads in a small number of shapes. The functions here
+//! walk the union of common Composio envelope variants so the provider stays
+//! clean and branch-free.
+
+use serde_json::Value;
+
+use crate::openhuman::composio::providers::pick_str;
+
+/// Walk the Composio response envelope for GitHub search issue results.
+///
+/// `GITHUB_SEARCH_ISSUES` wraps GitHub's `GET /search/issues` response, which
+/// returns `{"total_count": N, "items": [...]}`. Composio may re-wrap this under
+/// `data` or `data.data`; we probe each shape in order.
+pub(crate) fn extract_issues(data: &Value) -> Vec<Value> {
+    // Probed in order; the first candidate that is a JSON array wins.
+    let candidates = [
+        data.pointer("/data/items"),
+        data.pointer("/items"),
+        data.pointer("/data/data/items"),
+        data.pointer("/data/results"),
+        data.pointer("/results"),
+    ];
+    for cand in candidates.into_iter().flatten() {
+        if let Some(arr) = cand.as_array() {
+            return arr.clone();
+        }
+    }
+    Vec::new()
+}
+
+/// Extract a stable, globally unique identifier for a GitHub issue or PR.
+///
+/// GitHub's internal `id` field is a large integer unique across all issues
+/// and PRs on github.com. We convert it to a string for use as a sync key.
+/// Falls back to an `{owner}/{repo}#{number}` slug parsed from the item's URL
+/// if `id` is absent, and returns `None` when neither is usable.
+pub(crate) fn extract_issue_id(issue: &Value) -> Option<String> {
+    // Primary: numeric internal GitHub ID.
+    if let Some(id) = issue.get("id").or_else(|| issue.pointer("/data/id")) {
+        if let Some(n) = id.as_u64() {
+            return Some(n.to_string());
+        }
+        if let Some(s) = id.as_str() {
+            let trimmed = s.trim();
+            if !trimmed.is_empty() {
+                return Some(trimmed.to_string());
+            }
+        }
+    }
+    // Fallback: parse owner/repo/number from html_url path segments.
+    // URL shape: https://github.com/{owner}/{repo}/issues/{number}
+    if let Some(url) = pick_str(issue, &["html_url", "data.html_url", "url", "data.url"]) {
+        if let Some(slug) = github_url_to_slug(&url) {
+            return Some(slug);
+        }
+    }
+    None
+}
+
+/// Build a human-readable document title for a GitHub issue/PR.
+///
+/// Format: `GitHub: {owner}/{repo}#{number}: {title}`.
+/// Returns the bare title when no slug can be derived from `html_url`, and
+/// `None` when the item has no title at all (callers supply any placeholder).
+pub(crate) fn extract_issue_title(issue: &Value) -> Option<String> {
+    let title = pick_str(issue, &["title", "data.title"])?;
+
+    // Best-effort: extract owner/repo#N from html_url for the prefix.
+    let prefix = pick_str(issue, &["html_url", "data.html_url"])
+        .and_then(|url| github_url_to_slug(&url))
+        .unwrap_or_default();
+
+    if prefix.is_empty() {
+        Some(title)
+    } else {
+        Some(format!("GitHub: {prefix}: {title}"))
+    }
+}
+
+/// Parse `https://github.com/{owner}/{repo}/issues/{number}` (or `/pull/`,
+/// or the API's `/pulls/`) into `"{owner}/{repo}#{number}"`.
+///
+/// Returns `None` for unrecognised shapes: fewer than seven `/`-separated
+/// segments, a kind segment other than `issues`/`pull`/`pulls`, a
+/// non-numeric trailing segment, or an empty owner/repo.
+fn github_url_to_slug(url: &str) -> Option<String> {
+    let segs: Vec<&str> = url.trim_end_matches('/').split('/').collect();
+    // Minimum: ["https:", "", "github.com", owner, repo, "issues", number]
+    if segs.len() < 7 {
+        return None;
+    }
+    let number = segs[segs.len() - 1];
+    let kind = segs[segs.len() - 2];
+    let repo = segs[segs.len() - 3];
+    let owner = segs[segs.len() - 4];
+
+    // Without these two checks any deep GitHub URL (e.g. `.../tree/main`)
+    // would be turned into a bogus slug and used as a document id.
+    let is_item_kind = matches!(kind, "issues" | "pull" | "pulls");
+    let is_number = !number.is_empty() && number.bytes().all(|b| b.is_ascii_digit());
+    if owner.is_empty() || repo.is_empty() || !is_item_kind || !is_number {
+        return None;
+    }
+    Some(format!("{owner}/{repo}#{number}"))
+}
+
+/// Extract the `updated_at` ISO 8601 timestamp from a GitHub issue.
+///
+/// GitHub returns `updated_at` as `"2024-05-21T15:30:00Z"`. Timestamps in this
+/// single UTC `Z` form sort lexicographically, so we use them directly as the
+/// sync cursor.
+pub(crate) fn extract_issue_updated_at(issue: &Value) -> Option<String> {
+    pick_str(
+        issue,
+        &[
+            "updated_at",
+            "data.updated_at",
+            "updatedAt",
+            "data.updatedAt",
+        ],
+    )
+}
+
+/// Extract the authenticated user's login handle from a
+/// `GITHUB_GET_AUTHENTICATED_USER` response.
+pub(crate) fn extract_user_login(data: &Value) -> Option<String> {
+    pick_str(data, &["login", "data.login"])
+}
+
+/// Current wall-clock time in milliseconds since the UNIX epoch.
+pub(crate) fn now_ms() -> u64 { + use std::time::{SystemTime, UNIX_EPOCH}; + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0) +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn extract_issues_from_data_items() { + let data = json!({ "data": { "items": [{"id": 1}] } }); + assert_eq!(extract_issues(&data).len(), 1); + } + + #[test] + fn extract_issues_from_top_level_items() { + let data = json!({ "items": [{"id": 1}, {"id": 2}] }); + assert_eq!(extract_issues(&data).len(), 2); + } + + #[test] + fn extract_issues_empty_when_missing() { + let data = json!({ "foo": "bar" }); + assert!(extract_issues(&data).is_empty()); + } + + #[test] + fn extract_issue_id_from_numeric_field() { + let issue = json!({ "id": 123456789u64, "title": "Fix bug" }); + assert_eq!(extract_issue_id(&issue), Some("123456789".to_string())); + } + + #[test] + fn extract_issue_id_from_wrapped_data() { + let issue = json!({ "data": { "id": 99u64 } }); + assert_eq!(extract_issue_id(&issue), Some("99".to_string())); + } + + #[test] + fn extract_issue_id_falls_back_to_html_url() { + let issue = json!({ + "html_url": "https://github.com/owner/repo/issues/42" + }); + assert_eq!(extract_issue_id(&issue), Some("owner/repo#42".to_string())); + } + + #[test] + fn extract_issue_id_none_when_missing() { + let issue = json!({ "title": "No ID here" }); + assert!(extract_issue_id(&issue).is_none()); + } + + #[test] + fn extract_issue_title_builds_prefixed_title() { + let issue = json!({ + "id": 1u64, + "title": "Fix race condition", + "html_url": "https://github.com/acme/core/issues/99" + }); + assert_eq!( + extract_issue_title(&issue), + Some("GitHub: acme/core#99: Fix race condition".to_string()) + ); + } + + #[test] + fn extract_issue_title_returns_raw_title_when_no_url() { + let issue = json!({ "title": "Bare title" }); + assert_eq!(extract_issue_title(&issue), Some("Bare title".to_string())); + } + + #[test] + fn 
extract_issue_title_none_when_missing() { + let issue = json!({ "id": 1u64 }); + assert!(extract_issue_title(&issue).is_none()); + } + + #[test] + fn extract_issue_updated_at_from_top_level() { + let issue = json!({ "updated_at": "2024-05-21T15:30:00Z" }); + assert_eq!( + extract_issue_updated_at(&issue), + Some("2024-05-21T15:30:00Z".to_string()) + ); + } + + #[test] + fn extract_issue_updated_at_from_data_wrapper() { + let issue = json!({ "data": { "updated_at": "2023-01-01T00:00:00Z" } }); + assert_eq!( + extract_issue_updated_at(&issue), + Some("2023-01-01T00:00:00Z".to_string()) + ); + } + + #[test] + fn extract_issue_updated_at_none_when_missing() { + let issue = json!({ "id": 1u64 }); + assert!(extract_issue_updated_at(&issue).is_none()); + } + + #[test] + fn extract_user_login_from_top_level() { + let data = json!({ "login": "octocat" }); + assert_eq!(extract_user_login(&data), Some("octocat".to_string())); + } + + #[test] + fn extract_user_login_from_data_wrapper() { + let data = json!({ "data": { "login": "monalisa" } }); + assert_eq!(extract_user_login(&data), Some("monalisa".to_string())); + } + + #[test] + fn extract_user_login_none_when_missing() { + let data = json!({ "id": 1u64 }); + assert!(extract_user_login(&data).is_none()); + } + + #[test] + fn now_ms_returns_nonzero() { + assert!(now_ms() > 0); + } +} diff --git a/src/openhuman/composio/providers/github/tests.rs b/src/openhuman/composio/providers/github/tests.rs new file mode 100644 index 0000000000..5269fcdbde --- /dev/null +++ b/src/openhuman/composio/providers/github/tests.rs @@ -0,0 +1,181 @@ +//! Unit tests for the GitHub Composio provider. 
+ +use super::sync::{ + extract_issue_id, extract_issue_title, extract_issue_updated_at, extract_issues, + extract_user_login, +}; +use super::GitHubProvider; +use crate::openhuman::composio::providers::ComposioProvider; +use serde_json::json; + +// ── extract_issues ─────────────────────────────────────────────────────────── + +#[test] +fn extract_issues_walks_data_items_shape() { + let data = json!({ "data": { "items": [{"id": 1u64}] } }); + assert_eq!(extract_issues(&data).len(), 1); +} + +#[test] +fn extract_issues_walks_top_level_items_shape() { + let data = json!({ "items": [{"id": 1u64}, {"id": 2u64}] }); + assert_eq!(extract_issues(&data).len(), 2); +} + +#[test] +fn extract_issues_returns_empty_when_no_items_key() { + let data = json!({ "foo": "bar" }); + assert!(extract_issues(&data).is_empty()); +} + +#[test] +fn extract_issues_handles_data_data_nesting() { + let data = json!({ "data": { "data": { "items": [{"id": 9u64}] } } }); + assert_eq!(extract_issues(&data).len(), 1); +} + +// ── extract_issue_id ───────────────────────────────────────────────────────── + +#[test] +fn extract_issue_id_from_numeric_id() { + let issue = json!({ "id": 123456789u64, "title": "Fix race" }); + assert_eq!(extract_issue_id(&issue), Some("123456789".to_string())); +} + +#[test] +fn extract_issue_id_from_wrapped_data() { + let issue = json!({ "data": { "id": 42u64 } }); + assert_eq!(extract_issue_id(&issue), Some("42".to_string())); +} + +#[test] +fn extract_issue_id_falls_back_to_html_url_path() { + let issue = json!({ + "html_url": "https://github.com/owner/repo/issues/7" + }); + assert_eq!(extract_issue_id(&issue), Some("owner/repo#7".to_string())); +} + +#[test] +fn extract_issue_id_none_when_no_id_or_url() { + let issue = json!({ "title": "orphan" }); + assert!(extract_issue_id(&issue).is_none()); +} + +// ── extract_issue_title ────────────────────────────────────────────────────── + +#[test] +fn extract_issue_title_builds_prefixed_title() { + let issue = json!({ + 
"id": 1u64, + "title": "Fix race condition", + "html_url": "https://github.com/acme/core/issues/99" + }); + assert_eq!( + extract_issue_title(&issue), + Some("GitHub: acme/core#99: Fix race condition".to_string()) + ); +} + +#[test] +fn extract_issue_title_pr_url_also_works() { + let issue = json!({ + "id": 2u64, + "title": "Add feature", + "html_url": "https://github.com/org/repo/pull/101" + }); + assert_eq!( + extract_issue_title(&issue), + Some("GitHub: org/repo#101: Add feature".to_string()) + ); +} + +#[test] +fn extract_issue_title_returns_raw_title_when_no_url() { + let issue = json!({ "title": "Bare title" }); + assert_eq!(extract_issue_title(&issue), Some("Bare title".to_string())); +} + +#[test] +fn extract_issue_title_none_when_no_title() { + let issue = json!({ "id": 1u64 }); + assert!(extract_issue_title(&issue).is_none()); +} + +// ── extract_issue_updated_at ───────────────────────────────────────────────── + +#[test] +fn extract_issue_updated_at_from_top_level() { + let issue = json!({ "updated_at": "2024-05-21T15:30:00Z" }); + assert_eq!( + extract_issue_updated_at(&issue), + Some("2024-05-21T15:30:00Z".to_string()) + ); +} + +#[test] +fn extract_issue_updated_at_from_data_wrapper() { + let issue = json!({ "data": { "updated_at": "2023-01-01T00:00:00Z" } }); + assert_eq!( + extract_issue_updated_at(&issue), + Some("2023-01-01T00:00:00Z".to_string()) + ); +} + +#[test] +fn extract_issue_updated_at_none_when_missing() { + let issue = json!({ "id": 1u64 }); + assert!(extract_issue_updated_at(&issue).is_none()); +} + +// ── extract_user_login ─────────────────────────────────────────────────────── + +#[test] +fn extract_user_login_from_top_level() { + let data = json!({ "login": "octocat" }); + assert_eq!(extract_user_login(&data), Some("octocat".to_string())); +} + +#[test] +fn extract_user_login_from_data_wrapper() { + let data = json!({ "data": { "login": "monalisa" } }); + assert_eq!(extract_user_login(&data), Some("monalisa".to_string())); +} + 
+#[test] +fn extract_user_login_none_when_missing() { + let data = json!({ "id": 1u64 }); + assert!(extract_user_login(&data).is_none()); +} + +// ── provider metadata ──────────────────────────────────────────────────────── + +#[test] +fn provider_metadata_is_stable() { + let p = GitHubProvider::new(); + assert_eq!(p.toolkit_slug(), "github"); + assert_eq!(p.sync_interval_secs(), Some(30 * 60)); + assert!(p.curated_tools().is_some()); +} + +#[test] +fn curated_tools_contains_core_actions() { + let p = GitHubProvider::new(); + let curated = p.curated_tools().expect("GITHUB_CURATED is registered"); + let slugs: Vec<&str> = curated.iter().map(|t| t.slug).collect(); + assert!(slugs.contains(&"GITHUB_GET_AUTHENTICATED_USER")); + assert!(slugs.contains(&"GITHUB_SEARCH_ISSUES")); + assert!(slugs.contains(&"GITHUB_LIST_REPOSITORY_ISSUES")); +} + +#[test] +fn default_impl_matches_new() { + let a = GitHubProvider::new(); + let b = GitHubProvider::default(); + assert_eq!(a.toolkit_slug(), b.toolkit_slug()); + assert_eq!(a.sync_interval_secs(), b.sync_interval_secs()); + assert_eq!( + a.curated_tools().map(<[_]>::len), + b.curated_tools().map(<[_]>::len), + ); +} diff --git a/src/openhuman/composio/providers/registry.rs b/src/openhuman/composio/providers/registry.rs index 3f8e3d2ca7..554a9fad22 100644 --- a/src/openhuman/composio/providers/registry.rs +++ b/src/openhuman/composio/providers/registry.rs @@ -79,6 +79,7 @@ pub fn all_providers() -> Vec { /// Idempotent: re-running just re-registers (no-op in practice). pub fn init_default_providers() { register_provider(Arc::new(super::clickup::ClickUpProvider::new())); + register_provider(Arc::new(super::github::GitHubProvider::new())); register_provider(Arc::new(super::gmail::GmailProvider::new())); register_provider(Arc::new(super::notion::NotionProvider::new())); register_provider(Arc::new(super::slack::SlackProvider::new()));