Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ PROVIDER_API_KEY=
# OpenAI-compatible providers can use any env var name as long as config.yaml
# points model.api_key_env at the same name.
OPENAI_API_KEY=

# Required when tools.search_provider is set to 'tavily' in config.yaml.
TAVILY_API_KEY=
1 change: 1 addition & 0 deletions config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ tools:
web: true
shell: true
files: true
# search_provider: duckduckgo # 'duckduckgo' (default) or 'tavily'
email:
enabled: false
smtp_host: ""
Expand Down
3 changes: 3 additions & 0 deletions nipux_cli/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ class ToolAccessConfig:
web: bool = True
shell: bool = True
files: bool = True
search_provider: str = "duckduckgo"


@dataclass(frozen=True)
Expand Down Expand Up @@ -214,6 +215,7 @@ def load_config(path: str | Path | None = None) -> AppConfig:
web=bool(tools_raw.get("web", True)),
shell=bool(tools_raw.get("shell", True)),
files=bool(tools_raw.get("files", True)),
search_provider=str(tools_raw.get("search_provider") or "duckduckgo"),
)
email = EmailConfig(
enabled=bool(email_raw.get("enabled", False)),
Expand Down Expand Up @@ -256,6 +258,7 @@ def default_config_yaml(
" web: true\n"
" shell: true\n"
" files: true\n"
" # search_provider: duckduckgo # 'duckduckgo' (default) or 'tavily'\n"
"email:\n"
" enabled: false\n"
" smtp_host: \"\"\n"
Expand Down
6 changes: 3 additions & 3 deletions nipux_cli/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -853,14 +853,14 @@ def _browser_call(name: str, args: dict[str, Any], ctx: ToolContext) -> str:


def _web_call(name: str, args: dict[str, Any], ctx: ToolContext) -> str:
    """Run one of the web tools and return its result encoded by ``_json``.

    Args:
        name: Tool name; ``"web_search"`` and ``"web_extract"`` are handled.
        args: Tool arguments. ``web_search`` reads ``query`` and ``limit``;
            ``web_extract`` reads ``urls``.
        ctx: Tool context; its ``config.tools.search_provider`` selects the
            backend passed down to the web helpers.

    Raises:
        KeyError: If ``name`` is not one of the handled tool names.
    """
    from nipux_cli.web import web_extract, web_search

    # ``ctx`` must stay bound: the provider is read from the loaded config.
    # (A leftover ``del ctx`` ahead of this line would raise on the lookup.)
    search_provider = ctx.config.tools.search_provider
    if name == "web_search":
        return _json(
            web_search(
                str(args.get("query") or ""),
                limit=int(args.get("limit") or 5),
                search_provider=search_provider,
            )
        )
    if name == "web_extract":
        # Anything that is not a list is treated as "no URLs".
        urls = args.get("urls") if isinstance(args.get("urls"), list) else []
        return _json(web_extract(urls[:5], search_provider=search_provider))
    raise KeyError(name)


Expand Down
46 changes: 44 additions & 2 deletions nipux_cli/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def _duckduckgo_link(raw: str) -> str:
return html.unescape(raw)


def web_search(query: str, *, limit: int = 5) -> dict[str, Any]:
def _ddg_search(query: str, *, limit: int = 5) -> dict[str, Any]:
url = "https://duckduckgo.com/html/?" + urllib.parse.urlencode({"q": query})
markup, _ = _request(url)
pattern = re.compile(
Expand All @@ -95,7 +95,7 @@ def web_search(query: str, *, limit: int = 5) -> dict[str, Any]:
return {"success": True, "query": query, "results": results}


def web_extract(urls: list[str], *, limit_chars: int = 12_000) -> dict[str, Any]:
def _ddg_extract(urls: list[str], *, limit_chars: int = 12_000) -> dict[str, Any]:
pages = []
for url in urls[:5]:
try:
Expand All @@ -119,3 +119,45 @@ def web_extract(urls: list[str], *, limit_chars: int = 12_000) -> dict[str, Any]
except Exception as exc:
pages.append({"url": url, "error": str(exc)})
return {"success": True, "pages": pages}


def _tavily_search(query: str, *, limit: int = 5) -> dict[str, Any]:
from tavily import TavilyClient

client = TavilyClient()
response = client.search(query=query, max_results=limit)
results = [
{"title": r.get("title", ""), "url": r.get("url", "")}
for r in response.get("results", [])
]
return {"success": True, "query": query, "results": results}


def _tavily_extract(urls: list[str], *, limit_chars: int = 12_000) -> dict[str, Any]:
from tavily import TavilyClient

client = TavilyClient()
response = client.extract(urls=urls[:5])
pages = []
for r in response.get("results", []):
text = r.get("raw_content") or r.get("text") or ""
pages.append({
"url": r.get("url", ""),
"text": text[:limit_chars],
"truncated": len(text) > limit_chars,
})
for f in response.get("failed_results", []):
pages.append({"url": f.get("url", ""), "error": f.get("error", "extraction failed")})
return {"success": True, "pages": pages}


def web_search(query: str, *, limit: int = 5, search_provider: str = "duckduckgo") -> dict[str, Any]:
    """Search the web with the configured provider.

    Args:
        query: Search text.
        limit: Maximum number of results to return.
        search_provider: ``"tavily"`` selects Tavily (matched ignoring case
            and surrounding whitespace); any other value uses DuckDuckGo.

    Returns:
        The result dict produced by the selected provider's search helper.
    """
    # Normalise so a config value such as "Tavily" does not silently fall
    # back to DuckDuckGo.
    provider = (search_provider or "").strip().lower()
    if provider == "tavily":
        return _tavily_search(query, limit=limit)
    return _ddg_search(query, limit=limit)


def web_extract(urls: list[str], *, limit_chars: int = 12_000, search_provider: str = "duckduckgo") -> dict[str, Any]:
    """Extract page text with the configured provider.

    Args:
        urls: URLs to fetch.
        limit_chars: Maximum number of characters of text kept per page.
        search_provider: ``"tavily"`` selects Tavily (matched ignoring case
            and surrounding whitespace); any other value uses the built-in
            extractor.

    Returns:
        The result dict produced by the selected provider's extract helper.
    """
    # Normalise so a config value such as "Tavily" does not silently fall
    # back to the built-in extractor.
    provider = (search_provider or "").strip().lower()
    if provider == "tavily":
        return _tavily_extract(urls, limit_chars=limit_chars)
    return _ddg_extract(urls, limit_chars=limit_chars)
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ classifiers = [
dependencies = [
"openai>=2.21.0,<3",
"pyyaml>=6.0.2,<7",
"tavily-python>=0.5.0",
]

[project.urls]
Expand Down