From 89578408a29447798edfe5438d141a4f7d26cc2a Mon Sep 17 00:00:00 2001 From: suryyyansh Date: Wed, 28 Aug 2024 04:05:36 +0530 Subject: [PATCH 01/11] basic search functionality added. Triggers when the RAG query returns no results Signed-off-by: suryyyansh --- Cargo.toml | 6 +- src/backend/ggml.rs | 29 ++++++++ src/error.rs | 3 + src/main.rs | 89 ++++++++++++++++++++++++ src/search/bing_search.rs | 56 +++++++++++++++ src/search/local_google_search.rs | 40 +++++++++++ src/search/mod.rs | 110 ++++++++++++++++++++++++++++++ src/search/tavily_search.rs | 44 ++++++++++++ src/utils.rs | 11 +++ 9 files changed, 387 insertions(+), 1 deletion(-) create mode 100644 src/search/bing_search.rs create mode 100644 src/search/local_google_search.rs create mode 100644 src/search/mod.rs create mode 100644 src/search/tavily_search.rs diff --git a/Cargo.toml b/Cargo.toml index 92d5498..b492325 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,9 @@ version = "0.9.3" edition = "2021" [dependencies] +endpoints = { git = "https://github.com/suryyyansh/LlamaEdge.git", branch = "dev"} +llama-core = { git = "https://github.com/suryyyansh/LlamaEdge.git", branch = "dev", features = ["full"]} +chat-prompts = { git = "https://github.com/suryyyansh/LlamaEdge.git", branch = "dev"} anyhow = "1.0.80" chat-prompts = { version = "=0.14.0" } chrono = "0.4.38" @@ -35,4 +38,5 @@ hyper = { git = "https://github.com/second-state/wasi_hyper.git", branch = "v0.1 tokio = { git = "https://github.com/second-state/wasi_tokio.git", branch = "v1.36.x" } [features] -default = [] +default = ["search"] +search = [] diff --git a/src/backend/ggml.rs b/src/backend/ggml.rs index 4f2285d..b8d9d63 100644 --- a/src/backend/ggml.rs +++ b/src/backend/ggml.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "search")] +use crate::search::*; use crate::{error, utils::gen_chat_id, GLOBAL_RAG_PROMPT, SERVER_INFO}; use chat_prompts::{error as ChatPromptsError, MergeRagContext, MergeRagContextPolicy}; use endpoints::{ @@ -372,6 +374,9 @@ 
pub(crate) async fn rag_query_handler(mut req: Request) -> Response } }; + #[cfg(feature = "search")] + let mut web_search_allowed: bool = false; + if let Some(ro) = res { match ro.points { Some(scored_points) => { @@ -379,6 +384,11 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response true => { // log warn!(target: "stdout", "{}", format!("No point retrieved (score < threshold {})", server_info.qdrant_config.score_threshold)); + #[cfg(feature = "search")] + { + info!(target: "stdout", "No points retrieved, enabling web search."); + web_search_allowed = true; + } } false => { // update messages with retrieved context @@ -435,10 +445,29 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response // log warn!(target: "stdout", "{}", format!("No point retrieved (score < threshold {})", server_info.qdrant_config.score_threshold )); + + #[cfg(feature = "search")] + { + info!(target: "stdout", "No points retrieved, enabling web search."); + web_search_allowed = true; + } } } } + #[cfg(feature = "search")] + if web_search_allowed { + // TODO: check the llamaedge-query-server if the current user query could use an internet search. 
+ + info!(target: "stdout", "Performing web search."); + if let Err(e) = insert_search_results(&mut chat_request).await { + let err_msg = "encountered an error while appending search results.".to_string(); + // log + error!(target: "stdout", "{}", &err_msg); + return e; + } + } + // chat completion let res = match llama_core::chat::chat(&mut chat_request).await { Ok(result) => match result { diff --git a/src/error.rs b/src/error.rs index cd3ac19..a96c956 100644 --- a/src/error.rs +++ b/src/error.rs @@ -82,4 +82,7 @@ pub enum ServerError { ArgumentError(String), #[error("{0}")] Operation(String), + /// Conversion error when converting to SearchOutput + #[error("{0}")] + SearchConversionError(String), } diff --git a/src/main.rs b/src/main.rs index 4f8ab6e..7693b82 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,6 +3,8 @@ extern crate log; mod backend; mod error; +#[cfg(feature = "search")] +mod search; mod utils; use anyhow::Result; @@ -16,11 +18,17 @@ use hyper::{ service::{make_service_fn, service_fn}, Body, Request, Response, Server, StatusCode, }; +#[cfg(feature = "search")] +use llama_core::search::{ContentType, SearchConfig}; use llama_core::MetadataBuilder; use once_cell::sync::OnceCell; +#[cfg(feature = "search")] +use search::*; use serde::{Deserialize, Serialize}; use std::{collections::HashMap, net::SocketAddr, path::PathBuf}; use tokio::net::TcpListener; +#[cfg(feature = "search")] +use utils::SearchArguments; use utils::{is_valid_url, LogLevel}; type Error = Box; @@ -29,6 +37,12 @@ type Error = Box; pub(crate) static GLOBAL_RAG_PROMPT: OnceCell = OnceCell::new(); // server info pub(crate) static SERVER_INFO: OnceCell = OnceCell::new(); +// default SearchConfig +#[cfg(feature = "search")] +pub(crate) static SEARCH_CONFIG: OnceCell = OnceCell::new(); +// search related arguments passed on the command line +#[cfg(feature = "search")] +pub(crate) static SEARCH_ARGUMENTS: OnceCell = OnceCell::new(); // default socket address const DEFAULT_SOCKET_ADDRESS: 
&str = "0.0.0.0:8080"; @@ -127,6 +141,24 @@ struct Cli { /// Deprecated. Print all log information to stdout #[arg(long)] log_all: bool, + /// Maximum number search results to use. + #[arg(long, default_value = "5")] + max_search_results: u8, + /// Size to clip every result to. + #[arg(long, default_value = "300")] + size_limit_per_result: u16, + /// API key to be supplied to the endpoint, if supported. + #[arg(long, default_value = "")] + api_key: String, + /// System prompt explut ChatCompletionRequest: &aining to the LLM how to interpret search results. + #[arg( + long, + default_value = "You found the following search results on the internet. Use them to answer the user's query.\n\n" + )] + search_prompt: String, + /// API key to be supplied to the endpoint, if supported. + #[arg(long)] + summarize: bool, } #[tokio::main(flavor = "current_thread")] @@ -421,6 +453,63 @@ async fn main() -> Result<(), ServerError> { info!(target: "stdout", "gaianet_node_version: {}", node.as_ref().unwrap()); } + // setup search items + #[cfg(feature = "search")] + { + // by default, we will use Tavily. 
+ let tavily_config = llama_core::search::SearchConfig::new( + "tavily".to_owned(), + cli.max_search_results, + cli.size_limit_per_result, + "https://api.tavily.com/search".to_owned(), + ContentType::JSON, + ContentType::JSON, + "POST".to_owned(), + None, + tavily_search::tavily_parser, + None, + None, + ); + + SEARCH_CONFIG + .set(tavily_config) + .map_err(|_| ServerError::Operation("Failed to set `SEARCH_CONFIG`.".to_owned()))?; + + // Bing Search: + // + // let mut additional_headers = HashMap::new(); + // additional_headers.insert("Ocp-Apim-Subscription-Key".to_string(), cli.api_key.clone()); + // + // let bing_config = llama_core::search::SearchConfig::new( + // "bing".to_owned(), + // cli.max_search_results, + // cli.size_limit_per_result, + // // use of https requires the "full" or "https" feature + // "https://api.bing.microsoft.com/v7.0/search".to_owned(), + // ContentType::JSON, + // ContentType::JSON, + // "GET".to_owned(), + // Some(additional_headers), + // bing_search::bing_parser, + // None, + // None, + // ); + // + // SEARCH_CONFIG + // .set(bing_config) + // .map_err(|_| ServerError::Operation("Failed to set `SEARCH_CONFIG`.".to_owned()))?; + + let search_arguments = SearchArguments { + api_key: cli.api_key.clone(), + search_prompt: cli.search_prompt.clone(), + summarize: cli.summarize, + }; + + SEARCH_ARGUMENTS + .set(search_arguments) + .map_err(|_| ServerError::Operation("Failed to set `SEARCH_ARGUMENTS`.".to_owned()))?; + } + // create server info let server_info = ServerInfo { node, diff --git a/src/search/bing_search.rs b/src/search/bing_search.rs new file mode 100644 index 0000000..56caf1b --- /dev/null +++ b/src/search/bing_search.rs @@ -0,0 +1,56 @@ +use crate::error::ServerError; +use llama_core::search::{SearchOutput, SearchResult}; +use serde::Serialize; + +// Note: bing also requires the `Ocp-Apim-Subscription-Key` header: https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/reference/headers + +#[allow(non_snake_case)] 
+#[derive(Serialize)] +pub struct BingSearchInput { + /// The number of search results to return in the response. The default is 10 and the maximum value is 50. The actual number delivered may be less than requested. + pub count: u8, + /// The user's search query term. The term may not be empty. + pub q: String, + /// FIlter list for responses useful to the LLM. + pub responseFilter: String, +} + +#[allow(dead_code)] +pub fn bing_parser( + raw_results: &serde_json::Value, +) -> Result> { + println!("\n\n\n RAW RESULTS: \n\n\n {}", raw_results.to_string()); + + // parse webpages + let web_pages_object = match raw_results["webPages"].is_object() { + true => match raw_results["webPages"]["value"].as_array() { + Some(value) => value, + None => { + let msg = r#"could not convert the "value" field of "webPages" to an array"#; + error!(target: "bing_parser", "bing_parser: {}", msg); + return Err(Box::new(ServerError::SearchConversionError( + msg.to_string(), + ))); + } + }, + false => { + let msg = "no webpages found when parsing query."; + error!(target: "bing_parser", "bing_parser: {}", msg); + return Err(Box::new(ServerError::SearchConversionError( + msg.to_string(), + ))); + } + }; + + let mut results = Vec::new(); + for result in web_pages_object { + let current_result = SearchResult { + url: result["url"].to_string(), + site_name: result["siteName"].to_string(), + text_content: result["snippet"].to_string(), + }; + results.push(current_result); + } + + Ok(SearchOutput { results }) +} diff --git a/src/search/local_google_search.rs b/src/search/local_google_search.rs new file mode 100644 index 0000000..7f2b47a --- /dev/null +++ b/src/search/local_google_search.rs @@ -0,0 +1,40 @@ +use crate::error::ServerError; +use llama_core::search::{SearchOutput, SearchResult}; +use serde::Serialize; + +#[allow(non_snake_case)] +#[derive(Serialize)] +pub struct LocalGoogleSearchInput { + pub term: String, + pub engine: String, + pub maxSearchResults: u8, +} + +#[allow(dead_code)] 
+pub fn local_google_parser( + raw_results: &serde_json::Value, +) -> Result> { + let results_array = match raw_results.as_array() { + Some(array) => array, + None => { + let msg = "No results returned from server"; + error!(target: "search_server", "google_parser: {}", msg); + return Err(Box::new(ServerError::SearchConversionError( + msg.to_string(), + ))); + } + }; + + let mut results = Vec::new(); + + for result in results_array { + let current_result = SearchResult { + url: result["url"].to_string(), + site_name: result["siteName"].to_string(), + text_content: result["textContent"].to_string(), + }; + results.push(current_result) + } + + Ok(SearchOutput { results }) +} diff --git a/src/search/mod.rs b/src/search/mod.rs new file mode 100644 index 0000000..f13393b --- /dev/null +++ b/src/search/mod.rs @@ -0,0 +1,110 @@ +pub mod bing_search; +pub mod local_google_search; +pub mod tavily_search; + +use crate::{error, SEARCH_ARGUMENTS, SEARCH_CONFIG}; +use endpoints::chat::{ + ChatCompletionRequest, ChatCompletionRequestMessage, ChatCompletionSystemMessage, + ChatCompletionUserMessageContent, ContentPart, +}; + +#[allow(dead_code)] +pub(crate) async fn insert_search_results( + chat_request: &mut ChatCompletionRequest, +) -> Result<(), hyper::Response> { + let search_arguments = match SEARCH_ARGUMENTS.get() { + Some(sa) => sa, + None => { + return Err(error::internal_server_error( + "Failed to get `SEARCH_ARGUMENTS`. Was it set?", + )); + } + }; + + if let Some(ChatCompletionRequestMessage::User(ref message)) = chat_request.messages.last() { + let search_config = match SEARCH_CONFIG.get() { + Some(sc) => sc, + None => { + let err_msg = format!("Failed to obtain SEARCH_CONFIG. 
Was it set?"); + error!(target: "insert_search_results", "{}", &err_msg); + + return Err(error::internal_server_error(err_msg)); + } + }; + info!(target: "insert_search_results", "performing search"); + + let user_message_content = match message.content() { + ChatCompletionUserMessageContent::Text(message) => message.to_owned(), + ChatCompletionUserMessageContent::Parts(parts) => { + let mut message: String = "".to_owned(); + for part in parts { + match part { + ContentPart::Text(message_part) => { + message.push_str(message_part.text()); + } + ContentPart::Image(_) => {} + } + } + message + } + }; + + // set search input. + let search_input = tavily_search::TavilySearchInput { + api_key: search_arguments.api_key.to_owned(), + include_answer: false, + include_images: false, + query: user_message_content, + max_results: search_config.max_search_results, + include_raw_content: false, + search_depth: "advanced".to_owned(), + }; + + // Prepare the final `results` string for use as input. + let mut results = search_arguments.search_prompt.clone(); + + match search_arguments.summarize { + true => { + match search_config.summarize_search(&search_input).await { + // Append the result summary to the search prompt. 
+ Ok(search_summary) => results += search_summary.as_str(), + Err(e) => { + let err_msg = format!( + "Failed to performing summarized search on SEACH_CONFIG {msg}", + msg = e + ); + error!(target: "insert_search_results", "{}", &err_msg); + + return Err(error::internal_server_error(err_msg)); + } + }; + } + false => { + let search_output: llama_core::search::SearchOutput = + match search_config.perform_search(&search_input).await { + Ok(search_output) => search_output, + Err(e) => { + let err_msg = + format!("Failed to perform search on SEACH_CONFIG: {msg}", msg = e); + error!(target: "insert_search_results", "{}", &err_msg); + + return Err(error::internal_server_error(err_msg)); + } + }; + + for result in search_output.results { + results.push_str(result.text_content.as_str()); + results.push_str("\n\n"); + } + } + } + + let system_search_result_message = ChatCompletionSystemMessage::new(results, None); + + chat_request.messages.insert( + chat_request.messages.len() - 1, + ChatCompletionRequestMessage::System(system_search_result_message), + ) + } + Ok(()) +} diff --git a/src/search/tavily_search.rs b/src/search/tavily_search.rs new file mode 100644 index 0000000..594b9fe --- /dev/null +++ b/src/search/tavily_search.rs @@ -0,0 +1,44 @@ +use crate::error::ServerError; +use llama_core::search::{SearchOutput, SearchResult}; +use serde::Serialize; + +#[allow(non_snake_case)] +#[derive(Serialize)] +pub struct TavilySearchInput { + pub api_key: String, + pub include_answer: bool, + pub include_images: bool, + pub query: String, + pub max_results: u8, + pub include_raw_content: bool, + pub search_depth: String, +} + +#[allow(dead_code)] +pub fn tavily_parser( + raw_results: &serde_json::Value, +) -> Result> { + let results_array = match raw_results["results"].as_array() { + Some(array) => array, + None => { + let msg = "No results returned from server"; + error!(target: "search_server", "google_parser: {}", msg); + return Err(Box::new(ServerError::SearchConversionError( 
+ msg.to_string(), + ))); + } + }; + + let mut results = Vec::new(); + + for result in results_array { + let current_result = SearchResult { + url: result["url"].to_string(), + site_name: result["title"].to_string(), + text_content: result["content"].to_string(), + }; + results.push(current_result) + } + + Ok(SearchOutput { results }) +} diff --git a/src/utils.rs b/src/utils.rs index 837da3f..7bc6edf 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -9,6 +9,17 @@ pub(crate) fn gen_chat_id() -> String { format!("chatcmpl-{}", uuid::Uuid::new_v4()) } +/// Search related items that aren't directly supported by SearchConfig +#[cfg(feature = "search")] +pub(crate) struct SearchArguments { + /// API key to be supplied to the endpoint, if supported. Not used by Bing. + pub(crate) api_key: String, + /// System prompt explaining to the LLM how to interpret search results. + pub(crate) search_prompt: String, + /// Whether to summarize the search results before using them. + pub(crate) summarize: bool, +} + #[derive( Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, clap::ValueEnum, Serialize, Deserialize, )] From fd54590cd2d7dc385dcec1da3eacfcaa2646fa3c Mon Sep 17 00:00:00 2001 From: suryyyansh Date: Fri, 30 Aug 2024 01:13:35 +0530 Subject: [PATCH 02/11] Added search functionality with llamaedge query server Signed-off-by: suryyyansh --- src/backend/ggml.rs | 133 ++++++++++++++++++++++++++++-- src/error.rs | 3 - src/main.rs | 106 ++++++------------------ src/search/bing_search.rs | 56 ------------- src/search/local_google_search.rs | 40 --------- src/search/mod.rs | 110 ------------------------ src/search/tavily_search.rs | 44 ---------- src/utils.rs | 8 +- 8 files changed, 152 insertions(+), 348 deletions(-) delete mode 100644 src/search/bing_search.rs delete mode 100644 src/search/local_google_search.rs delete mode 100644 src/search/mod.rs delete mode 100644 src/search/tavily_search.rs diff --git a/src/backend/ggml.rs b/src/backend/ggml.rs index b8d9d63..dfa646f 100644 --- 
a/src/backend/ggml.rs +++ b/src/backend/ggml.rs @@ -1,5 +1,3 @@ -#[cfg(feature = "search")] -use crate::search::*; use crate::{error, utils::gen_chat_id, GLOBAL_RAG_PROMPT, SERVER_INFO}; use chat_prompts::{error as ChatPromptsError, MergeRagContext, MergeRagContextPolicy}; use endpoints::{ @@ -263,6 +261,9 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response info!(target: "stdout", "Compute embeddings for user query."); + #[cfg(feature = "search")] + let query: String; + // * compute embeddings for user query let embedding_response = match chat_request.messages.is_empty() { true => { @@ -289,6 +290,10 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response } }; + #[cfg(feature = "search")] + { + query = query_text.clone(); + } // log info!(target: "stdout", "query text: {}", query_text); @@ -384,6 +389,7 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response true => { // log warn!(target: "stdout", "{}", format!("No point retrieved (score < threshold {})", server_info.qdrant_config.score_threshold)); + #[cfg(feature = "search")] { info!(target: "stdout", "No points retrieved, enabling web search."); @@ -457,14 +463,123 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response #[cfg(feature = "search")] if web_search_allowed { - // TODO: check the llamaedge-query-server if the current user query could use an internet search. + let search_arguments = match crate::SEARCH_ARGUMENTS.get() { + Some(sc) => sc, + None => { + let err_msg = "Failed to obtain SEARCH_ARGUMENTS. 
Was it set?".to_string(); + error!(target: "stdout", "{}", &err_msg); - info!(target: "stdout", "Performing web search."); - if let Err(e) = insert_search_results(&mut chat_request).await { - let err_msg = "encountered an error while appending search results.".to_string(); - // log - error!(target: "stdout", "{}", &err_msg); - return e; + return error::internal_server_error(err_msg); + } + }; + + let endpoint: hyper::Uri = match search_arguments.query_server_url.parse() { + Ok(uri) => uri, + Err(e) => { + let err_msg = format!( + "LlamaEdge Query server URL could not be parsed: {}", + e.to_string() + ); + error!(target: "stdout", "{}", &err_msg); + + return error::internal_server_error(err_msg); + } + }; + + let summary_endpoint = match hyper::Uri::builder() + .scheme(endpoint.scheme().unwrap().to_string().as_str()) + .authority(endpoint.authority().unwrap().to_string().as_str()) + .path_and_query("/test/summarize") + .build() + { + Ok(se) => se, + Err(_) => { + let err_msg = "couldn't build summary_endpoint from query_server_url".to_string(); + error!(target: "stdout", "{}", &err_msg); + + return error::internal_server_error(err_msg); + } + }; + + //perform query, extract summary, add to + let req = match Request::builder() + .method(Method::POST) + .uri(summary_endpoint) + .header("content-type", "application/json") + .body(Body::from( + serde_json::json!({ + "search_config" : { + "api_key": search_arguments.api_key, + }, + "backend": search_arguments.search_backend, + "query": query, + }) + .to_string(), + )) { + Ok(request) => request, + Err(_) => { + let err_msg = "failed to build request to LLamaEdge query server.".to_string(); + error!(target: "stdout", "{}", &err_msg); + return error::internal_server_error(err_msg); + } + }; + + info!(target: "stdout", "Querying the LlamaEdge query server."); + + let client = hyper::client::Client::new(); + let res = match client.request(req).await { + Ok(response) => response, + Err(e) => { + let err_msg = format!( + 
"couldn't make request to LlamaEdge query server: {}", + e.to_string() + ); + error!(target: "stdout", "{}", &err_msg); + + return error::internal_server_error(err_msg); + } + }; + + let is_success = res.status().is_success(); + + let body_bytes = match hyper::body::to_bytes(res.into_body()).await { + Ok(bytes) => bytes, + Err(e) => { + let err_msg = format!("couldn't convert body into bytes: {}", e.to_string()); + error!(target: "stdout", "{}", &err_msg); + + return error::internal_server_error(err_msg); + } + }; + + let body_json: serde_json::Value = match serde_json::from_slice(&body_bytes) { + Ok(json) => json, + Err(e) => { + let err_msg = format!("couldn't convert body into json: {}", e.to_string()); + error!(target: "stdout", "{}", &err_msg); + + return error::internal_server_error(err_msg); + } + }; + + info!(target: "stdout", "processed query server response json body: \n{}", body_json); + + // if the request is a success, check decision and inject results accordingly. + if is_success { + if body_json["decision"].as_bool().unwrap_or(true) { + // the logic to ensure "results" is a serde_json::Value::String is present on the + // llamaedge-query-server. 
+ let results = body_json["results"].as_str().unwrap_or(""); + + //inject search results + let system_search_result_message: ChatCompletionRequestMessage = + ChatCompletionRequestMessage::new_system_message(results, None); + + chat_request.messages.insert( + chat_request.messages.len() - 1, + system_search_result_message, + ) + } } } diff --git a/src/error.rs b/src/error.rs index a96c956..cd3ac19 100644 --- a/src/error.rs +++ b/src/error.rs @@ -82,7 +82,4 @@ pub enum ServerError { ArgumentError(String), #[error("{0}")] Operation(String), - /// Conversion error when converting to SearchOutput - #[error("{0}")] - SearchConversionError(String), } diff --git a/src/main.rs b/src/main.rs index 7693b82..ab03ed6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,8 +3,7 @@ extern crate log; mod backend; mod error; -#[cfg(feature = "search")] -mod search; + mod utils; use anyhow::Result; @@ -18,12 +17,8 @@ use hyper::{ service::{make_service_fn, service_fn}, Body, Request, Response, Server, StatusCode, }; -#[cfg(feature = "search")] -use llama_core::search::{ContentType, SearchConfig}; use llama_core::MetadataBuilder; use once_cell::sync::OnceCell; -#[cfg(feature = "search")] -use search::*; use serde::{Deserialize, Serialize}; use std::{collections::HashMap, net::SocketAddr, path::PathBuf}; use tokio::net::TcpListener; @@ -37,10 +32,7 @@ type Error = Box; pub(crate) static GLOBAL_RAG_PROMPT: OnceCell = OnceCell::new(); // server info pub(crate) static SERVER_INFO: OnceCell = OnceCell::new(); -// default SearchConfig -#[cfg(feature = "search")] -pub(crate) static SEARCH_CONFIG: OnceCell = OnceCell::new(); -// search related arguments passed on the command line +// search cli arguments #[cfg(feature = "search")] pub(crate) static SEARCH_ARGUMENTS: OnceCell = OnceCell::new(); @@ -141,24 +133,18 @@ struct Cli { /// Deprecated. Print all log information to stdout #[arg(long)] log_all: bool, - /// Maximum number search results to use. 
-    #[arg(long, default_value = "5")]
-    max_search_results: u8,
-    /// Size to clip every result to.
-    #[arg(long, default_value = "300")]
-    size_limit_per_result: u16,
     /// API key to be supplied to the endpoint, if supported.
+    #[cfg(feature = "search")]
     #[arg(long, default_value = "")]
     api_key: String,
-    /// System prompt explut ChatCompletionRequest: &aining to the LLM how to interpret search results.
-    #[arg(
-        long,
-        default_value = "You found the following search results on the internet. Use them to answer the user's query.\n\n"
-    )]
-    search_prompt: String,
-    /// API key to be supplied to the endpoint, if supported.
-    #[arg(long)]
-    summarize: bool,
+    /// The URL for the LlamaEdge query server. Supplying this implies usage.
+    #[cfg(feature = "search")]
+    #[arg(long, required = true)]
+    query_server_url: String,
+    /// The search backend to use with the LlamaEdge query server.
+    #[cfg(feature = "search")]
+    #[arg(long, default_value = "tavily", requires = "query_server_url")]
+    search_backend: String,
 }
 #[tokio::main(flavor = "current_thread")]
 async fn main() -> Result<(), ServerError> {
@@ -453,63 +439,6 @@ async fn main() -> Result<(), ServerError> {
 
         info!(target: "stdout", "gaianet_node_version: {}", node.as_ref().unwrap());
     }
-    // setup search items
-    #[cfg(feature = "search")]
-    {
-        // by default, we will use Tavily.
-        let tavily_config = llama_core::search::SearchConfig::new(
-            "tavily".to_owned(),
-            cli.max_search_results,
-            cli.size_limit_per_result,
-            "https://api.tavily.com/search".to_owned(),
-            ContentType::JSON,
-            ContentType::JSON,
-            "POST".to_owned(),
-            None,
-            tavily_search::tavily_parser,
-            None,
-            None,
-        );
-
-        SEARCH_CONFIG
-            .set(tavily_config)
-            .map_err(|_| ServerError::Operation("Failed to set `SEARCH_CONFIG`.".to_owned()))?;
-
-        // Bing Search:
-        //
-        // let mut additional_headers = HashMap::new();
-        // additional_headers.insert("Ocp-Apim-Subscription-Key".to_string(), cli.api_key.clone());
-        //
-        // let bing_config = llama_core::search::SearchConfig::new(
-        //     "bing".to_owned(),
-        //     cli.max_search_results,
-        //     cli.size_limit_per_result,
-        //     // use of https requires the "full" or "https" feature
-        //     "https://api.bing.microsoft.com/v7.0/search".to_owned(),
-        //     ContentType::JSON,
-        //     ContentType::JSON,
-        //     "GET".to_owned(),
-        //     Some(additional_headers),
-        //     bing_search::bing_parser,
-        //     None,
-        //     None,
-        // );
-        //
-        // SEARCH_CONFIG
-        //     .set(bing_config)
-        //     .map_err(|_| ServerError::Operation("Failed to set `SEARCH_CONFIG`.".to_owned()))?;
-
-        let search_arguments = SearchArguments {
-            api_key: cli.api_key.clone(),
-            search_prompt: cli.search_prompt.clone(),
-            summarize: cli.summarize,
-        };
-
-        SEARCH_ARGUMENTS
-            .set(search_arguments)
-            .map_err(|_| ServerError::Operation("Failed to set `SEARCH_ARGUMENTS`.".to_owned()))?;
-    }
-
     // create server info
     let server_info = ServerInfo {
         node,
@@ -541,6 +470,19 @@ async fn main() -> Result<(), ServerError> {
         }
     });
 
+    #[cfg(feature = "search")]
+    {
+        let search_arguments = SearchArguments {
+            api_key: cli.api_key,
+            query_server_url: cli.query_server_url,
+            search_backend: cli.search_backend,
+        };
+
+        SEARCH_ARGUMENTS
+            .set(search_arguments)
+            .map_err(|_| ServerError::Operation("Failed to set `SEARCH_ARGUMENTS`.".to_string()))?;
+    }
+
     // let server = Server::bind(&addr).serve(new_service);
     let tcp_listener =
TcpListener::bind(addr).await.unwrap(); diff --git a/src/search/bing_search.rs b/src/search/bing_search.rs deleted file mode 100644 index 56caf1b..0000000 --- a/src/search/bing_search.rs +++ /dev/null @@ -1,56 +0,0 @@ -use crate::error::ServerError; -use llama_core::search::{SearchOutput, SearchResult}; -use serde::Serialize; - -// Note: bing also requires the `Ocp-Apim-Subscription-Key` header: https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/reference/headers - -#[allow(non_snake_case)] -#[derive(Serialize)] -pub struct BingSearchInput { - /// The number of search results to return in the response. The default is 10 and the maximum value is 50. The actual number delivered may be less than requested. - pub count: u8, - /// The user's search query term. The term may not be empty. - pub q: String, - /// FIlter list for responses useful to the LLM. - pub responseFilter: String, -} - -#[allow(dead_code)] -pub fn bing_parser( - raw_results: &serde_json::Value, -) -> Result> { - println!("\n\n\n RAW RESULTS: \n\n\n {}", raw_results.to_string()); - - // parse webpages - let web_pages_object = match raw_results["webPages"].is_object() { - true => match raw_results["webPages"]["value"].as_array() { - Some(value) => value, - None => { - let msg = r#"could not convert the "value" field of "webPages" to an array"#; - error!(target: "bing_parser", "bing_parser: {}", msg); - return Err(Box::new(ServerError::SearchConversionError( - msg.to_string(), - ))); - } - }, - false => { - let msg = "no webpages found when parsing query."; - error!(target: "bing_parser", "bing_parser: {}", msg); - return Err(Box::new(ServerError::SearchConversionError( - msg.to_string(), - ))); - } - }; - - let mut results = Vec::new(); - for result in web_pages_object { - let current_result = SearchResult { - url: result["url"].to_string(), - site_name: result["siteName"].to_string(), - text_content: result["snippet"].to_string(), - }; - results.push(current_result); - } - - 
Ok(SearchOutput { results }) -} diff --git a/src/search/local_google_search.rs b/src/search/local_google_search.rs deleted file mode 100644 index 7f2b47a..0000000 --- a/src/search/local_google_search.rs +++ /dev/null @@ -1,40 +0,0 @@ -use crate::error::ServerError; -use llama_core::search::{SearchOutput, SearchResult}; -use serde::Serialize; - -#[allow(non_snake_case)] -#[derive(Serialize)] -pub struct LocalGoogleSearchInput { - pub term: String, - pub engine: String, - pub maxSearchResults: u8, -} - -#[allow(dead_code)] -pub fn local_google_parser( - raw_results: &serde_json::Value, -) -> Result> { - let results_array = match raw_results.as_array() { - Some(array) => array, - None => { - let msg = "No results returned from server"; - error!(target: "search_server", "google_parser: {}", msg); - return Err(Box::new(ServerError::SearchConversionError( - msg.to_string(), - ))); - } - }; - - let mut results = Vec::new(); - - for result in results_array { - let current_result = SearchResult { - url: result["url"].to_string(), - site_name: result["siteName"].to_string(), - text_content: result["textContent"].to_string(), - }; - results.push(current_result) - } - - Ok(SearchOutput { results }) -} diff --git a/src/search/mod.rs b/src/search/mod.rs deleted file mode 100644 index f13393b..0000000 --- a/src/search/mod.rs +++ /dev/null @@ -1,110 +0,0 @@ -pub mod bing_search; -pub mod local_google_search; -pub mod tavily_search; - -use crate::{error, SEARCH_ARGUMENTS, SEARCH_CONFIG}; -use endpoints::chat::{ - ChatCompletionRequest, ChatCompletionRequestMessage, ChatCompletionSystemMessage, - ChatCompletionUserMessageContent, ContentPart, -}; - -#[allow(dead_code)] -pub(crate) async fn insert_search_results( - chat_request: &mut ChatCompletionRequest, -) -> Result<(), hyper::Response> { - let search_arguments = match SEARCH_ARGUMENTS.get() { - Some(sa) => sa, - None => { - return Err(error::internal_server_error( - "Failed to get `SEARCH_ARGUMENTS`. 
Was it set?", - )); - } - }; - - if let Some(ChatCompletionRequestMessage::User(ref message)) = chat_request.messages.last() { - let search_config = match SEARCH_CONFIG.get() { - Some(sc) => sc, - None => { - let err_msg = format!("Failed to obtain SEARCH_CONFIG. Was it set?"); - error!(target: "insert_search_results", "{}", &err_msg); - - return Err(error::internal_server_error(err_msg)); - } - }; - info!(target: "insert_search_results", "performing search"); - - let user_message_content = match message.content() { - ChatCompletionUserMessageContent::Text(message) => message.to_owned(), - ChatCompletionUserMessageContent::Parts(parts) => { - let mut message: String = "".to_owned(); - for part in parts { - match part { - ContentPart::Text(message_part) => { - message.push_str(message_part.text()); - } - ContentPart::Image(_) => {} - } - } - message - } - }; - - // set search input. - let search_input = tavily_search::TavilySearchInput { - api_key: search_arguments.api_key.to_owned(), - include_answer: false, - include_images: false, - query: user_message_content, - max_results: search_config.max_search_results, - include_raw_content: false, - search_depth: "advanced".to_owned(), - }; - - // Prepare the final `results` string for use as input. - let mut results = search_arguments.search_prompt.clone(); - - match search_arguments.summarize { - true => { - match search_config.summarize_search(&search_input).await { - // Append the result summary to the search prompt. 
- Ok(search_summary) => results += search_summary.as_str(), - Err(e) => { - let err_msg = format!( - "Failed to performing summarized search on SEACH_CONFIG {msg}", - msg = e - ); - error!(target: "insert_search_results", "{}", &err_msg); - - return Err(error::internal_server_error(err_msg)); - } - }; - } - false => { - let search_output: llama_core::search::SearchOutput = - match search_config.perform_search(&search_input).await { - Ok(search_output) => search_output, - Err(e) => { - let err_msg = - format!("Failed to perform search on SEACH_CONFIG: {msg}", msg = e); - error!(target: "insert_search_results", "{}", &err_msg); - - return Err(error::internal_server_error(err_msg)); - } - }; - - for result in search_output.results { - results.push_str(result.text_content.as_str()); - results.push_str("\n\n"); - } - } - } - - let system_search_result_message = ChatCompletionSystemMessage::new(results, None); - - chat_request.messages.insert( - chat_request.messages.len() - 1, - ChatCompletionRequestMessage::System(system_search_result_message), - ) - } - Ok(()) -} diff --git a/src/search/tavily_search.rs b/src/search/tavily_search.rs deleted file mode 100644 index 594b9fe..0000000 --- a/src/search/tavily_search.rs +++ /dev/null @@ -1,44 +0,0 @@ -use crate::error::ServerError; -use llama_core::search::{SearchOutput, SearchResult}; -use serde::Serialize; - -#[allow(non_snake_case)] -#[derive(Serialize)] -pub struct TavilySearchInput { - pub api_key: String, - pub include_answer: bool, - pub include_images: bool, - pub query: String, - pub max_results: u8, - pub include_raw_content: bool, - pub search_depth: String, -} - -#[allow(dead_code)] -pub fn tavily_parser( - raw_results: &serde_json::Value, -) -> Result> { - let results_array = match raw_results["results"].as_array() { - Some(array) => array, - None => { - let msg = "No results returned from server"; - error!(target: "search_server", "google_parser: {}", msg); - return 
Err(Box::new(ServerError::SearchConversionError( - msg.to_string(), - ))); - } - }; - - let mut results = Vec::new(); - - for result in results_array { - let current_result = SearchResult { - url: result["url"].to_string(), - site_name: result["title"].to_string(), - text_content: result["content"].to_string(), - }; - results.push(current_result) - } - - Ok(SearchOutput { results }) -} diff --git a/src/utils.rs b/src/utils.rs index 7bc6edf..20caecf 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -14,10 +14,10 @@ pub(crate) fn gen_chat_id() -> String { pub(crate) struct SearchArguments { /// API key to be supplied to the endpoint, if supported. Not used by Bing. pub(crate) api_key: String, - /// System prompt explaining to the LLM how to interpret search results. - pub(crate) search_prompt: String, - /// Whether to summarize the search results before using them. - pub(crate) summarize: bool, + /// The URL for the LlamaEdge query server. Supplying this implies usage. + pub(crate) query_server_url: String, + /// The URL for the LlamaEdge query server. Supplying this implies usage. 
+ pub(crate) search_backend: String, } #[derive( From 9f2c1727206b70de9118838d5e81ef824ae467cd Mon Sep 17 00:00:00 2001 From: suryyyansh Date: Fri, 30 Aug 2024 01:41:26 +0530 Subject: [PATCH 03/11] fix CI warnings Signed-off-by: suryyyansh --- src/backend/ggml.rs | 42 +++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/src/backend/ggml.rs b/src/backend/ggml.rs index dfa646f..182fd74 100644 --- a/src/backend/ggml.rs +++ b/src/backend/ggml.rs @@ -476,10 +476,7 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response let endpoint: hyper::Uri = match search_arguments.query_server_url.parse() { Ok(uri) => uri, Err(e) => { - let err_msg = format!( - "LlamaEdge Query server URL could not be parsed: {}", - e.to_string() - ); + let err_msg = format!("LlamaEdge Query server URL could not be parsed: {}", e); error!(target: "stdout", "{}", &err_msg); return error::internal_server_error(err_msg); @@ -530,10 +527,7 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response let res = match client.request(req).await { Ok(response) => response, Err(e) => { - let err_msg = format!( - "couldn't make request to LlamaEdge query server: {}", - e.to_string() - ); + let err_msg = format!("couldn't make request to LlamaEdge query server: {}", e); error!(target: "stdout", "{}", &err_msg); return error::internal_server_error(err_msg); @@ -545,7 +539,7 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response let body_bytes = match hyper::body::to_bytes(res.into_body()).await { Ok(bytes) => bytes, Err(e) => { - let err_msg = format!("couldn't convert body into bytes: {}", e.to_string()); + let err_msg = format!("couldn't convert body into bytes: {}", e); error!(target: "stdout", "{}", &err_msg); return error::internal_server_error(err_msg); @@ -555,7 +549,7 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response let body_json: serde_json::Value = match 
serde_json::from_slice(&body_bytes) { Ok(json) => json, Err(e) => { - let err_msg = format!("couldn't convert body into json: {}", e.to_string()); + let err_msg = format!("couldn't convert body into json: {}", e); error!(target: "stdout", "{}", &err_msg); return error::internal_server_error(err_msg); @@ -565,21 +559,19 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response info!(target: "stdout", "processed query server response json body: \n{}", body_json); // if the request is a success, check decision and inject results accordingly. - if is_success { - if body_json["decision"].as_bool().unwrap_or(true) { - // the logic to ensure "results" is a serde_json::Value::String is present on the - // llamaedge-query-server. - let results = body_json["results"].as_str().unwrap_or(""); - - //inject search results - let system_search_result_message: ChatCompletionRequestMessage = - ChatCompletionRequestMessage::new_system_message(results, None); - - chat_request.messages.insert( - chat_request.messages.len() - 1, - system_search_result_message, - ) - } + if is_success && body_json["decision"].as_bool().unwrap_or(true) { + // the logic to ensure "results" is a serde_json::Value::String is present on the + // llamaedge-query-server. 
+ let results = body_json["results"].as_str().unwrap_or(""); + + //inject search results + let system_search_result_message: ChatCompletionRequestMessage = + ChatCompletionRequestMessage::new_system_message(results, None); + + chat_request.messages.insert( + chat_request.messages.len() - 1, + system_search_result_message, + ) } } From 1077a91700ed8d6ffb924c6f4d0c4ca413da0982 Mon Sep 17 00:00:00 2001 From: suryyyansh Date: Fri, 30 Aug 2024 01:49:16 +0530 Subject: [PATCH 04/11] updated search_backend definition Signed-off-by: suryyyansh --- src/main.rs | 2 +- src/utils.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.rs b/src/main.rs index ab03ed6..24cb70f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -141,7 +141,7 @@ struct Cli { #[cfg(feature = "search")] #[arg(long, required = true)] query_server_url: String, - /// The URL for the LlamaEdge query server. Supplying this implies usage. + /// The search API backend to use for internet search. #[cfg(feature = "search")] #[arg(long, default_value = "tavily", requires = "query-server-url")] search_backend: String, diff --git a/src/utils.rs b/src/utils.rs index 20caecf..0e22ff4 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -16,7 +16,7 @@ pub(crate) struct SearchArguments { pub(crate) api_key: String, /// The URL for the LlamaEdge query server. Supplying this implies usage. pub(crate) query_server_url: String, - /// The URL for the LlamaEdge query server. Supplying this implies usage. + /// The search API backend to use for requests. 
pub(crate) search_backend: String, } From e48939fd9bbe07db7fa514e7272aa08d9e907150 Mon Sep 17 00:00:00 2001 From: suryyyansh Date: Fri, 30 Aug 2024 14:15:22 +0530 Subject: [PATCH 05/11] updated summarization endpoint Signed-off-by: suryyyansh --- src/backend/ggml.rs | 2 +- src/utils.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backend/ggml.rs b/src/backend/ggml.rs index 182fd74..0644e49 100644 --- a/src/backend/ggml.rs +++ b/src/backend/ggml.rs @@ -486,7 +486,7 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response let summary_endpoint = match hyper::Uri::builder() .scheme(endpoint.scheme().unwrap().to_string().as_str()) .authority(endpoint.authority().unwrap().to_string().as_str()) - .path_and_query("/test/summarize") + .path_and_query("/query/summarize") .build() { Ok(se) => se, diff --git a/src/utils.rs b/src/utils.rs index 0e22ff4..95050ab 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -16,7 +16,7 @@ pub(crate) struct SearchArguments { pub(crate) api_key: String, /// The URL for the LlamaEdge query server. Supplying this implies usage. pub(crate) query_server_url: String, - /// The search API backend to use for requests. + /// The search API backend to use for internet search. 
pub(crate) search_backend: String, } From 9af759b552658d1b20170565d98e97397acaa3d7 Mon Sep 17 00:00:00 2001 From: suryyyansh Date: Mon, 2 Sep 2024 18:11:13 +0530 Subject: [PATCH 06/11] updated dependencies Signed-off-by: suryyyansh --- Cargo.toml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b492325..3e9f2a7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,9 +4,6 @@ version = "0.9.3" edition = "2021" [dependencies] -endpoints = { git = "https://github.com/suryyyansh/LlamaEdge.git", branch = "dev"} -llama-core = { git = "https://github.com/suryyyansh/LlamaEdge.git", branch = "dev", features = ["full"]} -chat-prompts = { git = "https://github.com/suryyyansh/LlamaEdge.git", branch = "dev"} anyhow = "1.0.80" chat-prompts = { version = "=0.14.0" } chrono = "0.4.38" @@ -38,5 +35,5 @@ hyper = { git = "https://github.com/second-state/wasi_hyper.git", branch = "v0.1 tokio = { git = "https://github.com/second-state/wasi_tokio.git", branch = "v1.36.x" } [features] -default = ["search"] +default = [] search = [] From 8b3d4bfff1f64a6802accd5faeb779e7f5243ca7 Mon Sep 17 00:00:00 2001 From: suryansh <118013430+suryyyansh@users.noreply.github.com> Date: Mon, 2 Sep 2024 19:22:13 +0530 Subject: [PATCH 07/11] Update README.md with search usage instructions --- README.md | 48 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index fb8b00e..4e89214 100644 --- a/README.md +++ b/README.md @@ -444,16 +444,16 @@ git clone https://github.com/LlamaEdge/rag-api-server.git cd rag-api-server # (Optional) Add the `wasm32-wasi` target to the Rust toolchain -rustup target add wasm32-wasi +rustup target add wasm32-wasip1 -# Build `rag-api-server.wasm` with the `http` support only, or -cargo build --target wasm32-wasi --release +# Build `rag-api-server.wasm` without internet search +cargo build --target wasm32-wasip1 --release -# Build `rag-api-server.wasm` with both 
`http` and `https` support -cargo build --target wasm32-wasi --release --features full +# Build `rag-api-server.wasm` with internet search capability +cargo build --target wasm32-wasip1 --release --features search # Copy the `rag-api-server.wasm` to the root directory -cp target/wasm32-wasi/release/rag-api-server.wasm . +cp target/wasm32-wasip1/release/rag-api-server.wasm . ```
To check the CLI options, @@ -524,6 +524,19 @@ To check the CLI options of the `rag-api-server` wasm app, you can run the follo Print version ``` +Compiling the server with the `search` feature enabled (using either the `--features search` flag when building or editing `Cargo.toml`), the following extra CLI arguments will be made available: + +```bash + --api-key + API key to be supplied to the endpoint, if supported + [default: ] + --query-server-url + The URL for the LlamaEdge query server. Supplying this implies usage + --search-backend + The search API backend to use for internet search + [default: tavily] +``` +
## Execute @@ -547,6 +560,8 @@ For the purpose of demonstration, we use the [Llama-2-7b-chat-hf-Q5_K_M.gguf](ht docker run -p 6333:6333 -p 6334:6334 -v $(pwd)/qdrant_storage:/qdrant/storage:z qdrant/qdrant ``` +### Start without Internet Search + - Start an instance of LlamaEdge-RAG API server ```bash wasmedge --dir .:. --nn-preload default:GGML:AUTO:Llama-2-7b-chat-hf-Q5_K_M.gguf \ --nn-preload embedding:GGML:AUTO:all-MiniLM-L6-v2-ggml-model-f16.gguf \ rag-api-server.wasm \ --model-name Llama-2-7b-chat-hf-Q5_K_M,all-MiniLM-L6-v2-ggml-model-f16 \ --ctx-size 4096,384 \ --prompt-template llama-2-chat,embedding \ --rag-prompt "Use the following pieces of context to answer the user's question.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n" \ --log-prompts \ --log-stat ``` +### Start with Internet Search + + - Start an instance of LlamaEdge-RAG API server with URL of your chosen [LlamaEdge Query Server](https://github.com/LlamaEdge/llamaedge-query-server/) instance. The query server can be ran locally. + + ```bash + wasmedge --dir .:. --nn-preload default:GGML:AUTO:Llama-2-7b-chat-hf-Q5_K_M.gguf \ + --nn-preload embedding:GGML:AUTO:all-MiniLM-L6-v2-ggml-model-f16.gguf \ + rag-api-server.wasm \ + --model-name Llama-2-7b-chat-hf-Q5_K_M,all-MiniLM-L6-v2-ggml-model-f16 \ + --ctx-size 4096,384 \ + --prompt-template llama-2-chat,embedding \ + --rag-prompt "Use the following pieces of context to answer the user's question.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n" \ + --api-key "xxx" \ # Use if your chosen LlamaEdge query server endpoint requires one. + --query-server-url "http://0.0.0.0:8081/" \ # URL of the LlamaEdge query server of your choosing. This is the default local endpoint. + --log-prompts \ + --log-stat + ``` + + ## Usage Example - [Execute](#execute) the server @@ -580,6 +614,8 @@ For the purpose of demonstration, we use the [Llama-2-7b-chat-hf-Q5_K_M.gguf](ht -d '{"messages":[{"role":"system", "content": "You are a helpful assistant."}, {"role":"user", "content": "What is the location of Paris, France along the Seine River?"}], "model":"Llama-2-7b-chat-hf-Q5_K_M"}' ``` +Internet search will only be used if the question cannot be answered using RAG.
If it is needed, the user message will be queried to the `/query/summarize` endpoint on the [LlamaEdge Query Server](https://github.com/LlamaEdge/llamaedge-query-server/) instance, where the server will respond with the summary of the internet search results if it decides it is necessary. + ## Set Log Level You can set the log level of the API server by setting the `LLAMA_LOG` environment variable. For example, to set the log level to `debug`, you can run the following command: From 94a4c09bc19564228b1e490e03727697f8bfc0db Mon Sep 17 00:00:00 2001 From: suryansh <118013430+suryyyansh@users.noreply.github.com> Date: Mon, 2 Sep 2024 19:24:26 +0530 Subject: [PATCH 08/11] Update README.md --- README.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 4e89214..2489f51 100644 --- a/README.md +++ b/README.md @@ -564,17 +564,17 @@ For the purpose of demonstration, we use the [Llama-2-7b-chat-hf-Q5_K_M.gguf](ht - Start an instance of LlamaEdge-RAG API server - ```bash - wasmedge --dir .:. --nn-preload default:GGML:AUTO:Llama-2-7b-chat-hf-Q5_K_M.gguf \ - --nn-preload embedding:GGML:AUTO:all-MiniLM-L6-v2-ggml-model-f16.gguf \ - rag-api-server.wasm \ - --model-name Llama-2-7b-chat-hf-Q5_K_M,all-MiniLM-L6-v2-ggml-model-f16 \ - --ctx-size 4096,384 \ - --prompt-template llama-2-chat,embedding \ - --rag-prompt "Use the following pieces of context to answer the user's question.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n" \ - --log-prompts \ - --log-stat - ``` + ```bash + wasmedge --dir .:. 
--nn-preload default:GGML:AUTO:Llama-2-7b-chat-hf-Q5_K_M.gguf \ + --nn-preload embedding:GGML:AUTO:all-MiniLM-L6-v2-ggml-model-f16.gguf \ + rag-api-server.wasm \ + --model-name Llama-2-7b-chat-hf-Q5_K_M,all-MiniLM-L6-v2-ggml-model-f16 \ + --ctx-size 4096,384 \ + --prompt-template llama-2-chat,embedding \ + --rag-prompt "Use the following pieces of context to answer the user's question.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n" \ + --log-prompts \ + --log-stat + ``` ### Start with Internet Search From 727a306f46916492a177cbbc8c06d9442f6dd81c Mon Sep 17 00:00:00 2001 From: suryansh <118013430+suryyyansh@users.noreply.github.com> Date: Mon, 2 Sep 2024 19:28:31 +0530 Subject: [PATCH 09/11] Update README.md --- README.md | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 2489f51..10c2617 100644 --- a/README.md +++ b/README.md @@ -526,7 +526,7 @@ To check the CLI options of the `rag-api-server` wasm app, you can run the follo Compiling the server with the `search` feature enabled (using either the `--features search` flag when building or editing `Cargo.toml`), the following extra CLI arguments will be made available: -```bash + ```bash --api-key API key to be supplied to the endpoint, if supported [default: ] @@ -535,7 +535,7 @@ Compiling the server with the `search` feature enabled (using either the `--feat --search-backend The search API backend to use for internet search [default: tavily] -``` + ``` @@ -578,21 +578,21 @@ For the purpose of demonstration, we use the [Llama-2-7b-chat-hf-Q5_K_M.gguf](ht ### Start with Internet Search - - Start an instance of LlamaEdge-RAG API server with URL of your chosen [LlamaEdge Query Server](https://github.com/LlamaEdge/llamaedge-query-server/) instance. The query server can be ran locally. 
+- Start an instance of LlamaEdge-RAG API server with the URL of your chosen [LlamaEdge Query Server](https://github.com/LlamaEdge/llamaedge-query-server/) instance. The query server can be run locally. - ```bash - wasmedge --dir .:. --nn-preload default:GGML:AUTO:Llama-2-7b-chat-hf-Q5_K_M.gguf \ - --nn-preload embedding:GGML:AUTO:all-MiniLM-L6-v2-ggml-model-f16.gguf \ - rag-api-server.wasm \ - --model-name Llama-2-7b-chat-hf-Q5_K_M,all-MiniLM-L6-v2-ggml-model-f16 \ - --ctx-size 4096,384 \ - --prompt-template llama-2-chat,embedding \ - --rag-prompt "Use the following pieces of context to answer the user's question.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n" \ - --api-key "xxx" \ # Use if your chosen LlamaEdge query server endpoint requires one. - --query-server-url "http://0.0.0.0:8081/" \ # URL of the LlamaEdge query server of your choosing. This is the default local endpoint. - --log-prompts \ - --log-stat - ``` + ```bash + wasmedge --dir .:. --nn-preload default:GGML:AUTO:Llama-2-7b-chat-hf-Q5_K_M.gguf \ + --nn-preload embedding:GGML:AUTO:all-MiniLM-L6-v2-ggml-model-f16.gguf \ + rag-api-server.wasm \ + --model-name Llama-2-7b-chat-hf-Q5_K_M,all-MiniLM-L6-v2-ggml-model-f16 \ + --ctx-size 4096,384 \ + --prompt-template llama-2-chat,embedding \ + --rag-prompt "Use the following pieces of context to answer the user's question.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n" \ + --api-key "xxx" \ # Use if your chosen LlamaEdge query server endpoint requires one. + --query-server-url "http://0.0.0.0:8081/" \ # URL of the LlamaEdge query server of your choosing. This is the default local endpoint. 
+ --log-prompts \ + --log-stat + ``` ## Usage Example From 6467af58f343cff0f5cbf1a272878d8c0691a8d1 Mon Sep 17 00:00:00 2001 From: suryyyansh Date: Mon, 2 Sep 2024 23:03:49 +0530 Subject: [PATCH 10/11] annotated SEARCH_ARGUMENTS and fixed typo Signed-off-by: suryyyansh --- src/main.rs | 2 +- src/utils.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index 24cb70f..606bd5d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -480,7 +480,7 @@ async fn main() -> Result<(), ServerError> { SEARCH_ARGUMENTS .set(search_arguments) - .map_err(|_| ServerError::Operation("Failed to set `SERVER_INFO`.".to_string()))?; + .map_err(|_| ServerError::Operation("Failed to set `SEARCH_ARGUMENTS`.".to_string()))?; } // let server = Server::bind(&addr).serve(new_service); diff --git a/src/utils.rs b/src/utils.rs index 95050ab..d973680 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -11,6 +11,7 @@ pub(crate) fn gen_chat_id() -> String { /// Search related items that aren't directly supported by SearchConfig #[cfg(feature = "search")] +#[derive(Debug)] pub(crate) struct SearchArguments { /// API key to be supplied to the endpoint, if supported. Not used by Bing. 
pub(crate) api_key: String, From de6684fd182d648d48102a25decbc160e2e98146 Mon Sep 17 00:00:00 2001 From: suryyyansh Date: Thu, 5 Sep 2024 19:23:27 +0530 Subject: [PATCH 11/11] Made search fall back to RAG in case the query server is inaccessible Signed-off-by: suryyyansh --- src/backend/ggml.rs | 88 +++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 43 deletions(-) diff --git a/src/backend/ggml.rs b/src/backend/ggml.rs index 0644e49..d2a69e2 100644 --- a/src/backend/ggml.rs +++ b/src/backend/ggml.rs @@ -491,7 +491,7 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response { Ok(se) => se, Err(_) => { - let err_msg = "couldn't build summary_endpoint from query_server_url".to_string(); + let err_msg = "Couldn't build summary_endpoint from query_server_url".to_string(); error!(target: "stdout", "{}", &err_msg); return error::internal_server_error(err_msg); @@ -515,7 +515,7 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response )) { Ok(request) => request, Err(_) => { - let err_msg = "failed to build request to LLamaEdge query server.".to_string(); + let err_msg = "Failed to build request to LLamaEdge query server.".to_string(); error!(target: "stdout", "{}", &err_msg); return error::internal_server_error(err_msg); } @@ -524,55 +524,57 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response info!(target: "stdout", "Querying the LlamaEdge query server."); let client = hyper::client::Client::new(); - let res = match client.request(req).await { - Ok(response) => response, - Err(e) => { - let err_msg = format!("couldn't make request to LlamaEdge query server: {}", e); - error!(target: "stdout", "{}", &err_msg); - - return error::internal_server_error(err_msg); - } - }; - - let is_success = res.status().is_success(); + match client.request(req).await { + Ok(res) => { + let is_success = res.status().is_success(); - let body_bytes = match hyper::body::to_bytes(res.into_body()).await { - Ok(bytes) => bytes, 
- Err(e) => { - let err_msg = format!("couldn't convert body into bytes: {}", e); - error!(target: "stdout", "{}", &err_msg); + let body_bytes = match hyper::body::to_bytes(res.into_body()).await { + Ok(bytes) => bytes, + Err(e) => { + let err_msg = format!("Couldn't convert body into bytes: {}", e); + error!(target: "stdout", "{}", &err_msg); - return error::internal_server_error(err_msg); - } - }; + return error::internal_server_error(err_msg); + } + }; - let body_json: serde_json::Value = match serde_json::from_slice(&body_bytes) { - Ok(json) => json, - Err(e) => { - let err_msg = format!("couldn't convert body into json: {}", e); - error!(target: "stdout", "{}", &err_msg); + let body_json: serde_json::Value = match serde_json::from_slice(&body_bytes) { + Ok(json) => json, + Err(e) => { + let err_msg = format!("Couldn't convert body into json: {}", e); + error!(target: "stdout", "{}", &err_msg); - return error::internal_server_error(err_msg); - } - }; + return error::internal_server_error(err_msg); + } + }; - info!(target: "stdout", "processed query server response json body: \n{}", body_json); + info!(target: "stdout", "processed query server response json body: \n{}", body_json); - // if the request is a success, check decision and inject results accordingly. - if is_success && body_json["decision"].as_bool().unwrap_or(true) { - // the logic to ensure "results" is a serde_json::Value::String is present on the - // llamaedge-query-server. - let results = body_json["results"].as_str().unwrap_or(""); + // if the request is a success, check decision and inject results accordingly. + if is_success && body_json["decision"].as_bool().unwrap_or(true) { + // the logic to ensure "results" is a serde_json::Value::String is present on the + // llamaedge-query-server. 
+ let results = body_json["results"].as_str().unwrap_or(""); - //inject search results - let system_search_result_message: ChatCompletionRequestMessage = - ChatCompletionRequestMessage::new_system_message(results, None); + info!(target: "stdout", "injecting search summary into conversation context."); + //inject search results + let system_search_result_message: ChatCompletionRequestMessage = + ChatCompletionRequestMessage::new_system_message(results, None); - chat_request.messages.insert( - chat_request.messages.len() - 1, - system_search_result_message, - ) - } + chat_request.messages.insert( + chat_request.messages.len() - 1, + system_search_result_message, + ) + } + } + Err(e) => { + let err_msg = format!( + "Couldn't make request to LlamaEdge query server, switching to regular RAG: {}", + e + ); + warn!(target: "stdout", "{}", &err_msg); + } + }; } // chat completion