From 89578408a29447798edfe5438d141a4f7d26cc2a Mon Sep 17 00:00:00 2001 From: suryyyansh Date: Wed, 28 Aug 2024 04:05:36 +0530 Subject: [PATCH 01/11] basic search functionality added. Triggers when the RAG query returns no results Signed-off-by: suryyyansh --- Cargo.toml | 6 +- src/backend/ggml.rs | 29 ++++++++ src/error.rs | 3 + src/main.rs | 89 ++++++++++++++++++++++++ src/search/bing_search.rs | 56 +++++++++++++++ src/search/local_google_search.rs | 40 +++++++++++ src/search/mod.rs | 110 ++++++++++++++++++++++++++++++ src/search/tavily_search.rs | 44 ++++++++++++ src/utils.rs | 11 +++ 9 files changed, 387 insertions(+), 1 deletion(-) create mode 100644 src/search/bing_search.rs create mode 100644 src/search/local_google_search.rs create mode 100644 src/search/mod.rs create mode 100644 src/search/tavily_search.rs diff --git a/Cargo.toml b/Cargo.toml index 92d5498..b492325 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,9 @@ version = "0.9.3" edition = "2021" [dependencies] +endpoints = { git = "https://github.com/suryyyansh/LlamaEdge.git", branch = "dev"} +llama-core = { git = "https://github.com/suryyyansh/LlamaEdge.git", branch = "dev", features = ["full"]} +chat-prompts = { git = "https://github.com/suryyyansh/LlamaEdge.git", branch = "dev"} anyhow = "1.0.80" chat-prompts = { version = "=0.14.0" } chrono = "0.4.38" @@ -35,4 +38,5 @@ hyper = { git = "https://github.com/second-state/wasi_hyper.git", branch = "v0.1 tokio = { git = "https://github.com/second-state/wasi_tokio.git", branch = "v1.36.x" } [features] -default = [] +default = ["search"] +search = [] diff --git a/src/backend/ggml.rs b/src/backend/ggml.rs index 4f2285d..b8d9d63 100644 --- a/src/backend/ggml.rs +++ b/src/backend/ggml.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "search")] +use crate::search::*; use crate::{error, utils::gen_chat_id, GLOBAL_RAG_PROMPT, SERVER_INFO}; use chat_prompts::{error as ChatPromptsError, MergeRagContext, MergeRagContextPolicy}; use endpoints::{ @@ -372,6 +374,9 @@ 
pub(crate) async fn rag_query_handler(mut req: Request) -> Response } }; + #[cfg(feature = "search")] + let mut web_search_allowed: bool = false; + if let Some(ro) = res { match ro.points { Some(scored_points) => { @@ -379,6 +384,11 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response true => { // log warn!(target: "stdout", "{}", format!("No point retrieved (score < threshold {})", server_info.qdrant_config.score_threshold)); + #[cfg(feature = "search")] + { + info!(target: "stdout", "No points retrieved, enabling web search."); + web_search_allowed = true; + } } false => { // update messages with retrieved context @@ -435,10 +445,29 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response // log warn!(target: "stdout", "{}", format!("No point retrieved (score < threshold {})", server_info.qdrant_config.score_threshold )); + + #[cfg(feature = "search")] + { + info!(target: "stdout", "No points retrieved, enabling web search."); + web_search_allowed = true; + } } } } + #[cfg(feature = "search")] + if web_search_allowed { + // TODO: check the llamaedge-query-server if the current user query could use an internet search. 
+ + info!(target: "stdout", "Performing web search."); + if let Err(e) = insert_search_results(&mut chat_request).await { + let err_msg = "encountered an error while appending search results.".to_string(); + // log + error!(target: "stdout", "{}", &err_msg); + return e; + } + } + // chat completion let res = match llama_core::chat::chat(&mut chat_request).await { Ok(result) => match result { diff --git a/src/error.rs b/src/error.rs index cd3ac19..a96c956 100644 --- a/src/error.rs +++ b/src/error.rs @@ -82,4 +82,7 @@ pub enum ServerError { ArgumentError(String), #[error("{0}")] Operation(String), + /// Conversion error when converting to SearchOutput + #[error("{0}")] + SearchConversionError(String), } diff --git a/src/main.rs b/src/main.rs index 4f8ab6e..7693b82 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,6 +3,8 @@ extern crate log; mod backend; mod error; +#[cfg(feature = "search")] +mod search; mod utils; use anyhow::Result; @@ -16,11 +18,17 @@ use hyper::{ service::{make_service_fn, service_fn}, Body, Request, Response, Server, StatusCode, }; +#[cfg(feature = "search")] +use llama_core::search::{ContentType, SearchConfig}; use llama_core::MetadataBuilder; use once_cell::sync::OnceCell; +#[cfg(feature = "search")] +use search::*; use serde::{Deserialize, Serialize}; use std::{collections::HashMap, net::SocketAddr, path::PathBuf}; use tokio::net::TcpListener; +#[cfg(feature = "search")] +use utils::SearchArguments; use utils::{is_valid_url, LogLevel}; type Error = Box; @@ -29,6 +37,12 @@ type Error = Box; pub(crate) static GLOBAL_RAG_PROMPT: OnceCell = OnceCell::new(); // server info pub(crate) static SERVER_INFO: OnceCell = OnceCell::new(); +// default SearchConfig +#[cfg(feature = "search")] +pub(crate) static SEARCH_CONFIG: OnceCell = OnceCell::new(); +// search related arguments passed on the command line +#[cfg(feature = "search")] +pub(crate) static SEARCH_ARGUMENTS: OnceCell = OnceCell::new(); // default socket address const DEFAULT_SOCKET_ADDRESS: 
&str = "0.0.0.0:8080"; @@ -127,6 +141,24 @@ struct Cli { /// Deprecated. Print all log information to stdout #[arg(long)] log_all: bool, + /// Maximum number search results to use. + #[arg(long, default_value = "5")] + max_search_results: u8, + /// Size to clip every result to. + #[arg(long, default_value = "300")] + size_limit_per_result: u16, + /// API key to be supplied to the endpoint, if supported. + #[arg(long, default_value = "")] + api_key: String, + /// System prompt explut ChatCompletionRequest: &aining to the LLM how to interpret search results. + #[arg( + long, + default_value = "You found the following search results on the internet. Use them to answer the user's query.\n\n" + )] + search_prompt: String, + /// API key to be supplied to the endpoint, if supported. + #[arg(long)] + summarize: bool, } #[tokio::main(flavor = "current_thread")] @@ -421,6 +453,63 @@ async fn main() -> Result<(), ServerError> { info!(target: "stdout", "gaianet_node_version: {}", node.as_ref().unwrap()); } + // setup search items + #[cfg(feature = "search")] + { + // by default, we will use Tavily. 
+ let tavily_config = llama_core::search::SearchConfig::new( + "tavily".to_owned(), + cli.max_search_results, + cli.size_limit_per_result, + "https://api.tavily.com/search".to_owned(), + ContentType::JSON, + ContentType::JSON, + "POST".to_owned(), + None, + tavily_search::tavily_parser, + None, + None, + ); + + SEARCH_CONFIG + .set(tavily_config) + .map_err(|_| ServerError::Operation("Failed to set `SEARCH_CONFIG`.".to_owned()))?; + + // Bing Search: + // + // let mut additional_headers = HashMap::new(); + // additional_headers.insert("Ocp-Apim-Subscription-Key".to_string(), cli.api_key.clone()); + // + // let bing_config = llama_core::search::SearchConfig::new( + // "bing".to_owned(), + // cli.max_search_results, + // cli.size_limit_per_result, + // // use of https requires the "full" or "https" feature + // "https://api.bing.microsoft.com/v7.0/search".to_owned(), + // ContentType::JSON, + // ContentType::JSON, + // "GET".to_owned(), + // Some(additional_headers), + // bing_search::bing_parser, + // None, + // None, + // ); + // + // SEARCH_CONFIG + // .set(bing_config) + // .map_err(|_| ServerError::Operation("Failed to set `SEARCH_CONFIG`.".to_owned()))?; + + let search_arguments = SearchArguments { + api_key: cli.api_key.clone(), + search_prompt: cli.search_prompt.clone(), + summarize: cli.summarize, + }; + + SEARCH_ARGUMENTS + .set(search_arguments) + .map_err(|_| ServerError::Operation("Failed to set `SEARCH_ARGUMENTS`.".to_owned()))?; + } + // create server info let server_info = ServerInfo { node, diff --git a/src/search/bing_search.rs b/src/search/bing_search.rs new file mode 100644 index 0000000..56caf1b --- /dev/null +++ b/src/search/bing_search.rs @@ -0,0 +1,56 @@ +use crate::error::ServerError; +use llama_core::search::{SearchOutput, SearchResult}; +use serde::Serialize; + +// Note: bing also requires the `Ocp-Apim-Subscription-Key` header: https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/reference/headers + +#[allow(non_snake_case)] 
+#[derive(Serialize)] +pub struct BingSearchInput { + /// The number of search results to return in the response. The default is 10 and the maximum value is 50. The actual number delivered may be less than requested. + pub count: u8, + /// The user's search query term. The term may not be empty. + pub q: String, + /// FIlter list for responses useful to the LLM. + pub responseFilter: String, +} + +#[allow(dead_code)] +pub fn bing_parser( + raw_results: &serde_json::Value, +) -> Result> { + println!("\n\n\n RAW RESULTS: \n\n\n {}", raw_results.to_string()); + + // parse webpages + let web_pages_object = match raw_results["webPages"].is_object() { + true => match raw_results["webPages"]["value"].as_array() { + Some(value) => value, + None => { + let msg = r#"could not convert the "value" field of "webPages" to an array"#; + error!(target: "bing_parser", "bing_parser: {}", msg); + return Err(Box::new(ServerError::SearchConversionError( + msg.to_string(), + ))); + } + }, + false => { + let msg = "no webpages found when parsing query."; + error!(target: "bing_parser", "bing_parser: {}", msg); + return Err(Box::new(ServerError::SearchConversionError( + msg.to_string(), + ))); + } + }; + + let mut results = Vec::new(); + for result in web_pages_object { + let current_result = SearchResult { + url: result["url"].to_string(), + site_name: result["siteName"].to_string(), + text_content: result["snippet"].to_string(), + }; + results.push(current_result); + } + + Ok(SearchOutput { results }) +} diff --git a/src/search/local_google_search.rs b/src/search/local_google_search.rs new file mode 100644 index 0000000..7f2b47a --- /dev/null +++ b/src/search/local_google_search.rs @@ -0,0 +1,40 @@ +use crate::error::ServerError; +use llama_core::search::{SearchOutput, SearchResult}; +use serde::Serialize; + +#[allow(non_snake_case)] +#[derive(Serialize)] +pub struct LocalGoogleSearchInput { + pub term: String, + pub engine: String, + pub maxSearchResults: u8, +} + +#[allow(dead_code)] 
+pub fn local_google_parser( + raw_results: &serde_json::Value, +) -> Result> { + let results_array = match raw_results.as_array() { + Some(array) => array, + None => { + let msg = "No results returned from server"; + error!(target: "search_server", "google_parser: {}", msg); + return Err(Box::new(ServerError::SearchConversionError( + msg.to_string(), + ))); + } + }; + + let mut results = Vec::new(); + + for result in results_array { + let current_result = SearchResult { + url: result["url"].to_string(), + site_name: result["siteName"].to_string(), + text_content: result["textContent"].to_string(), + }; + results.push(current_result) + } + + Ok(SearchOutput { results }) +} diff --git a/src/search/mod.rs b/src/search/mod.rs new file mode 100644 index 0000000..f13393b --- /dev/null +++ b/src/search/mod.rs @@ -0,0 +1,110 @@ +pub mod bing_search; +pub mod local_google_search; +pub mod tavily_search; + +use crate::{error, SEARCH_ARGUMENTS, SEARCH_CONFIG}; +use endpoints::chat::{ + ChatCompletionRequest, ChatCompletionRequestMessage, ChatCompletionSystemMessage, + ChatCompletionUserMessageContent, ContentPart, +}; + +#[allow(dead_code)] +pub(crate) async fn insert_search_results( + chat_request: &mut ChatCompletionRequest, +) -> Result<(), hyper::Response> { + let search_arguments = match SEARCH_ARGUMENTS.get() { + Some(sa) => sa, + None => { + return Err(error::internal_server_error( + "Failed to get `SEARCH_ARGUMENTS`. Was it set?", + )); + } + }; + + if let Some(ChatCompletionRequestMessage::User(ref message)) = chat_request.messages.last() { + let search_config = match SEARCH_CONFIG.get() { + Some(sc) => sc, + None => { + let err_msg = format!("Failed to obtain SEARCH_CONFIG. 
Was it set?"); + error!(target: "insert_search_results", "{}", &err_msg); + + return Err(error::internal_server_error(err_msg)); + } + }; + info!(target: "insert_search_results", "performing search"); + + let user_message_content = match message.content() { + ChatCompletionUserMessageContent::Text(message) => message.to_owned(), + ChatCompletionUserMessageContent::Parts(parts) => { + let mut message: String = "".to_owned(); + for part in parts { + match part { + ContentPart::Text(message_part) => { + message.push_str(message_part.text()); + } + ContentPart::Image(_) => {} + } + } + message + } + }; + + // set search input. + let search_input = tavily_search::TavilySearchInput { + api_key: search_arguments.api_key.to_owned(), + include_answer: false, + include_images: false, + query: user_message_content, + max_results: search_config.max_search_results, + include_raw_content: false, + search_depth: "advanced".to_owned(), + }; + + // Prepare the final `results` string for use as input. + let mut results = search_arguments.search_prompt.clone(); + + match search_arguments.summarize { + true => { + match search_config.summarize_search(&search_input).await { + // Append the result summary to the search prompt. 
+ Ok(search_summary) => results += search_summary.as_str(), + Err(e) => { + let err_msg = format!( + "Failed to performing summarized search on SEACH_CONFIG {msg}", + msg = e + ); + error!(target: "insert_search_results", "{}", &err_msg); + + return Err(error::internal_server_error(err_msg)); + } + }; + } + false => { + let search_output: llama_core::search::SearchOutput = + match search_config.perform_search(&search_input).await { + Ok(search_output) => search_output, + Err(e) => { + let err_msg = + format!("Failed to perform search on SEACH_CONFIG: {msg}", msg = e); + error!(target: "insert_search_results", "{}", &err_msg); + + return Err(error::internal_server_error(err_msg)); + } + }; + + for result in search_output.results { + results.push_str(result.text_content.as_str()); + results.push_str("\n\n"); + } + } + } + + let system_search_result_message = ChatCompletionSystemMessage::new(results, None); + + chat_request.messages.insert( + chat_request.messages.len() - 1, + ChatCompletionRequestMessage::System(system_search_result_message), + ) + } + Ok(()) +} diff --git a/src/search/tavily_search.rs b/src/search/tavily_search.rs new file mode 100644 index 0000000..594b9fe --- /dev/null +++ b/src/search/tavily_search.rs @@ -0,0 +1,44 @@ +use crate::error::ServerError; +use llama_core::search::{SearchOutput, SearchResult}; +use serde::Serialize; + +#[allow(non_snake_case)] +#[derive(Serialize)] +pub struct TavilySearchInput { + pub api_key: String, + pub include_answer: bool, + pub include_images: bool, + pub query: String, + pub max_results: u8, + pub include_raw_content: bool, + pub search_depth: String, +} + +#[allow(dead_code)] +pub fn tavily_parser( + raw_results: &serde_json::Value, +) -> Result> { + let results_array = match raw_results["results"].as_array() { + Some(array) => array, + None => { + let msg = "No results returned from server"; + error!(target: "search_server", "google_parser: {}", msg); + return Err(Box::new(ServerError::SearchConversionError( 
+ msg.to_string(), + ))); + } + }; + + let mut results = Vec::new(); + + for result in results_array { + let current_result = SearchResult { + url: result["url"].to_string(), + site_name: result["title"].to_string(), + text_content: result["content"].to_string(), + }; + results.push(current_result) + } + + Ok(SearchOutput { results }) +} diff --git a/src/utils.rs b/src/utils.rs index 837da3f..7bc6edf 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -9,6 +9,17 @@ pub(crate) fn gen_chat_id() -> String { format!("chatcmpl-{}", uuid::Uuid::new_v4()) } +/// Search related items that aren't directly supported by SearchConfig +#[cfg(feature = "search")] +pub(crate) struct SearchArguments { + /// API key to be supplied to the endpoint, if supported. Not used by Bing. + pub(crate) api_key: String, + /// System prompt explaining to the LLM how to interpret search results. + pub(crate) search_prompt: String, + /// Whether to summarize the search results before using them. + pub(crate) summarize: bool, +} + #[derive( Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, clap::ValueEnum, Serialize, Deserialize, )] From fd54590cd2d7dc385dcec1da3eacfcaa2646fa3c Mon Sep 17 00:00:00 2001 From: suryyyansh Date: Fri, 30 Aug 2024 01:13:35 +0530 Subject: [PATCH 02/11] Added search functionality with llamaedge query server Signed-off-by: suryyyansh --- src/backend/ggml.rs | 133 ++++++++++++++++++++++++++++-- src/error.rs | 3 - src/main.rs | 106 ++++++------------------ src/search/bing_search.rs | 56 ------------- src/search/local_google_search.rs | 40 --------- src/search/mod.rs | 110 ------------------------ src/search/tavily_search.rs | 44 ---------- src/utils.rs | 8 +- 8 files changed, 152 insertions(+), 348 deletions(-) delete mode 100644 src/search/bing_search.rs delete mode 100644 src/search/local_google_search.rs delete mode 100644 src/search/mod.rs delete mode 100644 src/search/tavily_search.rs diff --git a/src/backend/ggml.rs b/src/backend/ggml.rs index b8d9d63..dfa646f 100644 --- 
a/src/backend/ggml.rs +++ b/src/backend/ggml.rs @@ -1,5 +1,3 @@ -#[cfg(feature = "search")] -use crate::search::*; use crate::{error, utils::gen_chat_id, GLOBAL_RAG_PROMPT, SERVER_INFO}; use chat_prompts::{error as ChatPromptsError, MergeRagContext, MergeRagContextPolicy}; use endpoints::{ @@ -263,6 +261,9 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response info!(target: "stdout", "Compute embeddings for user query."); + #[cfg(feature = "search")] + let query: String; + // * compute embeddings for user query let embedding_response = match chat_request.messages.is_empty() { true => { @@ -289,6 +290,10 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response } }; + #[cfg(feature = "search")] + { + query = query_text.clone(); + } // log info!(target: "stdout", "query text: {}", query_text); @@ -384,6 +389,7 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response true => { // log warn!(target: "stdout", "{}", format!("No point retrieved (score < threshold {})", server_info.qdrant_config.score_threshold)); + #[cfg(feature = "search")] { info!(target: "stdout", "No points retrieved, enabling web search."); @@ -457,14 +463,123 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response #[cfg(feature = "search")] if web_search_allowed { - // TODO: check the llamaedge-query-server if the current user query could use an internet search. + let search_arguments = match crate::SEARCH_ARGUMENTS.get() { + Some(sc) => sc, + None => { + let err_msg = "Failed to obtain SEARCH_ARGUMENTS. 
Was it set?".to_string(); + error!(target: "stdout", "{}", &err_msg); - info!(target: "stdout", "Performing web search."); - if let Err(e) = insert_search_results(&mut chat_request).await { - let err_msg = "encountered an error while appending search results.".to_string(); - // log - error!(target: "stdout", "{}", &err_msg); - return e; + return error::internal_server_error(err_msg); + } + }; + + let endpoint: hyper::Uri = match search_arguments.query_server_url.parse() { + Ok(uri) => uri, + Err(e) => { + let err_msg = format!( + "LlamaEdge Query server URL could not be parsed: {}", + e.to_string() + ); + error!(target: "stdout", "{}", &err_msg); + + return error::internal_server_error(err_msg); + } + }; + + let summary_endpoint = match hyper::Uri::builder() + .scheme(endpoint.scheme().unwrap().to_string().as_str()) + .authority(endpoint.authority().unwrap().to_string().as_str()) + .path_and_query("/test/summarize") + .build() + { + Ok(se) => se, + Err(_) => { + let err_msg = "couldn't build summary_endpoint from query_server_url".to_string(); + error!(target: "stdout", "{}", &err_msg); + + return error::internal_server_error(err_msg); + } + }; + + //perform query, extract summary, add to + let req = match Request::builder() + .method(Method::POST) + .uri(summary_endpoint) + .header("content-type", "application/json") + .body(Body::from( + serde_json::json!({ + "search_config" : { + "api_key": search_arguments.api_key, + }, + "backend": search_arguments.search_backend, + "query": query, + }) + .to_string(), + )) { + Ok(request) => request, + Err(_) => { + let err_msg = "failed to build request to LLamaEdge query server.".to_string(); + error!(target: "stdout", "{}", &err_msg); + return error::internal_server_error(err_msg); + } + }; + + info!(target: "stdout", "Querying the LlamaEdge query server."); + + let client = hyper::client::Client::new(); + let res = match client.request(req).await { + Ok(response) => response, + Err(e) => { + let err_msg = format!( + 
"couldn't make request to LlamaEdge query server: {}", + e.to_string() + ); + error!(target: "stdout", "{}", &err_msg); + + return error::internal_server_error(err_msg); + } + }; + + let is_success = res.status().is_success(); + + let body_bytes = match hyper::body::to_bytes(res.into_body()).await { + Ok(bytes) => bytes, + Err(e) => { + let err_msg = format!("couldn't convert body into bytes: {}", e.to_string()); + error!(target: "stdout", "{}", &err_msg); + + return error::internal_server_error(err_msg); + } + }; + + let body_json: serde_json::Value = match serde_json::from_slice(&body_bytes) { + Ok(json) => json, + Err(e) => { + let err_msg = format!("couldn't convert body into json: {}", e.to_string()); + error!(target: "stdout", "{}", &err_msg); + + return error::internal_server_error(err_msg); + } + }; + + info!(target: "stdout", "processed query server response json body: \n{}", body_json); + + // if the request is a success, check decision and inject results accordingly. + if is_success { + if body_json["decision"].as_bool().unwrap_or(true) { + // the logic to ensure "results" is a serde_json::Value::String is present on the + // llamaedge-query-server. 
+ let results = body_json["results"].as_str().unwrap_or(""); + + //inject search results + let system_search_result_message: ChatCompletionRequestMessage = + ChatCompletionRequestMessage::new_system_message(results, None); + + chat_request.messages.insert( + chat_request.messages.len() - 1, + system_search_result_message, + ) + } } } diff --git a/src/error.rs b/src/error.rs index a96c956..cd3ac19 100644 --- a/src/error.rs +++ b/src/error.rs @@ -82,7 +82,4 @@ pub enum ServerError { ArgumentError(String), #[error("{0}")] Operation(String), - /// Conversion error when converting to SearchOutput - #[error("{0}")] - SearchConversionError(String), } diff --git a/src/main.rs b/src/main.rs index 7693b82..ab03ed6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,8 +3,7 @@ extern crate log; mod backend; mod error; -#[cfg(feature = "search")] -mod search; + mod utils; use anyhow::Result; @@ -18,12 +17,8 @@ use hyper::{ service::{make_service_fn, service_fn}, Body, Request, Response, Server, StatusCode, }; -#[cfg(feature = "search")] -use llama_core::search::{ContentType, SearchConfig}; use llama_core::MetadataBuilder; use once_cell::sync::OnceCell; -#[cfg(feature = "search")] -use search::*; use serde::{Deserialize, Serialize}; use std::{collections::HashMap, net::SocketAddr, path::PathBuf}; use tokio::net::TcpListener; @@ -37,10 +32,7 @@ type Error = Box; pub(crate) static GLOBAL_RAG_PROMPT: OnceCell = OnceCell::new(); // server info pub(crate) static SERVER_INFO: OnceCell = OnceCell::new(); -// default SearchConfig -#[cfg(feature = "search")] -pub(crate) static SEARCH_CONFIG: OnceCell = OnceCell::new(); -// search related arguments passed on the command line +// search cli arguments #[cfg(feature = "search")] pub(crate) static SEARCH_ARGUMENTS: OnceCell = OnceCell::new(); @@ -141,24 +133,18 @@ struct Cli { /// Deprecated. Print all log information to stdout #[arg(long)] log_all: bool, - /// Maximum number search results to use. 
-    #[arg(long, default_value = "5")]
-    max_search_results: u8,
-    /// Size to clip every result to.
-    #[arg(long, default_value = "300")]
-    size_limit_per_result: u16,
     /// API key to be supplied to the endpoint, if supported.
+    #[cfg(feature = "search")]
     #[arg(long, default_value = "")]
     api_key: String,
-    /// System prompt explut ChatCompletionRequest: &aining to the LLM how to interpret search results.
-    #[arg(
-        long,
-        default_value = "You found the following search results on the internet. Use them to answer the user's query.\n\n"
-    )]
-    search_prompt: String,
-    /// API key to be supplied to the endpoint, if supported.
-    #[arg(long)]
-    summarize: bool,
+    /// The URL for the LlamaEdge query server. Supplying this implies usage.
+    #[cfg(feature = "search")]
+    #[arg(long, required = true)]
+    query_server_url: String,
+    /// The search backend to use with the LlamaEdge query server.
+    #[cfg(feature = "search")]
+    #[arg(long, default_value = "tavily", requires = "query_server_url")]
+    search_backend: String,
 }
 #[tokio::main(flavor = "current_thread")]
 async fn main() -> Result<(), ServerError> {
@@ -453,63 +439,6 @@ async fn main() -> Result<(), ServerError> {
 
         info!(target: "stdout", "gaianet_node_version: {}", node.as_ref().unwrap());
     }
-    // setup search items
-    #[cfg(feature = "search")]
-    {
-        // by default, we will use Tavily.
-        let tavily_config = llama_core::search::SearchConfig::new(
-            "tavily".to_owned(),
-            cli.max_search_results,
-            cli.size_limit_per_result,
-            "https://api.tavily.com/search".to_owned(),
-            ContentType::JSON,
-            ContentType::JSON,
-            "POST".to_owned(),
-            None,
-            tavily_search::tavily_parser,
-            None,
-            None,
-        );
-
-        SEARCH_CONFIG
-            .set(tavily_config)
-            .map_err(|_| ServerError::Operation("Failed to set `SEARCH_CONFIG`.".to_owned()))?;
-
-        // Bing Search:
-        //
-        // let mut additional_headers = HashMap::new();
-        // additional_headers.insert("Ocp-Apim-Subscription-Key".to_string(), cli.api_key.clone());
-        //
-        // let bing_config = llama_core::search::SearchConfig::new(
-        //     "bing".to_owned(),
-        //     cli.max_search_results,
-        //     cli.size_limit_per_result,
-        //     // use of https requires the "full" or "https" feature
-        //     "https://api.bing.microsoft.com/v7.0/search".to_owned(),
-        //     ContentType::JSON,
-        //     ContentType::JSON,
-        //     "GET".to_owned(),
-        //     Some(additional_headers),
-        //     bing_search::bing_parser,
-        //     None,
-        //     None,
-        // );
-        //
-        // SEARCH_CONFIG
-        //     .set(bing_config)
-        //     .map_err(|_| ServerError::Operation("Failed to set `SEARCH_CONFIG`.".to_owned()))?;
-
-        let search_arguments = SearchArguments {
-            api_key: cli.api_key.clone(),
-            search_prompt: cli.search_prompt.clone(),
-            summarize: cli.summarize,
-        };
-
-        SEARCH_ARGUMENTS
-            .set(search_arguments)
-            .map_err(|_| ServerError::Operation("Failed to set `SEARCH_ARGUMENTS`.".to_owned()))?;
-    }
-
     // create server info
     let server_info = ServerInfo {
         node,
@@ -541,6 +470,19 @@ async fn main() -> Result<(), ServerError> {
         }
     });
 
+    #[cfg(feature = "search")]
+    {
+        let search_arguments = SearchArguments {
+            api_key: cli.api_key,
+            query_server_url: cli.query_server_url,
+            search_backend: cli.search_backend,
+        };
+
+        SEARCH_ARGUMENTS
+            .set(search_arguments)
+            .map_err(|_| ServerError::Operation("Failed to set `SEARCH_ARGUMENTS`.".to_string()))?;
+    }
+
     // let server = Server::bind(&addr).serve(new_service);
     let tcp_listener =
TcpListener::bind(addr).await.unwrap(); diff --git a/src/search/bing_search.rs b/src/search/bing_search.rs deleted file mode 100644 index 56caf1b..0000000 --- a/src/search/bing_search.rs +++ /dev/null @@ -1,56 +0,0 @@ -use crate::error::ServerError; -use llama_core::search::{SearchOutput, SearchResult}; -use serde::Serialize; - -// Note: bing also requires the `Ocp-Apim-Subscription-Key` header: https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/reference/headers - -#[allow(non_snake_case)] -#[derive(Serialize)] -pub struct BingSearchInput { - /// The number of search results to return in the response. The default is 10 and the maximum value is 50. The actual number delivered may be less than requested. - pub count: u8, - /// The user's search query term. The term may not be empty. - pub q: String, - /// FIlter list for responses useful to the LLM. - pub responseFilter: String, -} - -#[allow(dead_code)] -pub fn bing_parser( - raw_results: &serde_json::Value, -) -> Result> { - println!("\n\n\n RAW RESULTS: \n\n\n {}", raw_results.to_string()); - - // parse webpages - let web_pages_object = match raw_results["webPages"].is_object() { - true => match raw_results["webPages"]["value"].as_array() { - Some(value) => value, - None => { - let msg = r#"could not convert the "value" field of "webPages" to an array"#; - error!(target: "bing_parser", "bing_parser: {}", msg); - return Err(Box::new(ServerError::SearchConversionError( - msg.to_string(), - ))); - } - }, - false => { - let msg = "no webpages found when parsing query."; - error!(target: "bing_parser", "bing_parser: {}", msg); - return Err(Box::new(ServerError::SearchConversionError( - msg.to_string(), - ))); - } - }; - - let mut results = Vec::new(); - for result in web_pages_object { - let current_result = SearchResult { - url: result["url"].to_string(), - site_name: result["siteName"].to_string(), - text_content: result["snippet"].to_string(), - }; - results.push(current_result); - } - - 
Ok(SearchOutput { results }) -} diff --git a/src/search/local_google_search.rs b/src/search/local_google_search.rs deleted file mode 100644 index 7f2b47a..0000000 --- a/src/search/local_google_search.rs +++ /dev/null @@ -1,40 +0,0 @@ -use crate::error::ServerError; -use llama_core::search::{SearchOutput, SearchResult}; -use serde::Serialize; - -#[allow(non_snake_case)] -#[derive(Serialize)] -pub struct LocalGoogleSearchInput { - pub term: String, - pub engine: String, - pub maxSearchResults: u8, -} - -#[allow(dead_code)] -pub fn local_google_parser( - raw_results: &serde_json::Value, -) -> Result> { - let results_array = match raw_results.as_array() { - Some(array) => array, - None => { - let msg = "No results returned from server"; - error!(target: "search_server", "google_parser: {}", msg); - return Err(Box::new(ServerError::SearchConversionError( - msg.to_string(), - ))); - } - }; - - let mut results = Vec::new(); - - for result in results_array { - let current_result = SearchResult { - url: result["url"].to_string(), - site_name: result["siteName"].to_string(), - text_content: result["textContent"].to_string(), - }; - results.push(current_result) - } - - Ok(SearchOutput { results }) -} diff --git a/src/search/mod.rs b/src/search/mod.rs deleted file mode 100644 index f13393b..0000000 --- a/src/search/mod.rs +++ /dev/null @@ -1,110 +0,0 @@ -pub mod bing_search; -pub mod local_google_search; -pub mod tavily_search; - -use crate::{error, SEARCH_ARGUMENTS, SEARCH_CONFIG}; -use endpoints::chat::{ - ChatCompletionRequest, ChatCompletionRequestMessage, ChatCompletionSystemMessage, - ChatCompletionUserMessageContent, ContentPart, -}; - -#[allow(dead_code)] -pub(crate) async fn insert_search_results( - chat_request: &mut ChatCompletionRequest, -) -> Result<(), hyper::Response> { - let search_arguments = match SEARCH_ARGUMENTS.get() { - Some(sa) => sa, - None => { - return Err(error::internal_server_error( - "Failed to get `SEARCH_ARGUMENTS`. 
Was it set?", - )); - } - }; - - if let Some(ChatCompletionRequestMessage::User(ref message)) = chat_request.messages.last() { - let search_config = match SEARCH_CONFIG.get() { - Some(sc) => sc, - None => { - let err_msg = format!("Failed to obtain SEARCH_CONFIG. Was it set?"); - error!(target: "insert_search_results", "{}", &err_msg); - - return Err(error::internal_server_error(err_msg)); - } - }; - info!(target: "insert_search_results", "performing search"); - - let user_message_content = match message.content() { - ChatCompletionUserMessageContent::Text(message) => message.to_owned(), - ChatCompletionUserMessageContent::Parts(parts) => { - let mut message: String = "".to_owned(); - for part in parts { - match part { - ContentPart::Text(message_part) => { - message.push_str(message_part.text()); - } - ContentPart::Image(_) => {} - } - } - message - } - }; - - // set search input. - let search_input = tavily_search::TavilySearchInput { - api_key: search_arguments.api_key.to_owned(), - include_answer: false, - include_images: false, - query: user_message_content, - max_results: search_config.max_search_results, - include_raw_content: false, - search_depth: "advanced".to_owned(), - }; - - // Prepare the final `results` string for use as input. - let mut results = search_arguments.search_prompt.clone(); - - match search_arguments.summarize { - true => { - match search_config.summarize_search(&search_input).await { - // Append the result summary to the search prompt. 
- Ok(search_summary) => results += search_summary.as_str(), - Err(e) => { - let err_msg = format!( - "Failed to performing summarized search on SEACH_CONFIG {msg}", - msg = e - ); - error!(target: "insert_search_results", "{}", &err_msg); - - return Err(error::internal_server_error(err_msg)); - } - }; - } - false => { - let search_output: llama_core::search::SearchOutput = - match search_config.perform_search(&search_input).await { - Ok(search_output) => search_output, - Err(e) => { - let err_msg = - format!("Failed to perform search on SEACH_CONFIG: {msg}", msg = e); - error!(target: "insert_search_results", "{}", &err_msg); - - return Err(error::internal_server_error(err_msg)); - } - }; - - for result in search_output.results { - results.push_str(result.text_content.as_str()); - results.push_str("\n\n"); - } - } - } - - let system_search_result_message = ChatCompletionSystemMessage::new(results, None); - - chat_request.messages.insert( - chat_request.messages.len() - 1, - ChatCompletionRequestMessage::System(system_search_result_message), - ) - } - Ok(()) -} diff --git a/src/search/tavily_search.rs b/src/search/tavily_search.rs deleted file mode 100644 index 594b9fe..0000000 --- a/src/search/tavily_search.rs +++ /dev/null @@ -1,44 +0,0 @@ -use crate::error::ServerError; -use llama_core::search::{SearchOutput, SearchResult}; -use serde::Serialize; - -#[allow(non_snake_case)] -#[derive(Serialize)] -pub struct TavilySearchInput { - pub api_key: String, - pub include_answer: bool, - pub include_images: bool, - pub query: String, - pub max_results: u8, - pub include_raw_content: bool, - pub search_depth: String, -} - -#[allow(dead_code)] -pub fn tavily_parser( - raw_results: &serde_json::Value, -) -> Result> { - let results_array = match raw_results["results"].as_array() { - Some(array) => array, - None => { - let msg = "No results returned from server"; - error!(target: "search_server", "google_parser: {}", msg); - return 
Err(Box::new(ServerError::SearchConversionError( - msg.to_string(), - ))); - } - }; - - let mut results = Vec::new(); - - for result in results_array { - let current_result = SearchResult { - url: result["url"].to_string(), - site_name: result["title"].to_string(), - text_content: result["content"].to_string(), - }; - results.push(current_result) - } - - Ok(SearchOutput { results }) -} diff --git a/src/utils.rs b/src/utils.rs index 7bc6edf..20caecf 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -14,10 +14,10 @@ pub(crate) fn gen_chat_id() -> String { pub(crate) struct SearchArguments { /// API key to be supplied to the endpoint, if supported. Not used by Bing. pub(crate) api_key: String, - /// System prompt explaining to the LLM how to interpret search results. - pub(crate) search_prompt: String, - /// Whether to summarize the search results before using them. - pub(crate) summarize: bool, + /// The URL for the LlamaEdge query server. Supplying this implies usage. + pub(crate) query_server_url: String, + /// The URL for the LlamaEdge query server. Supplying this implies usage. 
+ pub(crate) search_backend: String, } #[derive( From 9f2c1727206b70de9118838d5e81ef824ae467cd Mon Sep 17 00:00:00 2001 From: suryyyansh Date: Fri, 30 Aug 2024 01:41:26 +0530 Subject: [PATCH 03/11] fix CI warnings Signed-off-by: suryyyansh --- src/backend/ggml.rs | 42 +++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/src/backend/ggml.rs b/src/backend/ggml.rs index dfa646f..182fd74 100644 --- a/src/backend/ggml.rs +++ b/src/backend/ggml.rs @@ -476,10 +476,7 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response let endpoint: hyper::Uri = match search_arguments.query_server_url.parse() { Ok(uri) => uri, Err(e) => { - let err_msg = format!( - "LlamaEdge Query server URL could not be parsed: {}", - e.to_string() - ); + let err_msg = format!("LlamaEdge Query server URL could not be parsed: {}", e); error!(target: "stdout", "{}", &err_msg); return error::internal_server_error(err_msg); @@ -530,10 +527,7 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response let res = match client.request(req).await { Ok(response) => response, Err(e) => { - let err_msg = format!( - "couldn't make request to LlamaEdge query server: {}", - e.to_string() - ); + let err_msg = format!("couldn't make request to LlamaEdge query server: {}", e); error!(target: "stdout", "{}", &err_msg); return error::internal_server_error(err_msg); @@ -545,7 +539,7 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response let body_bytes = match hyper::body::to_bytes(res.into_body()).await { Ok(bytes) => bytes, Err(e) => { - let err_msg = format!("couldn't convert body into bytes: {}", e.to_string()); + let err_msg = format!("couldn't convert body into bytes: {}", e); error!(target: "stdout", "{}", &err_msg); return error::internal_server_error(err_msg); @@ -555,7 +549,7 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response let body_json: serde_json::Value = match 
serde_json::from_slice(&body_bytes) { Ok(json) => json, Err(e) => { - let err_msg = format!("couldn't convert body into json: {}", e.to_string()); + let err_msg = format!("couldn't convert body into json: {}", e); error!(target: "stdout", "{}", &err_msg); return error::internal_server_error(err_msg); @@ -565,21 +559,19 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response info!(target: "stdout", "processed query server response json body: \n{}", body_json); // if the request is a success, check decision and inject results accordingly. - if is_success { - if body_json["decision"].as_bool().unwrap_or(true) { - // the logic to ensure "results" is a serde_json::Value::String is present on the - // llamaedge-query-server. - let results = body_json["results"].as_str().unwrap_or(""); - - //inject search results - let system_search_result_message: ChatCompletionRequestMessage = - ChatCompletionRequestMessage::new_system_message(results, None); - - chat_request.messages.insert( - chat_request.messages.len() - 1, - system_search_result_message, - ) - } + if is_success && body_json["decision"].as_bool().unwrap_or(true) { + // the logic to ensure "results" is a serde_json::Value::String is present on the + // llamaedge-query-server. 
+ let results = body_json["results"].as_str().unwrap_or(""); + + //inject search results + let system_search_result_message: ChatCompletionRequestMessage = + ChatCompletionRequestMessage::new_system_message(results, None); + + chat_request.messages.insert( + chat_request.messages.len() - 1, + system_search_result_message, + ) } } From 1077a91700ed8d6ffb924c6f4d0c4ca413da0982 Mon Sep 17 00:00:00 2001 From: suryyyansh Date: Fri, 30 Aug 2024 01:49:16 +0530 Subject: [PATCH 04/11] updated search_backend definition Signed-off-by: suryyyansh --- src/main.rs | 2 +- src/utils.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.rs b/src/main.rs index ab03ed6..24cb70f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -141,7 +141,7 @@ struct Cli { #[cfg(feature = "search")] #[arg(long, required = true)] query_server_url: String, - /// The URL for the LlamaEdge query server. Supplying this implies usage. + /// The search API backend to use for internet search. #[cfg(feature = "search")] #[arg(long, default_value = "tavily", requires = "query-server-url")] search_backend: String, diff --git a/src/utils.rs b/src/utils.rs index 20caecf..0e22ff4 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -16,7 +16,7 @@ pub(crate) struct SearchArguments { pub(crate) api_key: String, /// The URL for the LlamaEdge query server. Supplying this implies usage. pub(crate) query_server_url: String, - /// The URL for the LlamaEdge query server. Supplying this implies usage. + /// The search API backend to use for requests. 
pub(crate) search_backend: String, } From e48939fd9bbe07db7fa514e7272aa08d9e907150 Mon Sep 17 00:00:00 2001 From: suryyyansh Date: Fri, 30 Aug 2024 14:15:22 +0530 Subject: [PATCH 05/11] updated summarization endpoint Signed-off-by: suryyyansh --- src/backend/ggml.rs | 2 +- src/utils.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backend/ggml.rs b/src/backend/ggml.rs index 182fd74..0644e49 100644 --- a/src/backend/ggml.rs +++ b/src/backend/ggml.rs @@ -486,7 +486,7 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response let summary_endpoint = match hyper::Uri::builder() .scheme(endpoint.scheme().unwrap().to_string().as_str()) .authority(endpoint.authority().unwrap().to_string().as_str()) - .path_and_query("/test/summarize") + .path_and_query("/query/summarize") .build() { Ok(se) => se, diff --git a/src/utils.rs b/src/utils.rs index 0e22ff4..95050ab 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -16,7 +16,7 @@ pub(crate) struct SearchArguments { pub(crate) api_key: String, /// The URL for the LlamaEdge query server. Supplying this implies usage. pub(crate) query_server_url: String, - /// The search API backend to use for requests. + /// The search API backend to use for internet search. 
pub(crate) search_backend: String, } From 9af759b552658d1b20170565d98e97397acaa3d7 Mon Sep 17 00:00:00 2001 From: suryyyansh Date: Mon, 2 Sep 2024 18:11:13 +0530 Subject: [PATCH 06/11] updated dependencies Signed-off-by: suryyyansh --- Cargo.toml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b492325..3e9f2a7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,9 +4,6 @@ version = "0.9.3" edition = "2021" [dependencies] -endpoints = { git = "https://github.com/suryyyansh/LlamaEdge.git", branch = "dev"} -llama-core = { git = "https://github.com/suryyyansh/LlamaEdge.git", branch = "dev", features = ["full"]} -chat-prompts = { git = "https://github.com/suryyyansh/LlamaEdge.git", branch = "dev"} anyhow = "1.0.80" chat-prompts = { version = "=0.14.0" } chrono = "0.4.38" @@ -38,5 +35,5 @@ hyper = { git = "https://github.com/second-state/wasi_hyper.git", branch = "v0.1 tokio = { git = "https://github.com/second-state/wasi_tokio.git", branch = "v1.36.x" } [features] -default = ["search"] +default = [] search = [] From 8b3d4bfff1f64a6802accd5faeb779e7f5243ca7 Mon Sep 17 00:00:00 2001 From: suryansh <118013430+suryyyansh@users.noreply.github.com> Date: Mon, 2 Sep 2024 19:22:13 +0530 Subject: [PATCH 07/11] Update README.md with search usage instructions --- README.md | 48 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index fb8b00e..4e89214 100644 --- a/README.md +++ b/README.md @@ -444,16 +444,16 @@ git clone https://github.com/LlamaEdge/rag-api-server.git cd rag-api-server # (Optional) Add the `wasm32-wasi` target to the Rust toolchain -rustup target add wasm32-wasi +rustup target add wasm32-wasip1 -# Build `rag-api-server.wasm` with the `http` support only, or -cargo build --target wasm32-wasi --release +# Build `rag-api-server.wasm` without internet search +cargo build --target wasm32-wasip1 --release -# Build `rag-api-server.wasm` with both 
`http` and `https` support -cargo build --target wasm32-wasi --release --features full +# Build `rag-api-server.wasm` with internet search capability +cargo build --target wasm32-wasip1 --release --features search # Copy the `rag-api-server.wasm` to the root directory -cp target/wasm32-wasi/release/rag-api-server.wasm . +cp target/wasm32-wasip1/release/rag-api-server.wasm . ```
To check the CLI options, @@ -524,6 +524,19 @@ To check the CLI options of the `rag-api-server` wasm app, you can run the follo Print version ``` +Compiling the server with the `search` feature enabled (using either the `--features search` flag when building or editing `Cargo.toml`), the following extra CLI arguments will be made available: + +```bash + --api-key + API key to be supplied to the endpoint, if supported + [default: ] + --query-server-url + The URL for the LlamaEdge query server. Supplying this implies usage + --search-backend + The search API backend to use for internet search + [default: tavily] +``` +
## Execute @@ -547,6 +560,8 @@ For the purpose of demonstration, we use the [Llama-2-7b-chat-hf-Q5_K_M.gguf](ht docker run -p 6333:6333 -p 6334:6334 -v $(pwd)/qdrant_storage:/qdrant/storage:z qdrant/qdrant ``` +### Start without Internet Search + - Start an instance of LlamaEdge-RAG API server ```bash wasmedge --dir .:. --nn-preload default:GGML:AUTO:Llama-2-7b-chat-hf-Q5_K_M.gguf \ --nn-preload embedding:GGML:AUTO:all-MiniLM-L6-v2-ggml-model-f16.gguf \ rag-api-server.wasm \ --model-name Llama-2-7b-chat-hf-Q5_K_M,all-MiniLM-L6-v2-ggml-model-f16 \ --ctx-size 4096,384 \ --prompt-template llama-2-chat,embedding \ --rag-prompt "Use the following pieces of context to answer the user's question.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n" \ --log-prompts \ --log-stat ``` +### Start with Internet Search + + - Start an instance of LlamaEdge-RAG API server with URL of your chosen [LlamaEdge Query Server](https://github.com/LlamaEdge/llamaedge-query-server/) instance. The query server can be ran locally. + + ```bash + wasmedge --dir .:. --nn-preload default:GGML:AUTO:Llama-2-7b-chat-hf-Q5_K_M.gguf \ + --nn-preload embedding:GGML:AUTO:all-MiniLM-L6-v2-ggml-model-f16.gguf \ + rag-api-server.wasm \ + --model-name Llama-2-7b-chat-hf-Q5_K_M,all-MiniLM-L6-v2-ggml-model-f16 \ + --ctx-size 4096,384 \ + --prompt-template llama-2-chat,embedding \ + --rag-prompt "Use the following pieces of context to answer the user's question.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n" \ + --api-key "xxx" \ # Use if your chosen LlamaEdge query server endpoint requires one. + --query-server-url "http://0.0.0.0:8081/" \ # URL of the LlamaEdge query server of your choosing. This is the default local endpoint. + --log-prompts \ + --log-stat + ``` + + ## Usage Example - [Execute](#execute) the server @@ -580,6 +614,8 @@ For the purpose of demonstration, we use the [Llama-2-7b-chat-hf-Q5_K_M.gguf](ht -d '{"messages":[{"role":"system", "content": "You are a helpful assistant."}, {"role":"user", "content": "What is the location of Paris, France along the Seine River?"}], "model":"Llama-2-7b-chat-hf-Q5_K_M"}' ``` +Internet search will only be used if the question cannot be answered using RAG.
If it is needed, the user message will be queried to the `/query/summarize` endpoint on the [LlamaEdge Query Server](https://github.com/LlamaEdge/llamaedge-query-server/) instance, where the server will respond with the summary of the internet search results if it decides it is necessary. + ## Set Log Level You can set the log level of the API server by setting the `LLAMA_LOG` environment variable. For example, to set the log level to `debug`, you can run the following command: From 94a4c09bc19564228b1e490e03727697f8bfc0db Mon Sep 17 00:00:00 2001 From: suryansh <118013430+suryyyansh@users.noreply.github.com> Date: Mon, 2 Sep 2024 19:24:26 +0530 Subject: [PATCH 08/11] Update README.md --- README.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 4e89214..2489f51 100644 --- a/README.md +++ b/README.md @@ -564,17 +564,17 @@ For the purpose of demonstration, we use the [Llama-2-7b-chat-hf-Q5_K_M.gguf](ht - Start an instance of LlamaEdge-RAG API server - ```bash - wasmedge --dir .:. --nn-preload default:GGML:AUTO:Llama-2-7b-chat-hf-Q5_K_M.gguf \ - --nn-preload embedding:GGML:AUTO:all-MiniLM-L6-v2-ggml-model-f16.gguf \ - rag-api-server.wasm \ - --model-name Llama-2-7b-chat-hf-Q5_K_M,all-MiniLM-L6-v2-ggml-model-f16 \ - --ctx-size 4096,384 \ - --prompt-template llama-2-chat,embedding \ - --rag-prompt "Use the following pieces of context to answer the user's question.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n" \ - --log-prompts \ - --log-stat - ``` + ```bash + wasmedge --dir .:. 
--nn-preload default:GGML:AUTO:Llama-2-7b-chat-hf-Q5_K_M.gguf \ + --nn-preload embedding:GGML:AUTO:all-MiniLM-L6-v2-ggml-model-f16.gguf \ + rag-api-server.wasm \ + --model-name Llama-2-7b-chat-hf-Q5_K_M,all-MiniLM-L6-v2-ggml-model-f16 \ + --ctx-size 4096,384 \ + --prompt-template llama-2-chat,embedding \ + --rag-prompt "Use the following pieces of context to answer the user's question.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n" \ + --log-prompts \ + --log-stat + ``` ### Start with Internet Search From 727a306f46916492a177cbbc8c06d9442f6dd81c Mon Sep 17 00:00:00 2001 From: suryansh <118013430+suryyyansh@users.noreply.github.com> Date: Mon, 2 Sep 2024 19:28:31 +0530 Subject: [PATCH 09/11] Update README.md --- README.md | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 2489f51..10c2617 100644 --- a/README.md +++ b/README.md @@ -526,7 +526,7 @@ To check the CLI options of the `rag-api-server` wasm app, you can run the follo Compiling the server with the `search` feature enabled (using either the `--features search` flag when building or editing `Cargo.toml`), the following extra CLI arguments will be made available: -```bash + ```bash --api-key API key to be supplied to the endpoint, if supported [default: ] @@ -535,7 +535,7 @@ Compiling the server with the `search` feature enabled (using either the `--feat --search-backend The search API backend to use for internet search [default: tavily] -``` + ``` @@ -578,21 +578,21 @@ For the purpose of demonstration, we use the [Llama-2-7b-chat-hf-Q5_K_M.gguf](ht ### Start with Internet Search - - Start an instance of LlamaEdge-RAG API server with URL of your chosen [LlamaEdge Query Server](https://github.com/LlamaEdge/llamaedge-query-server/) instance. The query server can be ran locally. 
+- Start an instance of LlamaEdge-RAG API server with the URL of your chosen [LlamaEdge Query Server](https://github.com/LlamaEdge/llamaedge-query-server/) instance. The query server can be run locally. - ```bash - wasmedge --dir .:. --nn-preload default:GGML:AUTO:Llama-2-7b-chat-hf-Q5_K_M.gguf \ - --nn-preload embedding:GGML:AUTO:all-MiniLM-L6-v2-ggml-model-f16.gguf \ - rag-api-server.wasm \ - --model-name Llama-2-7b-chat-hf-Q5_K_M,all-MiniLM-L6-v2-ggml-model-f16 \ - --ctx-size 4096,384 \ - --prompt-template llama-2-chat,embedding \ - --rag-prompt "Use the following pieces of context to answer the user's question.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n" \ - --api-key "xxx" \ # Use if your chosen LlamaEdge query server endpoint requires one. - --query-server-url "http://0.0.0.0:8081/" \ # URL of the LlamaEdge query server of your choosing. This is the default local endpoint. - --log-prompts \ - --log-stat - ``` + ```bash + wasmedge --dir .:. --nn-preload default:GGML:AUTO:Llama-2-7b-chat-hf-Q5_K_M.gguf \ + --nn-preload embedding:GGML:AUTO:all-MiniLM-L6-v2-ggml-model-f16.gguf \ + rag-api-server.wasm \ + --model-name Llama-2-7b-chat-hf-Q5_K_M,all-MiniLM-L6-v2-ggml-model-f16 \ + --ctx-size 4096,384 \ + --prompt-template llama-2-chat,embedding \ + --rag-prompt "Use the following pieces of context to answer the user's question.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n" \ + --api-key "xxx" \ # Use if your chosen LlamaEdge query server endpoint requires one. + --query-server-url "http://0.0.0.0:8081/" \ # URL of the LlamaEdge query server of your choosing. This is the default local endpoint. 
+ --log-prompts \ + --log-stat + ``` ## Usage Example From 6467af58f343cff0f5cbf1a272878d8c0691a8d1 Mon Sep 17 00:00:00 2001 From: suryyyansh Date: Mon, 2 Sep 2024 23:03:49 +0530 Subject: [PATCH 10/11] annotated SEARCH_ARGUMENTS and fixed typo Signed-off-by: suryyyansh --- src/main.rs | 2 +- src/utils.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index 24cb70f..606bd5d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -480,7 +480,7 @@ async fn main() -> Result<(), ServerError> { SEARCH_ARGUMENTS .set(search_arguments) - .map_err(|_| ServerError::Operation("Failed to set `SERVER_INFO`.".to_string()))?; + .map_err(|_| ServerError::Operation("Failed to set `SEARCH_ARGUMENTS`.".to_string()))?; } // let server = Server::bind(&addr).serve(new_service); diff --git a/src/utils.rs b/src/utils.rs index 95050ab..d973680 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -11,6 +11,7 @@ pub(crate) fn gen_chat_id() -> String { /// Search related items that aren't directly supported by SearchConfig #[cfg(feature = "search")] +#[derive(Debug)] pub(crate) struct SearchArguments { /// API key to be supplied to the endpoint, if supported. Not used by Bing. 
pub(crate) api_key: String, From de6684fd182d648d48102a25decbc160e2e98146 Mon Sep 17 00:00:00 2001 From: suryyyansh Date: Thu, 5 Sep 2024 19:23:27 +0530 Subject: [PATCH 11/11] Made search fall back to RAG in case the query server is inaccessible Signed-off-by: suryyyansh --- src/backend/ggml.rs | 88 +++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 43 deletions(-) diff --git a/src/backend/ggml.rs b/src/backend/ggml.rs index 0644e49..d2a69e2 100644 --- a/src/backend/ggml.rs +++ b/src/backend/ggml.rs @@ -491,7 +491,7 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response { Ok(se) => se, Err(_) => { - let err_msg = "couldn't build summary_endpoint from query_server_url".to_string(); + let err_msg = "Couldn't build summary_endpoint from query_server_url".to_string(); error!(target: "stdout", "{}", &err_msg); return error::internal_server_error(err_msg); @@ -515,7 +515,7 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response )) { Ok(request) => request, Err(_) => { - let err_msg = "failed to build request to LLamaEdge query server.".to_string(); + let err_msg = "Failed to build request to LLamaEdge query server.".to_string(); error!(target: "stdout", "{}", &err_msg); return error::internal_server_error(err_msg); } @@ -524,55 +524,57 @@ pub(crate) async fn rag_query_handler(mut req: Request) -> Response info!(target: "stdout", "Querying the LlamaEdge query server."); let client = hyper::client::Client::new(); - let res = match client.request(req).await { - Ok(response) => response, - Err(e) => { - let err_msg = format!("couldn't make request to LlamaEdge query server: {}", e); - error!(target: "stdout", "{}", &err_msg); - - return error::internal_server_error(err_msg); - } - }; - - let is_success = res.status().is_success(); + match client.request(req).await { + Ok(res) => { + let is_success = res.status().is_success(); - let body_bytes = match hyper::body::to_bytes(res.into_body()).await { - Ok(bytes) => bytes, 
- Err(e) => { - let err_msg = format!("couldn't convert body into bytes: {}", e); - error!(target: "stdout", "{}", &err_msg); + let body_bytes = match hyper::body::to_bytes(res.into_body()).await { + Ok(bytes) => bytes, + Err(e) => { + let err_msg = format!("Couldn't convert body into bytes: {}", e); + error!(target: "stdout", "{}", &err_msg); - return error::internal_server_error(err_msg); - } - }; + return error::internal_server_error(err_msg); + } + }; - let body_json: serde_json::Value = match serde_json::from_slice(&body_bytes) { - Ok(json) => json, - Err(e) => { - let err_msg = format!("couldn't convert body into json: {}", e); - error!(target: "stdout", "{}", &err_msg); + let body_json: serde_json::Value = match serde_json::from_slice(&body_bytes) { + Ok(json) => json, + Err(e) => { + let err_msg = format!("Couldn't convert body into json: {}", e); + error!(target: "stdout", "{}", &err_msg); - return error::internal_server_error(err_msg); - } - }; + return error::internal_server_error(err_msg); + } + }; - info!(target: "stdout", "processed query server response json body: \n{}", body_json); + info!(target: "stdout", "processed query server response json body: \n{}", body_json); - // if the request is a success, check decision and inject results accordingly. - if is_success && body_json["decision"].as_bool().unwrap_or(true) { - // the logic to ensure "results" is a serde_json::Value::String is present on the - // llamaedge-query-server. - let results = body_json["results"].as_str().unwrap_or(""); + // if the request is a success, check decision and inject results accordingly. + if is_success && body_json["decision"].as_bool().unwrap_or(true) { + // the logic to ensure "results" is a serde_json::Value::String is present on the + // llamaedge-query-server. 
+ let results = body_json["results"].as_str().unwrap_or(""); - //inject search results - let system_search_result_message: ChatCompletionRequestMessage = - ChatCompletionRequestMessage::new_system_message(results, None); + info!(target: "stdout", "injecting search summary into conversation context."); + //inject search results + let system_search_result_message: ChatCompletionRequestMessage = + ChatCompletionRequestMessage::new_system_message(results, None); - chat_request.messages.insert( - chat_request.messages.len() - 1, - system_search_result_message, - ) - } + chat_request.messages.insert( + chat_request.messages.len() - 1, + system_search_result_message, + ) + } + } + Err(e) => { + let err_msg = format!( + "Couldn't make request to LlamaEdge query server, switching to regular RAG: {}", + e + ); + warn!(target: "stdout", "{}", &err_msg); + } + }; } // chat completion