diff --git a/src/copilot_aisdk/chat.py b/src/copilot_aisdk/chat.py
index d2e57aa..7064636 100644
--- a/src/copilot_aisdk/chat.py
+++ b/src/copilot_aisdk/chat.py
@@ -38,17 +38,22 @@ async def get_documents(query, num_docs=5):
         vector_queries=[vector_query],
         select=["id", "content"])
 
+    citations = []
     async for result in results:
         context += f"\n>>> From: {result['id']}\n{result['content']}"
+        citations.append({
+            "id": result["id"],
+            "content": result["content"]
+        })
 
-    return context
+    return citations
 
 
 async def chat_completion(messages: list[dict], stream: bool = False,
                           session_state: Any = None, context: dict[str, Any] = {}):
     # get search documents for the last user message in the conversation
     user_message = messages[-1]["content"]
-    documents = await get_documents(user_message, context.get("num_retrieved_docs", 5))
+    documents = await get_documents(user_message, context.pop("num_retrieved_docs", 5))
 
     # make a copy of the context and modify it with the retrieved documents
     context = dict(context)
@@ -61,11 +66,13 @@ async def chat_completion(messages: list[dict], stream: bool = False,
     # call Azure OpenAI with the system prompt and user's question
     response = openai.ChatCompletion.create(
         engine=os.environ.get("AZURE_OPENAI_CHAT_DEPLOYMENT"),
-        messages=messages, temperature=context.get("temperature", 0.7),
+        messages=messages, temperature=context.pop("temperature", 0.7),
         stream=stream,
         max_tokens=800)
 
     # add context in the returned response
     if not stream:
-        response.choices[0]['context'] = context['documents']
+        if response.choices[0].get("context") is None:
+            response.choices[0]['context'] = {}
+        response.choices[0]['context']["citations"] = context['documents']
     return response
diff --git a/src/run.py b/src/run.py
index 67e1355..2733a18 100644
--- a/src/run.py
+++ b/src/run.py
@@ -64,12 +64,40 @@ def copilot_qna(question, chat_completion_fn):
         chat_completion_fn([{"role": "user", "content": question}])
     )
     response = result['choices'][0]
+
+    context = ""
+    for result in response["context"]["citations"]:
+        context += f"\n>>> From: {result['id']}\n{result['content']}"
     return {
         "question": question,
         "answer": response["message"]["content"],
+        "context": context
+    }
+
+
+# TEMP Wrapper to convert chat_completion response to supported chat protocol
+def copilot_single_turn_chat(question, chat_completion_fn, num_retrieved_docs=5, temperature=0.7, **kwargs):
+    if platform.system() == 'Windows':
+        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
+
+    user_message = {"role": "user", "content": question}
+
+    result = asyncio.run(
+        chat_completion_fn([{"role": "user", "content": question}], context={"num_retrieved_docs": num_retrieved_docs, "temperature": temperature})
+    )
+    response = result['choices'][0]
+
+    system_message = {
+        "role": response["message"]["role"],
+        "content": response["message"]["content"],
         "context": response["context"]
     }
+    messages = [user_message, system_message]
+
+    return {
+        "messages": messages
+    }
 
 
 # Define helper methods
 def load_jsonl(path):
@@ -124,6 +152,49 @@ def read_eval_artifacts(result):
     return result.metrics_summary, read_eval_artifacts(result)
 
 
+def run_chat_evaluation(chat_completion_fn, name, dataset_path):
+    from azure.ai.generative.evaluate import evaluate
+
+    # Evaluate the default vs the improved system prompt to see if the improved prompt
+    # performs consistently better across a larger set of inputs
+    path = pathlib.Path.cwd() / dataset_path
+    dataset = load_jsonl(path)
+
+    # temp: generate a single-turn qna wrapper over the chat completion function
+    chat_fn = lambda question, **kwargs: copilot_single_turn_chat(question, chat_completion_fn, **kwargs)
+
+    client = AIClient.from_config(DefaultAzureCredential())
+    result = evaluate(
+        evaluation_name=name,
+        target=chat_fn,
+        data=dataset,
+        task_type="chat",
+        data_mapping={
+            "y_pred": "messages",
+        },
+        model_config={
+            "api_version": "2023-05-15",
+            "api_base": os.getenv("OPENAI_API_BASE"),
+            "api_type": "azure",
+            "api_key": os.getenv("OPENAI_API_KEY"),
+            "deployment_id": os.getenv("AZURE_OPENAI_EVALUATION_DEPLOYMENT")
+        },
+        tracking_uri=client.tracking_uri,
+    )
+
+    def read_eval_artifacts(result):
+        tabular_result = None
+        with tempfile.TemporaryDirectory() as tmpdir:
+            result.download_evaluation_artifacts(tmpdir)
+            import pandas as pd
+            pd.set_option('display.max_colwidth', 15)
+            pd.set_option('display.max_columns', None)
+            tabular_result = pd.read_json(os.path.join(tmpdir, "eval_results.jsonl"), lines=True)
+        return tabular_result
+
+    return result.metrics_summary, read_eval_artifacts(result)
+
+
 def deploy_flow(deployment_name, deployment_folder, chat_module):
     client = AIClient.from_config(DefaultAzureCredential())
     deployment = Deployment(
@@ -204,8 +275,12 @@ def deploy_flow(deployment_name, deployment_folder, chat_module):
 if args.build_index:
     build_cogsearch_index(os.getenv("AZURE_AI_SEARCH_INDEX_NAME"), "./data/3-product-info")
 elif args.evaluate:
-    metrics_summary, tabular_result = run_evaluation(chat_completion, name=f"test-{args.implementation}-copilot",
+    if args.implementation == "aisdk":
+        metrics_summary, tabular_result = run_chat_evaluation(chat_completion, name=f"test-{args.implementation}-copilot",
                                                      dataset_path=args.dataset_path)
+    else:
+        metrics_summary, tabular_result = run_evaluation(chat_completion, name=f"test-{args.implementation}-copilot",
+                                                         dataset_path=args.dataset_path)
 pprint("-----Summarized Metrics-----")
 pprint(metrics_summary)
 pprint("-----Tabular Result-----")