|
6 | 6 | EvaluationSchedule, |
7 | 7 | RecurrenceTrigger, |
8 | 8 | ) |
9 | | -from azure.ai.evaluation import CoherenceEvaluator |
| 9 | +from azure.ai.evaluation import CoherenceEvaluator, FluencyEvaluator, RelevanceEvaluator, ViolenceEvaluator, SexualEvaluator, HateUnfairnessEvaluator, ProtectedMaterialEvaluator, ContentSafetyEvaluator |
10 | 10 |
|
11 | 11 | # This sample includes the setup for an online evaluation schedule using the Azure AI Project SDK and Azure AI Evaluation SDK |
12 | 12 | # The schedule is configured to run hourly over the collected trace data while running several quality evaluators (coherence, fluency, relevance) and safety evaluators (violence, sexual, hate/unfairness, protected material, content safety)
|
28 | 28 | # You can modify it depending on your data schema |
29 | 29 | # The KQL query must output these required columns: operation_ID, operation_ParentID, and gen_ai_response_id |
30 | 30 | # You can choose which other columns to output as required by the evaluators you are using |
31 | | -KUSTO_QUERY = "let gen_ai_spans=(dependencies | where isnotnull(customDimensions[\"gen_ai.system\"]) | extend response_id = tostring(customDimensions[\"gen_ai.response.id\"]) | project id, operation_Id, operation_ParentId, timestamp, response_id); let gen_ai_events=(traces | where message in (\"gen_ai.choice\", \"gen_ai.user.message\", \"gen_ai.system.message\") or tostring(customDimensions[\"event.name\"]) in (\"gen_ai.choice\", \"gen_ai.user.message\", \"gen_ai.system.message\") | project id= operation_ParentId, operation_Id, operation_ParentId, user_input = iff(message == \"gen_ai.user.message\" or tostring(customDimensions[\"event.name\"]) == \"gen_ai.user.message\", parse_json(iff(message == \"gen_ai.user.message\", tostring(customDimensions[\"gen_ai.event.content\"]), message)).content, \"\"), system = iff(message == \"gen_ai.system.message\" or tostring(customDimensions[\"event.name\"]) == \"gen_ai.system.message\", parse_json(iff(message == \"gen_ai.system.message\", tostring(customDimensions[\"gen_ai.event.content\"]), message)).content, \"\"), llm_response = iff(message == \"gen_ai.choice\", parse_json(tostring(parse_json(tostring(customDimensions[\"gen_ai.event.content\"])).message)).content, iff(tostring(customDimensions[\"event.name\"]) == \"gen_ai.choice\", parse_json(parse_json(message).message).content, \"\")) | summarize operation_ParentId = any(operation_ParentId), Input = maxif(user_input, user_input != \"\"), System = maxif(system, system != \"\"), Output = maxif(llm_response, llm_response != \"\") by operation_Id, id); gen_ai_spans | join kind=inner (gen_ai_events) on id, operation_Id | project Input, System, Output, operation_Id, operation_ParentId, gen_ai_response_id = response_id" |
| 31 | +KUSTO_QUERY = "let gen_ai_spans=(dependencies | where isnotnull(customDimensions[\"gen_ai.system\"]) | extend response_id = tostring(customDimensions[\"gen_ai.response.id\"]) | project id, operation_Id, operation_ParentId, timestamp, response_id); let gen_ai_events=(traces | where message in (\"gen_ai.choice\", \"gen_ai.user.message\", \"gen_ai.system.message\") or tostring(customDimensions[\"event.name\"]) in (\"gen_ai.choice\", \"gen_ai.user.message\", \"gen_ai.system.message\") | project id= operation_ParentId, operation_Id, operation_ParentId, user_input = iff(message == \"gen_ai.user.message\" or tostring(customDimensions[\"event.name\"]) == \"gen_ai.user.message\", parse_json(iff(message == \"gen_ai.user.message\", tostring(customDimensions[\"gen_ai.event.content\"]), message)).content, \"\"), system = iff(message == \"gen_ai.system.message\" or tostring(customDimensions[\"event.name\"]) == \"gen_ai.system.message\", parse_json(iff(message == \"gen_ai.system.message\", tostring(customDimensions[\"gen_ai.event.content\"]), message)).content, \"\"), llm_response = iff(message == \"gen_ai.choice\", parse_json(tostring(parse_json(tostring(customDimensions[\"gen_ai.event.content\"])).message)).content, iff(tostring(customDimensions[\"event.name\"]) == \"gen_ai.choice\", parse_json(parse_json(message).message).content, \"\")) | summarize operation_ParentId = any(operation_ParentId), Input = maxif(user_input, user_input != \"\"), System = maxif(system, system != \"\"), Output = maxif(llm_response, llm_response != \"\") by operation_Id, id); gen_ai_spans | join kind=inner (gen_ai_events) on id, operation_Id | project Input, System, Output, operation_Id, operation_ParentId, gen_ai_response_id = response_id | where gen_ai_response_id != \"\"" |
32 | 32 |
|
33 | 33 |
|
34 | 34 |
|
|
71 | 71 | # ) |
72 | 72 |
|
73 | 73 | # CoherenceEvaluator |
74 | | -coherence_evaluator_config = EvaluatorConfiguration( |
75 | | - id=CoherenceEvaluator.id, |
76 | | - init_params={"model_config": model_config}, |
77 | | - data_mapping={"query": "${data.Input}", "response": "${data.Output}"} |
78 | | -) |
| 74 | +def get_evaluator_config(evaluator_id, model_config): |
| 75 | + return EvaluatorConfiguration( |
| 76 | + id=evaluator_id, |
| 77 | + init_params={"model_config": model_config}, |
| 78 | + data_mapping={"query": "${data.Input}", "response": "${data.Output}"} |
| 79 | + ) |
| 80 | + |
| 81 | +def get_evaluator_config_safety(evaluator_id, azure_ai_project_scope): |
| 82 | + return EvaluatorConfiguration( |
| 83 | + id=evaluator_id, |
| 84 | + init_params={"azure_ai_project": azure_ai_project_scope}, |
| 85 | + data_mapping={"query": "${data.Input}", "response": "${data.Output}"} |
| 86 | + ) |
| 87 | + |
| 88 | +# Example usage (the data_mapping is set inside get_evaluator_config):
| 89 | +# coherence_evaluator_config = get_evaluator_config(
| 90 | +#     CoherenceEvaluator.id,
| 91 | +#     model_config
| 92 | +# )
| 93 | +
| 94 | +# fluency_evaluator_config = get_evaluator_config(
| 95 | +#     FluencyEvaluator.id,
| 96 | +#     model_config
| 97 | +# )
| 98 | +
79 | 99 |
|
80 | 100 | # Frequency to run the schedule |
81 | 101 | recurrence_trigger = RecurrenceTrigger(frequency="hour", interval=1) |
82 | 102 |
|
| 103 | +credential = DefaultAzureCredential() |
| 104 | + |
83 | 105 | # Dictionary of evaluators |
84 | 106 | evaluators = { |
85 | | - "coherence" : coherence_evaluator_config |
| 107 | + "coherence" : get_evaluator_config(CoherenceEvaluator.id, model_config), |
| 108 | + "fluency" : get_evaluator_config(FluencyEvaluator.id, model_config), |
| 109 | + "relevance" : get_evaluator_config(RelevanceEvaluator.id, model_config), |
| 110 | + "violence" : get_evaluator_config_safety(ViolenceEvaluator.id, project_client.scope), |
| 111 | + "sexual" : get_evaluator_config_safety(SexualEvaluator.id, project_client.scope), |
| 112 | + "hateUnfairness" : get_evaluator_config_safety(HateUnfairnessEvaluator.id, project_client.scope), |
| 113 | + "protectedMaterial" : get_evaluator_config_safety(ProtectedMaterialEvaluator.id, project_client.scope), |
| 114 | + "contentSafety" : get_evaluator_config_safety(ContentSafetyEvaluator.id, project_client.scope) |
86 | 115 | } |
87 | 116 |
|
88 | 117 | name = SAMPLE_NAME |
|
0 commit comments