-
Notifications
You must be signed in to change notification settings - Fork 51
Expand file tree
/
Copy pathexample-eval.py
More file actions
55 lines (45 loc) · 2.28 KB
/
example-eval.py
File metadata and controls
55 lines (45 loc) · 2.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import os
from virtuals_sdk import game
# API key for the Virtuals SDK, read from the environment (None when unset).
VIRTUALS_API_KEY = os.getenv("VIRTUALS_API_KEY")

# Settings handed to game.Agent. The goal/description/world_info strings are
# prompt text passed to the SDK verbatim, so they are kept exactly as written.
agent_settings = {
    "api_key": VIRTUALS_API_KEY,
    "goal": "reply tweet",
    "description": "always reply to tweet and make sure read the tweet requirement carefully, please make sure your tweet are very interesting and clickbait and very provocative and very relevant to the tweet, Viral-worthy content, Perfect timing and context, Exceptional creativity/originality,Maximum engagement potential, Industry-leading example of effective tweeting",
    "world_info": "You must always reply user's tweet",
}
agent = game.Agent(**agent_settings)

# The default Twitter functions below apply only to the Twitter platform.
# The return value of the listing call is discarded here.
# NOTE(review): presumably it reports the selectable function names - confirm
# against the SDK documentation.
agent.list_available_default_twitter_functions()
agent.use_default_twitter_functions(["wait", "reply_tweet"])
# # Run the reaction module once (applies to the Twitter platform only)
# result = agent.react(
# session_id="session-twitter",
# platform="twitter",
# tweet_id="1869281466628349975",
# )
# print("original_tweet:", original_tweet)
# print("responded_tweet:", replied_tweet)
# # Check out your eval dashboard here: https://evaengine.ai/virtuals (import your API key to view)
# eval_result = agent.eval_react(result)
# print(eval_result)
# Run the reaction and its evaluation several times so the scores can be
# averaged afterwards.
NUM_EVAL_RUNS = 2  # number of react + eval rounds to collect

eval_results = []
for _ in range(NUM_EVAL_RUNS):
    # Each round reacts to the same tweet in the same session ...
    result = agent.react(
        session_id="session-twitter",
        platform="twitter",
        tweet_id="1869281466628349975",
    )
    # ... and then scores that reaction; every evaluation is kept.
    eval_result = agent.eval_react(result)
    eval_results.append(eval_result)
# Collect the per-run scores for every metric, then print each metric's mean
# to two decimal places. All score lists are built before anything is printed,
# so a missing key fails before any output is produced.
score_table = [
    ("Final Score", [entry['final_score'] for entry in eval_results]),
    ("Truth Score", [entry['truth']['score'] for entry in eval_results]),
    ("Accuracy Score", [entry['accuracy']['score'] for entry in eval_results]),
    ("Creativity Score", [entry['creativity']['score'] for entry in eval_results]),
    ("Engagement Score", [entry['engagement']['score'] for entry in eval_results]),
]

print(f"Average scores across {len(eval_results)} evaluations:")
for title, scores in score_table:
    mean_score = sum(scores) / len(scores)
    print(f"{title}: {mean_score:.2f}")