-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathevaluate_mteb.py
More file actions
86 lines (64 loc) · 2.32 KB
/
evaluate_mteb.py
File metadata and controls
86 lines (64 loc) · 2.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
"""
Example script showing how to use the EagerEmbedV1Wrapper MTEB wrapper for evaluation.
"""
# Relevant code:
# mteb/_create_dataloaders.py
import mteb
from mteb_wrapper import get_eager_embed_v1_model_meta
def evaluate_mteb():
    """Run the ViDoRe(v2) benchmark suite against the uploaded eager-embed-v1 model.

    Loads the model through mteb's model registry, flattens the benchmark's
    tasks into a single list, and prints the evaluation results.
    """
    # Relevant mteb internals: mteb/_create_dataloaders.py
    meta = mteb.get_model_meta('eagerworks/eager-embed-v1')
    loaded_model = meta.load_model()
    # Flatten every task contained in the requested benchmark(s).
    task_list = [task for bench in mteb.get_benchmarks(["ViDoRe(v2)"]) for task in bench.tasks]
    print(task_list)
    # batch_size=8 keeps CUDA memory usage low during encoding.
    eval_results = mteb.evaluate(model=loaded_model, tasks=task_list, encode_kwargs={"batch_size": 8})
    print("Evaluation complete!")
    print(eval_results)
def evaluate_mteb_with_custom_model():
    """Run the ViDoRe(v2) benchmark against a model built via the local MTEB wrapper.

    Builds the model metadata with get_eager_embed_v1_model_meta (remote HF
    checkpoint by default), loads the model, and prints the evaluation results.
    """
    import torch

    # Remote checkpoint pinned to a specific revision.
    meta = get_eager_embed_v1_model_meta(
        model_name="eagerworks/eager-embed-v1",
        revision="a6bec272729c5056e2c26618ce085205c82a3b3c",
        dtype=torch.float16,
        use_peft=False,
        image_size=784,
    )
    # Local-checkpoint alternative:
    # meta = get_eager_embed_v1_model_meta(
    #     model_name="./run2_8x5090",
    #     revision="main",
    #     dtype=torch.float16,
    #     use_peft=True,
    #     image_size=784,
    # )

    wrapped_model = meta.load_model()

    # Flatten every task contained in the requested benchmark(s).
    benches = mteb.get_benchmarks(["ViDoRe(v2)"])
    print(benches)
    selected_tasks = [task for bench in benches for task in bench.tasks]
    # Targeted subset alternative:
    # selected_tasks = mteb.get_tasks(["Vidore2ESGReportsHLRetrieval", "Vidore2BioMedicalLecturesRetrieval"])

    # batch_size=8 keeps CUDA memory usage low during encoding.
    eval_results = mteb.evaluate(model=wrapped_model, tasks=selected_tasks, encode_kwargs={"batch_size": 8})
    print("Evaluation complete!")
    print(eval_results)
def compute_memory_usage():
    """Print the estimated memory footprint (in MB) of the uploaded model.

    Uses mteb's registry metadata only — the model weights are not loaded.
    """
    # Fix: removed the redundant function-local `import mteb`; the module
    # already imports mteb at the top of the file.
    print("Computing memory usage...")
    model_meta = mteb.get_model_meta('eagerworks/eager-embed-v1')
    model_memory = model_meta.calculate_memory_usage_mb()
    print(f"Model memory usage: {model_memory} MB")
if __name__ == "__main__":
    # Print a banner, then run the default (uploaded-model) evaluation.
    banner = "=" * 80
    print("\n" + banner)
    print("MTEB evaluation")
    print(banner)
    evaluate_mteb()