From f2ddd26c2fca85dbc35ec5046204c8dd97e94322 Mon Sep 17 00:00:00 2001 From: Gene Date: Tue, 23 Jan 2024 17:57:56 +0800 Subject: [PATCH 001/108] [DOC] change repo url --- docs/about/dev.rst | 2 +- docs/start/exp.rst | 2 +- docs/start/install.rst | 2 +- docs/start/quick.rst | 2 +- setup.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/about/dev.rst b/docs/about/dev.rst index 3ffbe776..24146053 100644 --- a/docs/about/dev.rst +++ b/docs/about/dev.rst @@ -10,7 +10,7 @@ As a developer, you often want make changes to ``learnware`` and hope it would r .. code-block:: bash - $ git clone https://github.com/Learnware-LAMDA/Learnware.git && cd Learnware + $ git clone https://gitlink.org.cn/beimingwu/learnware.git && cd Learnware $ pip install -e .[dev] .. note:: diff --git a/docs/start/exp.rst b/docs/start/exp.rst index eb65d46d..b6839299 100644 --- a/docs/start/exp.rst +++ b/docs/start/exp.rst @@ -168,7 +168,7 @@ The results are depicted in the following table and figure. Similarly, even when Get Start Examples ========================= -Examples for `Tabular, Text` and `Image` data sets are available at `Learnware Examples `_. You can run { workflow.py } directly to reproduce related experiments. +Examples for `Tabular, Text` and `Image` data sets are available at `Learnware Examples `_. You can run { workflow.py } directly to reproduce related experiments. We utilize the `fire` module to construct our experiments. Table Examples diff --git a/docs/start/install.rst b/docs/start/install.rst index 2ac1d367..02ca018a 100644 --- a/docs/start/install.rst +++ b/docs/start/install.rst @@ -36,7 +36,7 @@ Also, Users can install ``learnware`` by the source code according to the follow .. code-block:: bash - $ git clone hhttps://github.com/Learnware-LAMDA/Learnware.git && cd Learnware + $ git clone https://gitlink.org.cn/beimingwu/learnware.git && cd Learnware $ pip install -e .[dev] .. note:: diff --git a/docs/start/quick.rst b/docs/start/quick.rst index aedbb6cf..dc4fb594 100644 --- a/docs/start/quick.rst +++ b/docs/start/quick.rst @@ -192,4 +192,4 @@ Auto Workflow Example The ``Learnware`` also offers automated workflow examples. This includes preparing learnwares, uploading and deleting learnwares from the market, and searching for learnwares using both semantic and statistical specifications. -To experience the basic workflow of the Learnware Market, please refer to `Learnware Examples `_. +To experience the basic workflow of the Learnware Market, please refer to `Learnware Examples `_. diff --git a/setup.py b/setup.py index 2d307d68..9f4e9985 100644 --- a/setup.py +++ b/setup.py @@ -91,7 +91,7 @@ def get_version(rel_path: str) -> str: name=NAME, version=VERSION, license="Apache-2.0 Licence", - url="https://gitee.com/beimingwu/learnware", + url="https://www.gitlink.org.cn/beimingwu/learnware", packages=find_packages(), include_package_data=True, description=DESCRIPTION, From c96422657bd871d620ad3a2e29bae5cb97e017d4 Mon Sep 17 00:00:00 2001 From: Gene Date: Wed, 24 Jan 2024 11:44:39 +0800 Subject: [PATCH 002/108] [DOC] change html tags --- README.md | 8 ++++---- README_zh.md | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 09fdbe9d..bc2f7539 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@
[README.md header hunk: logo, badge links (Python Versions / Platform / PypI Versions / Documentation Status / License) and the 中文 | English switcher — the <img>/<a> tag changes of this hunk are not preserved]
# Introduction diff --git a/README_zh.md b/README_zh.md index 3a3ebfa3..844ea65e 100644 --- a/README_zh.md +++ b/README_zh.md @@ -4,7 +4,7 @@
[README_zh.md header hunk: logo, badge links and the 中文 | English switcher — the <img>/<a> tag changes of this hunk are not preserved]
# 简介 From 1fb0759a39f92cae7216f07ecebdd0ca8b3e0e43 Mon Sep 17 00:00:00 2001 From: bxdd Date: Wed, 24 Jan 2024 16:18:01 +0800 Subject: [PATCH 003/108] [FIX, DOC] fix readthedocs config bug, modify framework svg --- .readthedocs.yaml => .readthedocs.yml | 0 docs/_static/img/learnware_framework.svg | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename .readthedocs.yaml => .readthedocs.yml (100%) diff --git a/.readthedocs.yaml b/.readthedocs.yml similarity index 100% rename from .readthedocs.yaml rename to .readthedocs.yml diff --git a/docs/_static/img/learnware_framework.svg b/docs/_static/img/learnware_framework.svg index bf4326b1..34f13e11 100644 --- a/docs/_static/img/learnware_framework.svg +++ b/docs/_static/img/learnware_framework.svg @@ -1,4 +1,4 @@ -
[learnware_framework.svg, previous version: framework diagram text labels only (Market / Specification / Interface / Workflow components) — SVG markup omitted]
\ No newline at end of file +
[learnware_framework.svg, updated version: same framework diagram; label fixes include "Data Free Reuser" → "Data-Free Reuser", "Heterom StatSearcher" → "Hetero StatSearcher", "Network" → "network" — SVG markup omitted]
\ No newline at end of file From e356b3aac115ff1443d0e54472a537cd058dd051 Mon Sep 17 00:00:00 2001 From: bxdd Date: Wed, 24 Jan 2024 16:23:22 +0800 Subject: [PATCH 004/108] [FIX] update readthedocs yam l --- .readthedocs.yml => .readthedocs.yaml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .readthedocs.yml => .readthedocs.yaml (100%) diff --git a/.readthedocs.yml b/.readthedocs.yaml similarity index 100% rename from .readthedocs.yml rename to .readthedocs.yaml From a022728983645a2d985e6e89c7ce741707b95491 Mon Sep 17 00:00:00 2001 From: bxdd Date: Wed, 24 Jan 2024 16:30:47 +0800 Subject: [PATCH 005/108] [MNT] update readthedocs config --- .readthedocs.yaml | 7 ++++--- docs/requirements.txt | 0 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 docs/requirements.txt diff --git a/.readthedocs.yaml b/.readthedocs.yaml index c6f89cd4..a0ae266b 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,4 +1,3 @@ -# .readthedocs.yml # Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details @@ -8,6 +7,8 @@ version: 2 # Set the os and other tools you might need build: os: ubuntu-22.04 + tools: + python: "3.8" # Build documentation in the docs/ directory with Sphinx sphinx: @@ -16,6 +17,6 @@ sphinx: # Build all formats formats: all -# Optionally set the version of Python and requirements required to build your docs python: - version: 3.8 + install: + - requirements: docs/requirements.txt \ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..e69de29b From ca3f03f10509cac83b244651dcd099d0481723cd Mon Sep 17 00:00:00 2001 From: bxdd Date: Wed, 24 Jan 2024 16:42:14 +0800 Subject: [PATCH 006/108] [MNT] add sphinx_book_theme req --- docs/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/requirements.txt b/docs/requirements.txt index e69de29b..c228e7e8 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -0,0 +1 @@ +sphinx_book_theme==0.3.3 \ No newline at end of file From 79b53e7fa4e675c4e80596d326cfd7d88d4f5857 Mon Sep 17 00:00:00 2001 From: bxdd Date: Wed, 24 Jan 2024 16:52:36 +0800 Subject: [PATCH 007/108] [FIX] fix readthedocs req with pip --- .readthedocs.yaml | 3 ++- docs/requirements.txt | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) delete mode 100644 docs/requirements.txt diff --git a/.readthedocs.yaml b/.readthedocs.yaml index a0ae266b..82fd96cf 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -19,4 +19,5 @@ formats: all python: install: - - requirements: docs/requirements.txt \ No newline at end of file + - method: pip + path: .[dev] \ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index c228e7e8..00000000 --- a/docs/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -sphinx_book_theme==0.3.3 \ No newline at end of file From 06913b3b96e15213cf247a0e812b52c782cd62f4 Mon Sep 17 00:00:00 2001 From: bxdd Date: Wed, 24 Jan 2024 16:58:28 +0800 Subject: [PATCH 008/108] [FIX] fix readthedocs req to install all --- .readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 82fd96cf..89d5bde2 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -20,4 +20,4 @@ formats: all python: install: - method: pip - path: .[dev] \ No newline at end of file + path: .[full, dev] \ No newline at end of file From 23544ed0edf880e0ebd93d7dca8c6f499724d525 Mon Sep 17 00:00:00 2001 From: 
bxdd Date: Wed, 24 Jan 2024 17:26:08 +0800 Subject: [PATCH 009/108] [DOC, FIX] update badge, update autodoc config, fix no logo in doc bug --- README_zh.md | 2 +- docs/conf.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/README_zh.md b/README_zh.md index 844ea65e..db215a0b 100644 --- a/README_zh.md +++ b/README_zh.md @@ -9,7 +9,7 @@ Python Versions - Platform + Platform PypI Versions diff --git a/docs/conf.py b/docs/conf.py index b8507b4a..da51f2ff 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -96,7 +96,7 @@ # "display_version": False, "navigation_depth": 4, } -html_logo = "_static/img/logo/logo1.png" +html_logo = "_static/img/logo/logo.svg" # These folders are copied to the documentation's HTML output @@ -118,6 +118,10 @@ autodoc_default_flags = ["members"] autodoc_default_options = { "members": True, + 'private-members': True, + 'special-members': True, + 'undoc-members': True, + 'show-inheritance': True, "member-order": "bysource", "special-members": "__init__", } From f2024fdadb08f4932ee813e77924e5dd04a377cd Mon Sep 17 00:00:00 2001 From: bxdd Date: Wed, 24 Jan 2024 17:40:59 +0800 Subject: [PATCH 010/108] [MNT] publish 0.3.2 version --- docs/conf.py | 3 +-- learnware/__init__.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index da51f2ff..b32d5399 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -120,8 +120,7 @@ "members": True, 'private-members': True, 'special-members': True, - 'undoc-members': True, 'show-inheritance': True, "member-order": "bysource", - "special-members": "__init__", + "special-members": ["__init__", "__call__"], } diff --git a/learnware/__init__.py b/learnware/__init__.py index 8c8c7685..1293deed 100644 --- a/learnware/__init__.py +++ b/learnware/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.3.1" +__version__ = "0.3.2" import json import os From c6030a661f430b2d2c915b84406c9bf346401d5e Mon Sep 17 00:00:00 2001 From: bxdd Date: Wed, 24 Jan 2024 17:49:36 +0800 Subject: [PATCH 011/108] [MNT] fix autodoc bug with List --- docs/conf.py | 2 +- learnware/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index b32d5399..440b6280 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -122,5 +122,5 @@ 'special-members': True, 'show-inheritance': True, "member-order": "bysource", - "special-members": ["__init__", "__call__"], + "special-members": "__init__, __call__", } diff --git a/learnware/__init__.py b/learnware/__init__.py index 1293deed..97e81afd 100644 --- a/learnware/__init__.py +++ b/learnware/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.3.2" +__version__ = "0.3.2.99" import json import os From d4bf97f63f9f70c67fd94c1356de5de60acb9066 Mon Sep 17 00:00:00 2001 From: bxdd Date: Wed, 24 Jan 2024 17:52:43 +0800 Subject: [PATCH 012/108] [FIX] pass flake8 test --- docs/conf.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 440b6280..9b7b9135 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -118,9 +118,8 @@ autodoc_default_flags = ["members"] autodoc_default_options = { "members": True, - 'private-members': True, - 'special-members': True, - 'show-inheritance': True, - "member-order": "bysource", + "private-members": True, "special-members": "__init__, __call__", + "show-inheritance": True, + "member-order": "bysource", } From 4af111819004328cf4d8a529b57966920e8dd5d0 Mon Sep 17 00:00:00 2001 From: Gene Date: Wed, 24 Jan 2024 18:52:01 +0800 Subject: [PATCH 013/108] [DOC] polish 
contents --- README.md | 2 +- README_zh.md | 2 +- docs/start/intro.rst | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index bc2f7539..2c2ca07d 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ # Introduction -The _learnware_ paradigm, proposed by Professor Zhi-Hua Zhou in 2016 [1, 2], aims to build a vast model platform system, i.e., a _learnware dock system_, which systematically accommodates and organizes models shared by machine learning developers worldwide, and can efficiently identify and assemble existing helpful model(s) to solve future tasks in a unified way. +The _learnware_ paradigm was proposed by Professor Zhi-Hua Zhou in 2016 [1, 2]. In this paradigm, developers worldwide can share models with the _learnware dock system_, which effectively searches for and reuse learnware(s) to help users solve machine learning tasks efficiently without starting from scratch. The `learnware` package provides a fundamental implementation of the central concepts and procedures within the learnware paradigm. Its well-structured design ensures high scalability and facilitates the seamless integration of additional features and techniques in the future. diff --git a/README_zh.md b/README_zh.md index db215a0b..de5de9c2 100644 --- a/README_zh.md +++ b/README_zh.md @@ -34,7 +34,7 @@ # 简介 -学件范式由周志华教授在2016年提出 [1, 2],旨在构建一个巨大的模型平台系统:即学件基座系统,系统地组织管理世界各地的机器学习开发者分享的模型,并通过统一的方式识别、利用已有模型的能力快速解决新的机器学习任务。 +学件范式由周志华教授在2016年提出 [1, 2]。在学件范式下,世界各地的开发者可分享模型至学件基座系统,系统通过有效查搜和复用学件帮助用户高效解决机器学习任务,而无需从零开始构建机器学习模型。 本项目开发的 `learnware` 包对学件范式中的核心组件和算法进行了实现,全流程地支持学件上传、检测、组织、查搜、部署和复用等功能。基于良好的结构设计,`learnware` 包具有高度可扩展性,为后续相关算法和功能的开发打下坚实基础。 diff --git a/docs/start/intro.rst b/docs/start/intro.rst index 947d081b..2050b6b3 100644 --- a/docs/start/intro.rst +++ b/docs/start/intro.rst @@ -3,7 +3,7 @@ Introduction ================ -The *learnware* paradigm, proposed by Professor Zhi-Hua Zhou in 2016 [1, 2], aims to build a vast model platform system, i.e., a *learnware dock system*, which systematically accommodates and organizes models shared by machine learning developers worldwide, and can efficiently identify and assemble existing helpful model(s) to solve future tasks in a unified way. +The *learnware* paradigm was proposed by Professor Zhi-Hua Zhou in 2016 [1, 2]. In this paradigm, developers worldwide can share models with the *learnware dock system*, which effectively searches for and reuse learnware(s) to help users solve machine learning tasks efficiently without starting from scratch. The ``learnware`` package provides a fundamental implementation of the central concepts and procedures within the learnware paradigm. Its well-structured design ensures high scalability and facilitates the seamless integration of additional features and techniques in the future. 
From 8e4bbe7ff9e847fb1b835a125be711f3e9416fa6 Mon Sep 17 00:00:00 2001 From: Gene Date: Wed, 24 Jan 2024 18:59:37 +0800 Subject: [PATCH 014/108] [DOC] modify details --- README_zh.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_zh.md b/README_zh.md index de5de9c2..a14d2657 100644 --- a/README_zh.md +++ b/README_zh.md @@ -34,7 +34,7 @@ # 简介 -学件范式由周志华教授在2016年提出 [1, 2]。在学件范式下,世界各地的开发者可分享模型至学件基座系统,系统通过有效查搜和复用学件帮助用户高效解决机器学习任务,而无需从零开始构建机器学习模型。 +学件范式由周志华教授在 2016 年提出 [1, 2]。在学件范式下,世界各地的开发者可分享模型至学件基座系统,系统通过有效查搜和复用学件帮助用户高效解决机器学习任务,而无需从零开始构建机器学习模型。 本项目开发的 `learnware` 包对学件范式中的核心组件和算法进行了实现,全流程地支持学件上传、检测、组织、查搜、部署和复用等功能。基于良好的结构设计,`learnware` 包具有高度可扩展性,为后续相关算法和功能的开发打下坚实基础。 From 4155e3615a404bef8e8d0eade693f8d9d3cc2638 Mon Sep 17 00:00:00 2001 From: Gene Date: Thu, 25 Jan 2024 01:21:09 +0800 Subject: [PATCH 015/108] [DOC] polish contents of README --- README.md | 2 +- README_zh.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2c2ca07d..2a475ea9 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ # Introduction -The _learnware_ paradigm was proposed by Professor Zhi-Hua Zhou in 2016 [1, 2]. In this paradigm, developers worldwide can share models with the _learnware dock system_, which effectively searches for and reuse learnware(s) to help users solve machine learning tasks efficiently without starting from scratch. +_Learnware_ paradigm was proposed by Professor Zhi-Hua Zhou in 2016 [1, 2]. In the _learnware paradigm_, developers worldwide can share models with the _learnware dock system_, which effectively searches for and reuse learnware(s) to help users solve machine learning tasks efficiently without starting from scratch. The `learnware` package provides a fundamental implementation of the central concepts and procedures within the learnware paradigm. Its well-structured design ensures high scalability and facilitates the seamless integration of additional features and techniques in the future. diff --git a/README_zh.md b/README_zh.md index a14d2657..b7098131 100644 --- a/README_zh.md +++ b/README_zh.md @@ -34,7 +34,7 @@ # 简介 -学件范式由周志华教授在 2016 年提出 [1, 2]。在学件范式下,世界各地的开发者可分享模型至学件基座系统,系统通过有效查搜和复用学件帮助用户高效解决机器学习任务,而无需从零开始构建机器学习模型。 +学件由周志华教授在 2016 年提出 [1, 2]。在学件范式下,世界各地的开发者可分享模型至学件基座系统,系统通过有效查搜和复用学件帮助用户高效解决机器学习任务,而无需从零开始构建机器学习模型。 本项目开发的 `learnware` 包对学件范式中的核心组件和算法进行了实现,全流程地支持学件上传、检测、组织、查搜、部署和复用等功能。基于良好的结构设计,`learnware` 包具有高度可扩展性,为后续相关算法和功能的开发打下坚实基础。 From a2dc5f7930b188b4385e5c0cc16ac691cb9e6155 Mon Sep 17 00:00:00 2001 From: Gene Date: Thu, 25 Jan 2024 01:21:23 +0800 Subject: [PATCH 016/108] [DOC] polish contents of project docs --- docs/references/beimingwu.rst | 7 +++---- docs/start/intro.rst | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/references/beimingwu.rst b/docs/references/beimingwu.rst index d1840cda..e5f626ba 100644 --- a/docs/references/beimingwu.rst +++ b/docs/references/beimingwu.rst @@ -3,7 +3,7 @@ Beimingwu System ==================== -`Beimingwu System `_ is based on the learnware paradigm, which systematically implements the entire process of learnware from submission to deployment, helping users effectively search and reuse learnwares without the need to build machine learning models from scratch. +`Beimingwu `_ is the first systematic open-source implementation of learnware dock system, providing a preliminary research platform for learnware studies. Developers worldwide can submit their models freely to the learnware dock. 
They can generate specifications for the model with the help of Beimingwu without disclosing their raw data, and then the model and specification can be assembled into a learnware, which will be accommodated in the learnware dock. Future users can solve their tasks by submitting their requirements and reusing helpful learnwares returned by Beimingwu, while also not disclosing their own data. It is anticipated that after Beimingwu accumulates millions of learnwares, an "emergent" behavior may occur: machine learning tasks that have never been specifically tackled may be solved by assembling and reusing some existing learnwares. The ``learnware`` package is the cornerstone of the Beimingwu system, functioning as its core engine. It offers a comprehensive suite of central APIs that encompass a wide range of functionalities, including the submission, verification, organization, search, and deployment of learnware. @@ -12,7 +12,7 @@ This integration ensures a streamlined and efficient process, facilitating seaml Core Features in the Beimingwu System ======================================= -Beimingwu systematically implements the core process of the learnware paradigm for the first time: +The Beimingwu learnware dock system, serving as a preliminary research platform for learnware, systematically implements the core processes of the learnware paradigm for the first time: - ``Submitting Stage``: The system includes multiple detection mechanisms to ensure the quality of uploaded learnwares. Additionally, the system trains a heterogeneous engine based on existing learnware specifications in the system to merge different specification islands and assign new specifications to learnwares. With more learnwares are submitted, the heterogeneous engine will continue to update, achieving continuous iteration of learnware specifications and building a more precise specification world. - ``Deploying Stage``: After users upload task requirements, the system automatically selects whether to recommend a single learnware or multiple learnware combinations and provides efficient deployment methods. Whether it's a single learnware or a combination of multiple learnwares, the system offers convenient learnware reuse tools. @@ -26,5 +26,4 @@ In addition, the Beimingwu system also has the following features: - ``Data Privacy Protection``: The Beimingwu system operations, including learnware upload, search, and deployment, do not require users to upload local data. All relevant statistical specifications are generated locally by users, ensuring data privacy. - ``Open Source System``: The Beimingwu system's source code is open-source, including the learnware package and frontend/backend code. The learnware package is highly extensible, making it easy to integrate new specification designs, learnware system designs, and learnware reuse methods in the future. -Beimingwu is the first system-level implementation of the learnware paradigm. -This pioneering venture is just the beginning, with vast opportunities for enhancement and growth in the related technological fields still ahead. \ No newline at end of file +Building the learnware paradigm requires collective efforts from the community. As the first learnware dock system, Beimingwu is still in its early stages, with much room for improvement in related technologies. We sincerely invite the community to upload models, collaborate in system development, and engage in research and enhancements in learnware algorithms. 
Your valuable feedback is essential for the continuous improvement of the system. \ No newline at end of file diff --git a/docs/start/intro.rst b/docs/start/intro.rst index 2050b6b3..d1dcd6e5 100644 --- a/docs/start/intro.rst +++ b/docs/start/intro.rst @@ -3,7 +3,7 @@ Introduction ================ -The *learnware* paradigm was proposed by Professor Zhi-Hua Zhou in 2016 [1, 2]. In this paradigm, developers worldwide can share models with the *learnware dock system*, which effectively searches for and reuse learnware(s) to help users solve machine learning tasks efficiently without starting from scratch. +*Learnware* was proposed by Professor Zhi-Hua Zhou in 2016 [1, 2]. In the *learnware paradigm*, developers worldwide can share models with the *learnware dock system*, which effectively searches for and reuse learnware(s) to help users solve machine learning tasks efficiently without starting from scratch. The ``learnware`` package provides a fundamental implementation of the central concepts and procedures within the learnware paradigm. Its well-structured design ensures high scalability and facilitates the seamless integration of additional features and techniques in the future. From 69bc1cd41b248bb8ee4e3d88cfe8abf2114985df Mon Sep 17 00:00:00 2001 From: Gene Date: Thu, 25 Jan 2024 01:32:02 +0800 Subject: [PATCH 017/108] [DOC] fix details --- README_zh.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README_zh.md b/README_zh.md index b7098131..fb3c5861 100644 --- a/README_zh.md +++ b/README_zh.md @@ -425,8 +425,7 @@ feature_augment_predict_y = reuse_feature_augment.predict(user_data=test_x) ## 如何贡献 `learnware` 还很年轻,可能存在错误和问题。我们非常欢迎大家为 `learnware` 做出贡献。 -我们为所有的开发者提供了详细的[项目开发指南](https://learnware.readthedocs.io/en/latest/about/dev.html),并设置了相应的 commit 格式和 pre-commit 配置,请大家遵守。 -非常感谢大家的贡献! +我们为所有的开发者提供了详细的[项目开发指南](https://learnware.readthedocs.io/en/latest/about/dev.html),并设置了相应的 commit 格式和 pre-commit 配置,请大家遵守。非常感谢大家的贡献! ## 关于我们 From 6fb97077ac6f1632c6446a34ff79c3a125f0152f Mon Sep 17 00:00:00 2001 From: Gene Date: Thu, 25 Jan 2024 01:37:31 +0800 Subject: [PATCH 018/108] [DOC] fix issue about line break --- README.md | 9 +++------ README_zh.md | 27 +++++++++------------------ 2 files changed, 12 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 2a475ea9..031fcf06 100644 --- a/README.md +++ b/README.md @@ -45,8 +45,7 @@ In addition, the `learnware` package serves as the engine for the [Beimingwu Sys ## Learnware Paradigm -A learnware consists of a high-performance machine learning model and specifications that characterize the model, i.e., "Learnware = Model + Specification". -These specifications, encompassing both semantic and statistical aspects, detail the model's functionality and statistical information, making it easier for future users to identify and reuse these models. +A learnware consists of a high-performance machine learning model and specifications that characterize the model, i.e., "Learnware = Model + Specification". These specifications, encompassing both semantic and statistical aspects, detail the model's functionality and statistical information, making it easier for future users to identify and reuse these models.
@@ -369,8 +368,7 @@ Finally, we evaluate our algorithms on text datasets. Text data naturally exhibi ### Settings -We conduct experiments on the well-known text classification dataset: [20-newsgroup](http://qwone.com/~jason/20Newsgroups/), which consists approximately 20000 newsgroup documents partitioned across 20 different newsgroups. -Similar to the image experiments, a total of 50 learnwares are uploaded. Each learnware is trained on a subset that includes only half of the samples from three superclasses and the model in it is a tf-idf feature extractor combined with a naive Bayes classifier. We define 10 user tasks, and each of them encompasses two superclasses. +We conduct experiments on the well-known text classification dataset: [20-newsgroup](http://qwone.com/~jason/20Newsgroups/), which consists approximately 20000 newsgroup documents partitioned across 20 different newsgroups. Similar to the image experiments, a total of 50 learnwares are uploaded. Each learnware is trained on a subset that includes only half of the samples from three superclasses and the model in it is a tf-idf feature extractor combined with a naive Bayes classifier. We define 10 user tasks, and each of them encompasses two superclasses. ### Results @@ -420,5 +418,4 @@ Learnware is still young and may contain bugs and issues. We highly value and en ## About Us -The Learnware repository is developed and maintained by the LAMDA Beimingwu R&D Team. -To learn more about our team, please visit the [Team Overview](https://docs.bmwu.cloud/en/about-us.html). +The Learnware repository is developed and maintained by the LAMDA Beimingwu R&D Team. To learn more about our team, please visit the [Team Overview](https://docs.bmwu.cloud/en/about-us.html). diff --git a/README_zh.md b/README_zh.md index fb3c5861..48576c69 100644 --- a/README_zh.md +++ b/README_zh.md @@ -45,8 +45,7 @@ ## 学件范式 -学件由性能优良的机器学习模型和描述模型的**规约**构成,即「学件 = 模型 + 规约」。 -学件的规约由「语义规约」和「统计规约」两部分组成: +学件由性能优良的机器学习模型和描述模型的**规约**构成,即「学件 = 模型 + 规约」。学件的规约由「语义规约」和「统计规约」两部分组成: - 语义规约通过文本对模型的类型及功能进行描述; - 统计规约则通过各类机器学习技术,刻画模型所蕴含的统计信息。 @@ -68,8 +67,7 @@
-架构设计的原则包括:解耦 (Decoupling)、自治 (Autonomy)、可重用性 (Reusability) 以及可扩展性 (Scalability)。 -上图从模块和工作流程的角度对整个架构进行了阐述。 +架构设计的原则包括:解耦 (Decoupling)、自治 (Autonomy)、可重用性 (Reusability) 以及可扩展性 (Scalability)。上图从模块和工作流程的角度对整个架构进行了阐述。 - 针对工作流程 (Workflow),`learnware` 包括「提交阶段」和「部署阶段」。 @@ -125,8 +123,7 @@ pip install learnware[full] - `stat.json`:学件的统计规约,其文件名可自定义并记录在 learnware.yaml 中; - `environment.yaml` 或 `requirements.txt`:指明模型的运行环境。 -为方便大家构建学件,我们提供了「[学件模板](https://www.bmwu.cloud/static/learnware-template.zip)」,大家可在其基础上构建自己的学件。 -关于学件 `zip` 包中各文件的详细描述可参考文档:[学件准备](https://learnware.readthedocs.io/en/latest/workflows/upload.html#prepare-learnware)。 +为方便大家构建学件,我们提供了「[学件模板](https://www.bmwu.cloud/static/learnware-template.zip)」,大家可在其基础上构建自己的学件。关于学件 `zip` 包中各文件的详细描述可参考文档:[学件准备](https://learnware.readthedocs.io/en/latest/workflows/upload.html#prepare-learnware)。 ## 工作流程 @@ -188,8 +185,7 @@ print(single_result) ### 统计规约查搜 -如果提供统计规约文件 `stat.json`,学件市场可以基于上述查搜结果进一步进行更准确的查搜。 -此阶段的查搜将利用统计信息来识别一个或多个对你的任务有帮助的学件。 +如果提供统计规约文件 `stat.json`,学件市场可以基于上述查搜结果进一步进行更准确的查搜。此阶段的查搜将利用统计信息来识别一个或多个对你的任务有帮助的学件。 以下代码展示了使用 Reduced Kernel Mean Embedding (RKME) 作为统计规约进行查搜的例子: @@ -237,8 +233,7 @@ reuse_ensemble = AveragingReuser(learnware_list=mixture_item.learnwares) ensemble_predict_y = reuse_ensemble.predict(user_data=test_x) ``` -我们还提供了两种方法,可基于用户的有标记数据来复用给定的学件集合:`EnsemblePruningReuser` 和 `FeatureAugmentReuser`。 -参考下述代码,其中 `test_x` 为测试数据,`train_x, train_y` 为有标记的训练数据: +我们还提供了两种方法,可基于用户的有标记数据来复用给定的学件集合:`EnsemblePruningReuser` 和 `FeatureAugmentReuser`。参考下述代码,其中 `test_x` 为测试数据,`train_x, train_y` 为有标记的训练数据: ```python from learnware.reuse import EnsemblePruningReuser, FeatureAugmentReuser @@ -256,8 +251,7 @@ feature_augment_predict_y = reuse_feature_augment.predict(user_data=test_x) ### 自动工作流程示例 -`learnware` 包提供了自动化的工作流程示例,包括准备学件、在学件市场中上传和删除学件,以及使用语义和统计规约查搜学件。 -工作流程示例可参考 `test/test_workflow/test_workflow.py` 文件。 +`learnware` 包提供了自动化的工作流程示例,包括准备学件、在学件市场中上传和删除学件,以及使用语义和统计规约查搜学件。工作流程示例可参考 `test/test_workflow/test_workflow.py` 文件。 # 实验示例 @@ -350,8 +344,7 @@ feature_augment_predict_y = reuse_feature_augment.predict(user_data=test_x) ### 实验设置 -我们选择了经典的图像分类数据集 [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html),其中包含 10 个类别的 60000 张 32x32 的彩色图像。总共上传了 50 个学件:每个学件包含一个卷积神经网络,该网络在一个不平衡的子集上进行训练,包括来自四个类别的 12000 个样本,采样比例为 `0.4:0.4:0.1:0.1`。 -总共测试了 100 个用户任务,每个用户任务包含 3000 个 CIFAR-10 样本,分为六个类别,采样比例为 `0.3:0.3:0.1:0.1:0.1:0.1`。 +我们选择了经典的图像分类数据集 [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html),其中包含 10 个类别的 60000 张 32x32 的彩色图像。总共上传了 50 个学件:每个学件包含一个卷积神经网络,该网络在一个不平衡的子集上进行训练,包括来自四个类别的 12000 个样本,采样比例为 `0.4:0.4:0.1:0.1`。总共测试了 100 个用户任务,每个用户任务包含 3000 个 CIFAR-10 样本,分为六个类别,采样比例为 `0.3:0.3:0.1:0.1:0.1:0.1`。 ### 实验结果 @@ -379,8 +372,7 @@ feature_augment_predict_y = reuse_feature_augment.predict(user_data=test_x) ### 实验设置 -我们在经典的文本分类数据集上进行了实验:[20-newsgroup](http://qwone.com/~jason/20Newsgroups/),该数据集包含大约 20000 份新闻文档,包含 20 个不同的新闻组。 -与图像实验类似,我们一共上传了 50 个学件。每个学件都是在一个子集上进行训练,该子集仅包括三个超类中一半样本的数据,其中的模型为 `tf-idf` 特征提取器与朴素贝叶斯分类器的结合。我们定义了 10 个用户任务,每个任务包括两个超类。 +我们在经典的文本分类数据集上进行了实验:[20-newsgroup](http://qwone.com/~jason/20Newsgroups/),该数据集包含大约 20000 份新闻文档,包含 20 个不同的新闻组。与图像实验类似,我们一共上传了 50 个学件。每个学件都是在一个子集上进行训练,该子集仅包括三个超类中一半样本的数据,其中的模型为 `tf-idf` 特征提取器与朴素贝叶斯分类器的结合。我们定义了 10 个用户任务,每个任务包括两个超类。 ### 实验结果 @@ -424,8 +416,7 @@ feature_augment_predict_y = reuse_feature_augment.predict(user_data=test_x) ## 如何贡献 -`learnware` 还很年轻,可能存在错误和问题。我们非常欢迎大家为 `learnware` 做出贡献。 -我们为所有的开发者提供了详细的[项目开发指南](https://learnware.readthedocs.io/en/latest/about/dev.html),并设置了相应的 commit 格式和 pre-commit 
配置,请大家遵守。非常感谢大家的贡献! +`learnware` 还很年轻,可能存在错误和问题。我们非常欢迎大家为 `learnware` 做出贡献。我们为所有的开发者提供了详细的[项目开发指南](https://learnware.readthedocs.io/en/latest/about/dev.html),并设置了相应的 commit 格式和 pre-commit 配置,请大家遵守。非常感谢大家的贡献! ## 关于我们 From 953160302249fbad3402e3c44a4d8dbe34ffaa89 Mon Sep 17 00:00:00 2001 From: bxdd Date: Thu, 25 Jan 2024 15:49:54 +0800 Subject: [PATCH 019/108] Update CHANGES.rst --- CHANGES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 75fc6d88..ae692d3c 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,6 +2,6 @@ Changelog ========= Here you can see the full list of changes between ``learnware`` release. -Version 0.3.0 +Version 0.3.2 --------------- This is the first public release of ``learnware`` package. From 7c816e31fdea22bb2ed14489cf2bab4dd55e72d9 Mon Sep 17 00:00:00 2001 From: liuht Date: Sun, 28 Jan 2024 15:46:12 +0800 Subject: [PATCH 020/108] [DOCS] polish contents in README and docs --- README.md | 29 +++++------ README_zh.md | 63 ++++++++++++------------ docs/components/learnware.rst | 69 ++++++++++++++------------- docs/components/market.rst | 75 +++++++++++++++-------------- docs/components/model.rst | 9 ++-- docs/components/spec.rst | 64 ++++++++++++------------- docs/index.rst | 8 ++-- docs/references/beimingwu.rst | 20 ++++---- docs/start/exp.rst | 2 +- docs/start/intro.rst | 77 +++++++++++++++--------------- docs/start/quick.rst | 43 +++++++---------- docs/workflows/client.rst | 57 ++++++++++++---------- docs/workflows/reuse.rst | 31 ++++++------ docs/workflows/search.rst | 90 +++++++++++++++++++---------------- docs/workflows/upload.rst | 4 +- 15 files changed, 325 insertions(+), 316 deletions(-) diff --git a/README.md b/README.md index 031fcf06..2ec33645 100644 --- a/README.md +++ b/README.md @@ -52,8 +52,8 @@ A learnware consists of a high-performance machine learning model and specificat The above diagram illustrates the learnware paradigm, which consists of two distinct stages: -- `Submitting Stage`: Developers voluntarily submit various learnwares to the learnware market, and the system conducts quality checks and further organization of these learnwares. -- `Deploying Stage`: When users submit task requirements, the learnware market automatically selects whether to recommend a single learnware or a combination of multiple learnwares and provides efficient deployment methods. Whether it’s a single learnware or a combination of multiple learnwares, the system offers convenient learnware reuse interfaces. +- `Submitting Stage`: Developers voluntarily submit various learnwares to the learnware doc system, and the system conducts quality checks and further organization of these learnwares. +- `Deploying Stage`: When users submit task requirements, the learnware doc system automatically selects whether to recommend a single learnware or a combination of multiple learnwares and provides efficient deployment methods. Whether it’s a single learnware or a combination of multiple learnwares, the system offers convenient learnware reuse interfaces. ## Framework and Infrastructure Design @@ -126,7 +126,7 @@ Users can start a `learnware` workflow according to the following steps: ### Initialize a Learnware Market -The `EasyMarket` class provides the core functions of a `Learnware Market`. 
You can initialize a basic `Learnware Market` named "demo" using the code snippet below: +You can initialize a basic `Learnware Market` named "demo" using the code snippet below: ```python from learnware.market import instantiate_learnware_market @@ -137,7 +137,7 @@ demo_market = instantiate_learnware_market(market_id="demo", name="easy", rebuil ### Upload Learnware -Before uploading your learnware to the `Learnware Market`, you'll need to create a semantic specification, `semantic_spec`. This involves selecting or inputting values for predefined semantic tags to describe the features of your task and model. +Before uploading your learnware to the `Learnware Market`, you'll need to create a semantic specification, `semantic_spec`. This involves selecting or inputting values for semantic tags to describe the features of your task and model. For instance, the following code illustrates the semantic specification for a Scikit-Learn type model. This model is tailored for education scenarios and performs classification tasks on tabular data: @@ -154,7 +154,7 @@ semantic_spec = generate_semantic_spec( ) ``` -After defining the semantic specification, you can upload your learnware using a single line of code: +After preparing the semantic specification, you can insert your learnware into the learnware market using a single line of code: ```python demo_market.add_learnware(zip_path, semantic_spec) @@ -164,14 +164,14 @@ Here, `zip_path` is the file path of your learnware `zip` package. ### Semantic Specification Search -To find learnwares that align with your task's purpose, you'll need to provide a semantic specification, `user_semantic`, that outlines your task's characteristics. The `Learnware Market` will then perform an initial search using `user_semantic`, identifying potentially useful learnwares with models that solve tasks similar to your requirements. +To identify learnwares that align with your task's purpose, you'll need to provide a semantic specification, `user_semantic`, that outlines your task's characteristics. The `Learnware Market` will then perform an initial search based on `user_semantic`, which filters learnwares by considering the semantic information of your task. ```python # construct user_info, which includes a semantic specification user_info = BaseUserInfo(id="user", semantic_spec=semantic_spec) # search_learnware: performs semantic specification search when user_info doesn't include a statistical specification -search_result = easy_market.search_learnware(user_info) +search_result = demo_market.search_learnware(user_info) single_result = search_results.get_single_results() # single_result: the List of Tuple[Score, Learnware] returned by semantic specification search @@ -180,10 +180,9 @@ print(single_result) ### Statistical Specification Search -If you decide in favor of providing your own statistical specification file, `stat.json`, the `Learnware Market` can further refine the selection of learnwares from the previous step. This second-stage search leverages statistical information to identify one or more learnwares that are most likely to be beneficial for your task. - -For example, the code below executes learnware search when using Reduced Kernel Mean Embedding as the statistical specification: +If you generate and provide a statistical specification file `rkme.json`, the `Learnware Market` will conduct learnware identification based on statistical information, and return more targeted models. 
Using the API we provided, you can easily generate this statistical specification locally. +For example, the code below executes learnware search when using Reduced Kernel Mean Embedding (RKME) as the statistical specification: ```python import learnware.specification as specification @@ -194,7 +193,7 @@ user_spec.load(os.path.join(unzip_path, "rkme.json")) user_info = BaseUserInfo( semantic_spec=user_semantic, stat_info={"RKMETableSpecification": user_spec} ) -search_result = easy_market.search_learnware(user_info) +search_result = demo_market.search_learnware(user_info) single_result = search_results.get_single_results() multiple_result = search_results.get_multiple_results() @@ -214,12 +213,12 @@ for mixture_item in multiple_result: ### Reuse Learnwares -With the list of learnwares, `mixture_learnware_list`, returned from the previous step, you can readily apply them to make predictions on your own data, bypassing the need to train a model from scratch. We provide two methods for reusing a given list of learnwares: `JobSelectorReuser` and `AveragingReuser`. Substitute `test_x` in the code snippet below with your testing data, and you're all set to reuse learnwares: +We offer two data-free methods `JobSelectorReuser` and `AveragingReuser` for reusing a given list of learnwares. Please substitute ``test_x`` in the code snippet below with your own testing data: ```python from learnware.reuse import JobSelectorReuser, AveragingReuser -# using jobselector reuser to reuse the searched learnwares to make prediction +# using job selector reuser to reuse the searched learnwares to make prediction reuse_job_selector = JobSelectorReuser(learnware_list=mixture_item.learnwares) job_selector_predict_y = reuse_job_selector.predict(user_data=test_x) @@ -228,12 +227,14 @@ reuse_ensemble = AveragingReuser(learnware_list=mixture_item.learnwares) ensemble_predict_y = reuse_ensemble.predict(user_data=test_x) ``` -We also provide two methods when the user has labeled data for reusing a given list of learnwares: `EnsemblePruningReuser` and `FeatureAugmentReuser`. Substitute `test_x` in the code snippet below with your testing data, and substitute `train_x, train_y` with your training labeled data, and you're all set to reuse learnwares: +We also provide two data-dependent methods: `EnsemblePruningReuser` and `FeatureAugmentReuser`, when the user has minor labeled data for refining a given list of learnwares. 
Here's an example for adopting multiple returned learnwares by labeled data to solve classification tasks: ```python from learnware.reuse import EnsemblePruningReuser, FeatureAugmentReuser # Use ensemble pruning reuser to reuse the searched learnwares to make prediction +# (train_x, train_y) is the small amount of labeled data +# `mode` has two options "classification" and "regression" reuse_ensemble = EnsemblePruningReuser(learnware_list=mixture_item.learnwares, mode="classification") reuse_ensemble.fit(train_x, train_y) ensemble_pruning_predict_y = reuse_ensemble.predict(user_data=test_x) diff --git a/README_zh.md b/README_zh.md index 48576c69..cb899c21 100644 --- a/README_zh.md +++ b/README_zh.md @@ -75,8 +75,8 @@ | 阶段 | 描述 | | ---- | ---- | -| 提交阶段 | 开发者自发地将学件提交到学件市场中,随后市场会进行学件检测并对这些学件进行相应地组织。 | -| 部署阶段 | 学件市场根据用户的任务需求推荐学件,并提供高效的学件部署和复用的方法。 | +| 提交阶段 | 开发者将学件提交到学件市场中,随后市场会进行学件检测并对这些学件进行相应地组织。 | +| 部署阶段 | 学件市场根据用户的任务需求推荐学件,并提供统一的学件部署和复用的方法。 | @@ -142,7 +142,7 @@ demo_market = instantiate_learnware_market(market_id="demo", name="easy", rebuil ### 上传学件 -在将学件上传到「学件市场」之前,需要创建相应的语义规约,即 `semantic_spec`。这涉及选择或输入预定义的语义标签的值,以描述你的任务和模型的特性。 +在将学件上传到「学件市场」之前,首先需要创建相应的语义规约 `semantic_spec`。这一过程包括选择或输入预定义的语义标签的值,以描述你的任务和模型的特性。 例如,以下代码示例生成了适用于教育场景的 `Scikit-Learn` 类型模型的语义规约。该模型用于对表格数据执行分类任务: @@ -169,14 +169,14 @@ demo_market.add_learnware(zip_path, semantic_spec) ### 语义规约查搜 -为了找到与你的任务目标相符的学件,你需要提供一个名为 `user_semantic` 的语义规约,来概述你的任务特点。随后,学件市场将通过 `user_semantic` 进行语义查搜,识别与你的任务需求相近的学件。 +为了匹配适合你的任务目标的学件,你需要提供一个名为 `user_semantic` 的语义规约,来描述你的任务特性。随后,学件市场将通过 `user_semantic` 进行语义查搜,识别与你的任务需求相近的学件。 ```python # 构造包含语义规约的 user_info user_info = BaseUserInfo(id="user", semantic_spec=semantic_spec) # search_learnware: 当 user_info 不包含统计规约时,仅执行语义规约查搜 -search_result = easy_market.search_learnware(user_info) +search_result = demo_market.search_learnware(user_info) single_result = search_results.get_single_results() # single_result: 语义规约查搜返回的 Tuple[Score, Learnware] 列表 @@ -185,7 +185,7 @@ print(single_result) ### 统计规约查搜 -如果提供统计规约文件 `stat.json`,学件市场可以基于上述查搜结果进一步进行更准确的查搜。此阶段的查搜将利用统计信息来识别一个或多个对你的任务有帮助的学件。 +如果提供统计规约文件 `stat.json`,学件市场可以在语义规约查搜之外、利用统计信息来为你进一步匹配和推荐模型。这些模型往往具有更好的针对性。 以下代码展示了使用 Reduced Kernel Mean Embedding (RKME) 作为统计规约进行查搜的例子: @@ -199,7 +199,7 @@ user_spec.load(os.path.join(unzip_path, "rkme.json")) user_info = BaseUserInfo( semantic_spec=user_semantic, stat_info={"RKMETableSpecification": user_spec} ) -search_result = easy_market.search_learnware(user_info) +search_result = demo_market.search_learnware(user_info) single_result = search_results.get_single_results() multiple_result = search_results.get_multiple_results() @@ -219,12 +219,12 @@ for mixture_item in multiple_result: ### 多学件复用 -使用上一步中返回的学件列表 `mixture_learnware_list`,你可以轻松地复用它们对自己的数据进行预测,而无需从头开始训练模型。我们提供了两种方法来重用学件集合:`JobSelectorReuser` 和 `AveragingReuser`。将以下代码片段中的 `test_x` 替换为你的测试数据,即可实现学件复用: +我们提供了两种数据无关的方法来重用学件集合:`JobSelectorReuser` 和 `AveragingReuser`。将以下代码片段中的 `test_x` 替换为你的测试数据,即可实现学件复用: ```python from learnware.reuse import JobSelectorReuser, AveragingReuser -# 使用 jobselector reuser 复用查搜到的学件, 并对 text_x 进行预测 +# 使用 job selector reuser 复用查搜到的学件, 并对 text_x 进行预测 reuse_job_selector = JobSelectorReuser(learnware_list=mixture_item.learnwares) job_selector_predict_y = reuse_job_selector.predict(user_data=test_x) @@ -233,12 +233,14 @@ reuse_ensemble = AveragingReuser(learnware_list=mixture_item.learnwares) ensemble_predict_y = reuse_ensemble.predict(user_data=test_x) ``` -我们还提供了两种方法,可基于用户的有标记数据来复用给定的学件集合:`EnsemblePruningReuser` 和 
`FeatureAugmentReuser`。参考下述代码,其中 `test_x` 为测试数据,`train_x, train_y` 为有标记的训练数据: +我们还提供了两种数据相关的方法,`EnsemblePruningReuser` 和 `FeatureAugmentReuser`,可基于用户的有标记数据复用指定的学件集合。以下代码展示了如何使用这些方法来处理分类任务,其中 `test_x` 是测试数据,`train_x, train_y`为有标记的训练数据: ```python from learnware.reuse import EnsemblePruningReuser, FeatureAugmentReuser # 使用 ensemble pruning reuser 复用查搜到的学件, 并对 text_x 进行预测 +# (train_x, train_y) 是有标记的训练数据 +# `mode` 提供两种模式 "classification" 和 "regression" reuse_ensemble = EnsemblePruningReuser(learnware_list=mixture_item.learnwares, mode="classification") reuse_ensemble.fit(train_x, train_y) ensemble_pruning_predict_y = reuse_ensemble.predict(user_data=test_x) @@ -271,25 +273,25 @@ feature_augment_predict_y = reuse_feature_augment.predict(user_data=test_x) ## 表格场景实验 -在各种表格数据集上,我们首先评估了从学件市场中识别和复用与用户任务具有相同特征空间的学件的性能。另外,由于表格任务通常来自异构的特征空间,我们还评估了从不同特征空间中识别和复用学件的性能。 +在各种表格数据集上,我们首先评估了从学件市场中识别和复用与用户任务具有相同特征空间的学件的性能。鉴于表格任务通常来自异构的特征空间,我们也对从不同特征空间中识别和复用学件的性能进行了评估。 ### 实验设置 -我们的实验利用了销量预测领域的三个公共数据集:[Predict Future Sales (PFS)](https://www.kaggle.com/c/competitive-data-science-predict-future-sales/data),[M5 Forecasting (M5)](https://www.kaggle.com/competitions/m5-forecasting-accuracy/data) 和 [Corporacion](https://www.kaggle.com/competitions/favorita-grocery-sales-forecasting/data)。为了扩大实验规模,我们对这些数据集应用了多种特征工程方法。然后,我们将每个数据集按店铺划分,并进一步将每个店铺的数据划分为训练集和测试集。我们在每个 Corporacion 和 PFS 训练集上训练了一个 LightGBM 模型,而测试集和 M5 数据集被用于构建用户任务。基于上述方式,我们构建了一个包含 265 个学件的学件市场,涵盖了五种特征空间和两种标签空间。所有这些学件都已上传至[北冥坞学件基座系统](https://bmwu.cloud/)。 +我们的实验使用了三个公开的销量预测数据集:[Predict Future Sales (PFS)](https://www.kaggle.com/c/competitive-data-science-predict-future-sales/data),[M5 Forecasting (M5)](https://www.kaggle.com/competitions/m5-forecasting-accuracy/data) 和 [Corporacion](https://www.kaggle.com/competitions/favorita-grocery-sales-forecasting/data)。为了增加实验的多样性,我们对这些数据集应用了多种特征工程方法。接着,我们将每个数据集按店铺划分,并将每个店铺的数据进一步划分为训练集和测试集。我们在 Corporacion 和 PFS 的每个训练集上训练了 LightGBM 模型,同时使用相应的测试集和 M5 数据集构建用户任务。基于这些实验设置,我们构建了一个包含 265 个学件的学件市场,覆盖了五种不同的特征空间和两种标记空间。所有这些学件都已上传至[北冥坞学件基座系统](https://bmwu.cloud/)。 ### 基线算法 -复用学件的最基本方式是 Top-1 复用 (Top-1 reuser),即直接使用由 RKME 规约选择的单个学件。此外,我们实现了两种数据无关复用器和两种数据相关复用器,它们可用于复用从市场中识别出的单个或多个有用的学件。当用户无标记的数据时,JobSelector 复用器通过训练一个任务选择器为不同的样本选择不同的学件;AverageEnsemble 复用器使用集成方法进行预测。在用户有测试数据和少量有标记训练数据的情况下,EnsemblePruning 复用器有选择地集成一组学件,选择最适合用户任务的学件;FeatureAugment 复用器将每个接收到的学件视为特征增强器,将其输出视为新特征,然后在增强的特征集上构建一个简单的模型。JobSelector 和 FeatureAugment 只对表格数据有效,而其他方法也适用于文本和图像数据。 +最基础的学件复用方式是 Top-1 复用 (Top-1 reuser),它直接使用根据 RKME 规约查搜得到的单个学件。此外,我们实现了两种数据无关的复用方法和两种数据相关的复方法,它们可用于复用从市场中识别出的单个或多个有用的学件。当用户无标记的数据时,JobSelectorReuser 通过训练一个任务选择器为不同的样本选择合适的学件;AverageEnsembleReuser 使用集成方法进行预测。在用户有测试数据和少量有标记训练数据的情况下,EnsemblePruningReuser 通过多目标演化算法挑选一组适合用户任务的学件,然后进行平均集成;FeatureAugmentReuser 将每个学件的预测输出视为新特征,并在增强后的特征集上构建一个简单模型。需要注意的是,JobSelectorReuser 和 FeatureAugmentReuser 只对表格数据有效,而其他方法也适用于文本和图像数据。 ### 同构场景 -在同构场景中,PFS 数据集中的 53 家商店被视为 53 个独立的用户。每个商店使用自己的测试数据作为用户数据,并应用与学件市场相同的特征工程方法。这些用户随后可以在市场内搜索与其任务具有相同特征空间的同构学件。 +在同构场景实验中,PFS 数据集中的 53 家商店被视为 53 个独立的用户。每个商店使用自己的测试数据作为用户数据,并采用与学件市场相同的特征工程方法。这些用户随后可以在市场内查搜与其任务具有相同特征空间的同构学件。 -当用户没有标记的数据或只有少量有标记数据时,我们对不同的基线算法进行了比较。下表显示了所有用户的平均损失。结果表明,我们提供的方法远远优于从市场中随机选择一个学件的结果。 +当用户没有标记的数据或只有少量有标记数据时,我们对不同的基线算法进行了比较。下表总结了所有用户的平均损失。结果表明,我们提供的方法优于从市场中随机选择学件的效果。
-| Setting | MSE | +| Setting | RMSE | |-----------------------------------|--------| | Mean in Market (Single) | 0.897 | | Best in Market (Single) | 0.756 | @@ -299,7 +301,7 @@ feature_augment_predict_y = reuse_feature_augment.predict(user_data=test_x)
-下图展示了当用户提供不同数量有标记数据的结果;对于每个用户,我们进行了多次实验,并计算了损失的均值和标准差;图中展示了所有用户的平均损失。其表明,当用户只有有限的训练数据时,识别和复用单个或多个学件相对于用户自己训练的模型表现出更好的性能。 +我们还探索了用户提供不同数量标记数据的情况。对于每个用户,我们进行了多次实验,并记录了损失的均值和标准差。下图展示了所有用户上的平均损失曲线。实验结果显示,当用户只有有限的训练数据时,识别和复用单个或多个学件相比于用户自行训练的模型表现出更好的性能。
@@ -307,17 +309,17 @@ feature_augment_predict_y = reuse_feature_augment.predict(user_data=test_x) ### 异构场景 -基于学件市场中学件与用户任务之间的相似性,异构情况可以进一步分为不同的特征工程和不同的任务场景。 +根据学件市场中学件与用户任务之间的相似性,异构场景的实验进一步分为两类:一类是特征空间异构但任务相同的情况,另一类则是任务本身不同的情况。 #### 不同特征工程的场景 -我们将 PFS 数据集中的 41 家商店视为用户,采用与市场中学件不同的特征工程方法生成他们的用户数据。因此,尽管市场上的某些学件也是为 PFS 数据集设计的,但特征空间并不完全一致。 +我们将 PFS 数据集中的 41 家商店作为用户,采用与市场中学件不同的特征工程方法生成他们的用户数据。因此,尽管市场上的某些学件也是为 PFS 数据集设计的,但特征空间并不完全一致。 -在这个实验设置中,我们研究了各种数据无关复用器。下表中的结果表明,即使用户缺乏标记数据,市场也能表现出较强的性能,特别是使用多学件复用方法 AverageEnsemble 时。 +在这种实验设定下,我们主要关注数据无关复用方法的表现。下表中的结果表明,即使用户没有标记数据,通过复用市场中的学件也能取得良好的性能,特别是使用多学件复用方法 AverageEnsemble 时。
-| Setting | MSE | +| Setting | RMSE | |-----------------------------------|--------| | Mean in Market (Single) | 1.149 | | Best in Market (Single) | 1.038 | @@ -329,9 +331,9 @@ feature_augment_predict_y = reuse_feature_augment.predict(user_data=test_x) #### 不同的任务场景 -我们在 M5 数据集的所有十家商店上采用了三种不同的特征工程方法,总共生成了 30 个用户。尽管销量预测的总体任务与市场上的学件所处理的任务相符,但没有一个学件是为 M5 销量预测任务专门设计的。 +我们对 M5 数据集的十家商店采用三种不同的特征工程方法,设定了 30 个用户任务。尽管市场上的学件都用于销量预测任务,但它们并非专门为 M5 数据集的销量预测任务设计。 -在下图中,我们展示了用户自行训练的模型和几种学件复用方法的损失曲线。显然,异构学件在用户标记数据有限的情况下表现出了对用户任务的有效性。 +下图展示了用户自行训练的模型与几种学件复用方法的损失曲线对比。结果显示,异构学件能通过有限的标记数据,有效适应特定用户任务。
@@ -340,15 +342,15 @@ feature_augment_predict_y = reuse_feature_augment.predict(user_data=test_x) ## 图像场景实验 -其次,我们在图像数据集上评估了我们的算法。值得注意的是,不同尺寸的图像可以通过调整大小进行标准化,无需考虑异构特征情况。 +接下来,我们对图像数据集进行了算法评估。由于图像尺寸的差异可以通过调整大小来标准化处理,因此不需要考虑特征异构的情况。 ### 实验设置 -我们选择了经典的图像分类数据集 [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html),其中包含 10 个类别的 60000 张 32x32 的彩色图像。总共上传了 50 个学件:每个学件包含一个卷积神经网络,该网络在一个不平衡的子集上进行训练,包括来自四个类别的 12000 个样本,采样比例为 `0.4:0.4:0.1:0.1`。总共测试了 100 个用户任务,每个用户任务包含 3000 个 CIFAR-10 样本,分为六个类别,采样比例为 `0.3:0.3:0.1:0.1:0.1:0.1`。 +我们选用了经典的 [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) 图像分类数据集进行实验,该数据集包括 10 个类别的 60000 张 32x32 彩色图像。我们上传了 50 个学件,每个学件包含一个在不平衡子集上训练的卷积神经网络模型,这个子集由四个类别的 12000 个样本组成,其采样比例为 `0.4:0.4:0.1:0.1`。我们设定了 100 个用户任务,每个任务由 CIFAR-10 的 3,000 个样本组成,这些样本涵盖六个类别,采样比例为 `0.3:0.3:0.1:0.1:0.1:0.1`。 ### 实验结果 -我们使用 `1 - Accuracy` 作为损失度量来评估各种方法的平均性能。下述实验结果显示,当用户面临标记数据的稀缺或仅拥有有限数量的标记数据(少于 2000 个实例)时,利用学件市场可以获得更好的性能。 +我们使用 `1 - Accuracy` 作为损失度量来评估各种方法的平均性能。实验结果表明,在标记数据稀缺或仅有限数量(不超过 2000 个实例)的情况下,通过利用学件市场的资源,可以实现更优的性能表现。
@@ -368,15 +370,16 @@ feature_augment_predict_y = reuse_feature_augment.predict(user_data=test_x) ## 文本场景实验 -最后,我们在文本数据集上对我们的算法进行了评估。文本数据的特征天然异构,但这个问题可以通过使用句子嵌入提取器 (Sentence Embedding Extractor) 来解决。 +最后,我们在文本数据集上对我们的算法进行了评估。由于文本数据的特征天然异构,我们通过使用句子嵌入提取器(Sentence Embedding Extractor)来统一处理这一问题。 ### 实验设置 -我们在经典的文本分类数据集上进行了实验:[20-newsgroup](http://qwone.com/~jason/20Newsgroups/),该数据集包含大约 20000 份新闻文档,包含 20 个不同的新闻组。与图像实验类似,我们一共上传了 50 个学件。每个学件都是在一个子集上进行训练,该子集仅包括三个超类中一半样本的数据,其中的模型为 `tf-idf` 特征提取器与朴素贝叶斯分类器的结合。我们定义了 10 个用户任务,每个任务包括两个超类。 +我们在经典的文本分类数据集 [20-newsgroup](http://qwone.com/~jason/20Newsgroups/) 上进行了实验,该数据集包含约 20000 篇新闻文档,涵盖 20 个不同的新闻组。与图像实验类似,我们一共上传了 50 个学件。每个学件的模型组合了 tf-idf 特征提取器与朴素贝叶斯分类器,在一个样本子集上进行训练。这些样本子集仅包括三个超类中一半的样本数据。我们设置了 10 个用户任务,每个任务包括两个超类。 + ### 实验结果 -结果如下表和图所示。同样地,即使没有提供标记数据,通过学件的识别和复用所达到的性能可以与市场上最佳学件相匹敌。此外,利用学件市场相对于从头训练模型可以减少约 2000 个样本。 +结果如下表和图所示。同样地,即使没有提供标记数据,通过学件的识别和复用所达到的性能可以与市场上最佳学件相匹敌。此外,相比于从头训练模型,利用学件市场可以节省大约 2000 个样本。
@@ -416,7 +419,7 @@ feature_augment_predict_y = reuse_feature_augment.predict(user_data=test_x) ## 如何贡献 -`learnware` 还很年轻,可能存在错误和问题。我们非常欢迎大家为 `learnware` 做出贡献。我们为所有的开发者提供了详细的[项目开发指南](https://learnware.readthedocs.io/en/latest/about/dev.html),并设置了相应的 commit 格式和 pre-commit 配置,请大家遵守。非常感谢大家的贡献! +`learnware` 还很年轻,可能存在错误和问题。我们非常欢迎大家为 `learnware` 做出贡献。我们为所有的开发者提供了详细的[项目开发指南](https://learnware.readthedocs.io/en/latest/about/dev.html),并设置了相应的 commit 格式和 pre-commit 配置,请大家遵守。非常感谢大家的参与和支持! ## 关于我们 diff --git a/docs/components/learnware.rst b/docs/components/learnware.rst index a98070d8..71096042 100644 --- a/docs/components/learnware.rst +++ b/docs/components/learnware.rst @@ -19,60 +19,61 @@ In our implementation, the class ``Learnware`` has three important member variab Learnware for Hetero Reuse ======================================================================= -In the Hetero Market (refer to `COMPONENTS: Hetero Market <./market.html#hetero-market>`_ for more details), ``HeteroSearcher`` identifies and recommends valuable learnwares from the entire market. This includes learnwares with different feature/label spaces compared to the user's task requirements, known as "heterogeneous learnwares". +In the Hetero Market (refer to `COMPONENTS: Hetero Market <./market.html#hetero-market>`_ for more details), +``HeteroSearcher`` identifies and recommends valuable learnwares from the entire market, returning learnwares with different feature and prediction spaces compared to the user's task requirements, +known as "heterogeneous learnwares". -To enable the reuse of these heterogeneous learnwares, we have developed ``FeatureAlignLearnware`` and ``HeteroMapLearnware``. -These components expand the capabilities of standard ``Learnware`` by aligning the feature and label spaces to match the user's task requirements. -They also provide essential interfaces for effectively applying heterogeneous learnwares to tasks beyond their original purposes. +``FeatureAlignLearnware`` and ``HeteroMapLearnware`` facilitate the deployment and reuse of heterogeneous learnwares. +They extend the capabilities of standard ``Learnware`` by aligning the input and output domain of heterogeneous learnwares to match those of the user's task. +These feature-aligned learnwares can then be utilized with either data-free reusers or data-dependent reusers. ``FeatureAlignLearnware`` --------------------------- -``FeatureAlignLearnware`` employs a neural network to align the feature space of the learnware to the user's task. -It is initialized with a ``Learnware`` and has the following methods to expand the applicable scope of this ``Learnware``: +``FeatureAlignLearnware`` utilizes a neural network to align the feature space of the learnware to the user's task. +It is initialized with a ``Learnware`` and offers the following methods to extend the ability of this ``Learnware``: -- **align**: Trains a neural network to align ``user_rkme``, which is the ``RKMETableSpecification`` of the user's data, with the learnware's statistical specification. -- **predict**: Predict the output for user data using the trained neural network and the original learnware's model. +- **align**: This method trains a neural network to align ``user_rkme``(the ``RKMETableSpecification`` of the user's data) with the learnware's statistical specification. +- **predict**: Using the trained neural network and the original learnware's model, this method predicts the output for the user's data. 
``HeteroMapAlignLearnware``
-----------------------------
 
-If user data is not only heterogeneous in feature space but also in label space, ``HeteroMapAlignLearnware`` uses the help of
-a small amount of labeled data ``(x_train, y_train)`` required from the user task to align heterogeneous learnwares with the user task.
-There are two critical interfaces in ``HeteroMapAlignLearnware``:
+If user data is heterogeneous not only in feature space but also in label space, ``HeteroMapAlignLearnware`` employs
+a small amount of labeled data ``(x_train, y_train)`` from the user task to align heterogeneous learnwares with the user task.
+``HeteroMapAlignLearnware`` provides two key interfaces:
 
 - ``HeteroMapAlignLearnware.align(self, user_rkme: RKMETableSpecification, x_train: np.ndarray, y_train: np.ndarray)``
 
-    - **input space alignment**: Align the feature space of the learnware to the user task's statistical specification ``user_rkme`` using ``FeatureAlignLearnware``.
-    - **output space alignment**: Further align the label space of the aligned learnware to the user task through supervised learning of ``FeatureAugmentReuser`` using ``(x_train, y_train)``.
+    - **Input space alignment**: Aligns the learnware's feature space to the user task's statistical specification ``user_rkme`` using ``FeatureAlignLearnware``.
+    - **Output space alignment**: Further aligns the label space of the aligned learnware to the user task through a simple model ``FeatureAugmentReuser``, which conducts feature augmentation and is trained on ``(x_train, y_train)``.
 
 - ``HeteroMapAlignLearnware.predict(self, user_data)``
 
-    - If input space and output space alignment are performed, use the ``FeatureAugmentReuser`` to predict the output for ``user_data``.
+    - If input space and output space alignment are performed, it uses ``FeatureAugmentReuser`` to predict the output for ``user_data``.
 
 
 All Reuse Methods
 ===========================
 
-In addition to applying ``Learnware``, ``FeatureAlignLearnware`` or ``HeteroMapAlignLearnware`` objects directly by calling their ``predict`` interface,
-the ``learnware`` package also provides a set of ``Reuse Methods`` for users to further customize a single or multiple learnwares, with the hope of enabling learnwares to be
-helpful beyond their original purposes and eliminating the need for users to build models from scratch.
+In addition to directly applying ``Learnware``, ``FeatureAlignLearnware`` or ``HeteroMapAlignLearnware`` objects by calling their ``predict`` interface,
+the ``learnware`` package also provides a set of baseline ``Reuse Methods`` for users to further customize single or multiple learnwares, with the hope of enabling learnwares to be
+helpful beyond their original purposes and reducing the need for users to build models from scratch.
 
-There are two main categories of ``Reuse Methods``: (1) direct reuse and (2) reuse based on a small amount of labeled data.
+There are two main categories of ``Reuse Methods``: (1) data-free reusers which reuse learnwares directly and (2) data-dependent reusers which reuse learnwares with a small amount of labeled data.
 
 .. note::
 
-    Combine ``HeteroMapAlignLearnware`` with the following reuse methods to enable the reuse of heterogeneous learnwares. See `WORKFLOW: Hetero Reuse <../workflows/reuse.html#hetero-reuse>`_ for details.
-
-Direct Reuse of Learnware
---------------------------
+    Combine ``HeteroMapAlignLearnware`` with the following reuse methods to reuse heterogeneous learnwares conveniently. 
See `WORKFLOW: Hetero Reuse <../workflows/reuse.html#hetero-reuse>`_ for details.
+Data-Free Reusers
+------------------
 Two methods for direct reuse of learnwares are provided: ``JobSelectorReuser`` and ``AveragingReuser``.
 
 JobSelectorReuser
 ^^^^^^^^^^^^^^^^^^
 
-``JobSelectorReuser`` trains a classifier ``job selector`` that identifies the optimal learnware for each data point in user data.
+``JobSelectorReuser`` trains a classifier ``job selector`` that identifies the most suitable learnware for each data point in user data.
 There are three member variables:
 
 - ``learnware_list``: A list of ``Learnware`` objects for the ``JobSelectorReuser`` to choose from.
@@ -81,14 +82,14 @@ There are three member variables:
 
 The most important methods of ``JobSelectorReuser`` are ``job_selector`` and ``predict``:
 
-- **job_selector**: Train a ``job selector`` based on user's data and the ``learnware_list``. Processions are different based on the value of ``use_herding``:
+- **job_selector**: Train a ``job selector`` based on user's data and the ``learnware_list``. The approach varies based on the ``use_herding`` setting:
 
-    - If ``use_herding`` is False: Statistical specifications of learnwares in ``learnware_list`` combined with the corresponding learnware index are used to train the ``job selector``.
+    - If ``use_herding`` is False: Statistical specifications of learnwares in ``learnware_list``, along with their respective indices, are used to train the ``job selector``.
     - If ``use_herding`` is True:
 
-        - Estimate the mixture weight based on user raw data and the statistical specifications of learnwares in ``learnware_list``
-        - Use the mixture weight to generate ``herding_num`` auxiliary data points which mimic the user task's distribution through the kernel herding method
-        - Finally, it learns the ``job selector`` on the auxiliary data points.
+        - The mixture weight is estimated based on user raw data and the statistical specifications of learnwares in ``learnware_list``.
+        - The kernel herding method generates ``herding_num`` auxiliary data points to mimic the user task's distribution using the mixture weight.
+        - The ``job selector`` is then trained on these auxiliary data points.
 
 - **predict**: The ``job selector`` is essentially a multi-class classifier :math:`g(\boldsymbol{x}):\mathcal{X}\rightarrow \mathcal{I}` with :math:`\mathcal{I}=\{1,\ldots, C\}`, where :math:`C` is the size of ``learnware_list``. Given a testing sample :math:`\boldsymbol{x}`, the ``JobSelectorReuser`` predicts it by using the :math:`g(\boldsymbol{x})`-th learnware in ``learnware_list``.
 
@@ -105,8 +106,8 @@ specifies the ensemble method(default is set to ``mean``).
 
 - For classification tasks, ``mode`` has two available options. If ``mode`` is set to ``vote_by_label``, the prediction is the majority vote label based on learnwares' output labels. If ``mode`` is set to ``vote_by_prob``, the prediction is the mean vector of all learnwares' output label probabilities.
 
-Reuse Learnware with Labeled Data
-----------------------------------
+Data-Dependent Reusers
+------------------------
 
 When users have a small amount of labeled data available, the ``learnware`` package provides two methods: ``EnsemblePruningReuser`` and ``FeatureAugmentReuser`` to help reuse learnwares. They are both initialized with a list of ``Learnware`` objects ``learnware_list`` and have different implementations of ``fit`` and ``predict`` methods. 
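+
+As a rough usage sketch (the ``learnware.reuse`` import path is an assumption based on the package layout; the ``fit``/``predict`` calls follow the descriptions in this section), both data-dependent reusers share the same pattern on a small labeled validation set:
+
+.. code-block:: python
+
+    from learnware.reuse import EnsemblePruningReuser, FeatureAugmentReuser
+
+    # "learnware_list" holds the Learnware objects recommended by the market,
+    # (val_X, val_y) is the user's small labeled validation set, and test_x is the user's unlabeled data.
+    reuse_ensemble = EnsemblePruningReuser(learnware_list=learnware_list, mode="classification")
+    reuse_ensemble.fit(val_X, val_y)
+    ensemble_predict_y = reuse_ensemble.predict(user_data=test_x)
+
+    reuse_augment = FeatureAugmentReuser(learnware_list=learnware_list, mode="classification")
+    reuse_augment.fit(val_X, val_y)
+    augment_predict_y = reuse_augment.predict(user_data=test_x)
+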
@@ -115,8 +116,8 @@ EnsemblePruningReuser ^^^^^^^^^^^^^^^^^^^^^^ The ``EnsemblePruningReuser`` class implements a selective ensemble approach inspired by the MDEP algorithm [1]_. -It selects a subset of learnwares from ``learnware_list``, utilizing the user's labeled data for effective ensemble integration on user tasks. -This method effectively balances validation error, margin ratio, and ensemble size, leading to a robust and optimized selection of learnwares for task-specific ensemble creation. +It selects a subset of learnwares from ``learnware_list`` using a multi-objective evolutionary algorithm and uses the ``AveragingReuser`` for average ensemble. +This method effectively balances validation error, margin ratio, and ensemble size, leading to a robust selection of learnwares for specific user tasks. - **fit**: Effectively prunes the large set of learnwares ``learnware_list`` by evaluating and comparing the learnwares based on their performance on user's labeled validation data ``(val_X, val_y)``. Returns the most suitable subset of learnwares. - **predict**: The ``mode`` member variable has two available options. Set ``mode`` to ``regression`` for regression tasks and ``classification`` for classification tasks. The prediction is the average of the selected learnwares' outputs. @@ -130,10 +131,10 @@ outputs of the learnwares from ``learnware_list`` on the user's validation data The augmented data (concatenated features combined with validation labels ``val_y``) are then used to train a simple model ``augment_reuser``, which gives the final prediction on ``user_data``. -- **fit**: Trains the ``augment_reuser`` using augmented user validation data. For classification tasks, ``mode`` should be set to ``classification``, and ``augment_reuser`` is a ``LogisticRegression`` model. For regression tasks, the mode should be set to ``classification``, and ``augment_reuser`` is a ``RidgeCV`` model. +- **fit**: Trains the ``augment_reuser`` using augmented user validation data. For classification tasks, ``mode`` should be set to ``classification``, and ``augment_reuser`` is a ``LogisticRegression`` model. For regression tasks, the mode should be set to ``regression``, and ``augment_reuser`` is a ``RidgeCV`` model. References ----------- -.. [1] Yu-Chang Wu, Yi-Xiao He, Chao Qian, and Zhi-Hua Zhou. Multi-objective Evolutionary Ensemble Pruning Guided by Margin Distribution. In: Proceedings of the 17th International Conference on Parallel Problem Solving from Nature (PPSN'22), Dortmund, Germany, 2022. \ No newline at end of file +.. [1] Yu-Chang Wu, Yi-Xiao He, Chao Qian, and Zhi-Hua Zhou. Multi-objective evolutionary ensemble pruning guided by margin distribution. In *Proceedings of the 17th International Conference on Parallel Problem Solving from Nature*, 2022. \ No newline at end of file diff --git a/docs/components/market.rst b/docs/components/market.rst index f27a5cc1..c394edd5 100644 --- a/docs/components/market.rst +++ b/docs/components/market.rst @@ -4,34 +4,36 @@ Learnware Market ================================ -The ``Learnware Market`` receives high-performance machine learning models from developers, incorporates them into the system, and provides services to users by identifying and reusing learnware to help users solve current tasks. Developers voluntarily submit various learnwares to the learnware market, and the market conducts quality checks and further organization of these learnwares. 
When users submit task requirements, the learnware market automatically selects whether to recommend a single learnware or a combination of multiple learnwares. +The ``Learnware Market``, serving as the implementation of the learnware doc system, receives high-performance machine learning models from developers, incorporates them into the system, and provides services to users by identifying and reusing learnware to help users solve current tasks. Developers voluntarily submit various learnwares to the learnware doc system, and the market conducts quality checks and further organization of these learnwares. When users submit task requirements, the learnware doc system automatically selects whether to recommend a single learnware or a combination of multiple learnwares. -The ``Learnware Market`` will receive various kinds of learnwares, and learnwares from different feature/label spaces form numerous islands of specifications. All these islands constitute the ``specification world`` in the learnware market. The market should discover and establish connections between different islands and merge them into a unified specification world. This further organization of learnwares supports search learnwares among all learnwares, not just among learnwares that have the same feature space and label space with the user's task requirements. +The ``Learnware Market`` will receive various kinds of learnwares, and learnwares from different feature and prediction spaces form numerous islands of specifications. Collectively, these islands constitute the ``specification world`` in the learnware doc system. The doc system should discover and establish connections between different islands and integrate them into a unified specification world, with the hope of broadening the search scope and preliminarily supporting learnware identification from the entire learnware collection, not just among learnwares that share the same feature and prediction space with the user's task requirements. Framework ====================================== -The ``Learnware Market`` is combined with a ``organizer``, a ``searcher``, and a list of ``checker``\ s. +The ``Learnware Market`` implements the market module which is designed for learnware organization, identification and usability testing. A single market module consists of one ``organizer`` module, one ``searcher`` module, and multiple ``checker`` modules. -The ``organizer`` can store and organize learnwares in the market. It supports ``add``, ``delete``, and ``update`` operations for learnwares. It also provides the interface for the ``searcher`` to search learnwares based on user requirements. +The ``organizer`` module oversees the storage and organization of learnware, supporting operations such as reloading the entire learnware collection and performing insertions, deletions and updates. -The ``searcher`` can search learnwares based on user requirements. The implementation of ``searcher`` depends on the concrete implementation and interface for ``organizer``, where usually an ``organizer`` can be compatible with multiple different ``searcher``\ s. +The ``searcher`` module conducts learnware identification based on user information, which encompasses statistical and semantic specifications. It implements several ``searcher``\ s to retrieve learnwares that meet user requirements and recommends them as search results, where each ``searcher`` employs a different search algorithm. -The ``checker`` is used for checking the learnware in some standards. 
It should check the utility of a learnware and return the status and a message related to the learnware's check result. Only the learnwares who passed the ``checker`` could be able to be stored and added into the ``Learnware Market``. +The ``checker`` module is responsible for checking the usability and quality of learnwares by verifying the availability of semantic and statistical specifications and creating a runtime environment to test learnware models based on the model container. The learnwares that pass the ``checker`` module are then inserted and stored by the organizer module, appearing in the ``Learnware Market``. Current Checkers ====================================== -The ``learnware`` package provides two different implementations of ``Learnware Market`` where both share the same ``checker`` list. So we first introduce the details of ``checker``\ s. - -The ``checker``\ s check a learnware object in different aspects, including environment configuration (``CondaChecker``), semantic specifications (``EasySemanticChecker``), and statistical specifications (``EasyStatChecker``). Each checker's ``__call__`` method is designed to be invoked as a function to conduct the respective checks on the learnware and return the outcomes. It defines three types of learnwares: ``INVALID_LEARNWARE`` denotes the learnware does not pass the check, ``NONUSABLE_LEARNWARE`` denotes the learnware passes the check but cannot make predictions, ``USABLE_LEARNWARE`` denotes the leanrware pass the check and can make predictions. Currently, we have three ``checker``\ s, which are described below. +The ``checker`` module checks a learnware from different aspects using different ``checker``\ s, including environment configuration (``CondaChecker``), semantic specifications (``EasySemanticChecker``), and statistical specifications (``EasyStatChecker``). +Each checker's ``__call__`` method is designed to be invoked as a function to conduct the respective checks on the learnware and return the outcomes. +Three types of learnware statuses are defined: ``INVALID_LEARNWARE`` indicates the learnware fails the check, +``NONUSABLE_LEARNWARE`` indicates the learnware passes the check but is unable to make predictions, ``USABLE_LEARNWARE`` denotes the learnware passes the check and can make predictions. +Currently, there are three implemented ``checker``\ s within this module, described as follows. ``CondaChecker`` ------------------ -This ``checker`` checks the environment of the learnware object. It creates a ``LearnwaresContainer`` instance to handle the Learnware and uses ``inner_checker`` to check the Learnware. If an exception occurs, it logs the error and returns the ``NONUSABLE_LEARNWARE`` status and error message. +This ``checker`` checks the environment of the learnware object. It creates a ``LearnwaresContainer`` instance to containerize the learnware and uses ``inner_checker`` to check the Learnware. If an exception occurs, it logs the error and returns the ``NONUSABLE_LEARNWARE`` status with error message. ``EasySemanticChecker`` @@ -48,12 +50,13 @@ This ``checker`` checks the statistical specification and functionality of a lea Current Markets ====================================== -The ``learnware`` package provides two different implementations of ``market``, i.e., ``Easy Market`` and ``Hetero Market``. They have different implementations of ``organizer`` and ``searcher``. +The ``learnware`` package provides two different implementations of ``market``, i.e., ``Easy Market`` and ``Hetero Market``. 
+They share the same ``checker`` module and have different implementations of ``organizer`` and ``searcher``. Easy Market ------------- -Easy market is a basic realization of the learnware market. It consists of ``EasyOrganizer``, ``EasySearcher``, and the checker list ``[EasySemanticChecker, EasyStatChecker]``. +Easy market is a basic realization of the learnware doc system. It consists of ``EasyOrganizer``, ``EasySearcher``, and the checker list ``[EasySemanticChecker, EasyStatChecker]``. ``Easy Organizer`` @@ -73,8 +76,7 @@ Easy market is a basic realization of the learnware market. It consists of ``Eas ``EasySearcher`` consists of ``EasyFuzzsemanticSearcher`` and ``EasyStatSearcher``. ``EasyFuzzsemanticSearcher`` is a kind of ``Semantic Specification Searcher``, while ``EasyStatSearcher`` is a kind of ``Statistical Specification Searcher``. All these searchers return helpful learnwares based on ``BaseUserInfo`` provided by users. -``BaseUserInfo`` is a ``Python API`` for users to provide enough information to identify helpful learnwares. -When initializing ``BaseUserInfo``, three optional information can be provided: ``id``, ``semantic_spec`` and ``stat_info``. These specifications' introductions are shown in `COMPONENTS: Specification <./spec.html>`_. +``BaseUserInfo`` is a ``Python API`` for users to provide enough information to identify helpful learnwares. When initializing ``BaseUserInfo``, three optional information can be provided: ``id``, ``semantic_spec`` and ``stat_info``. These specifications' introductions are shown in `COMPONENTS: Specification <./spec.html>`_. The semantic specification search and statistical specification search have been integrated into the same interface ``EasySearcher``. @@ -89,13 +91,13 @@ The semantic specification search and statistical specification search have been ``Semantic Specification Searcher`` is the first-stage search based on ``user_semantic``, identifying potentially helpful learnwares whose models solve tasks similar to your requirements. There are two types of Semantic Specification Search: ``EasyExactSemanticSearcher`` and ``EasyFuzzSemanticSearcher``. -In these two searchers, each learnware in the ``learnware_list`` is compared with ``user_info`` according to their ``semantic_spec`` and added to the search result if matched. Two semantic_spec are matched when all the key words are matched or empty in ``user_info``. Different keys have different matching rules. Their ``__call__`` functions are the same: +In these two searchers, each learnware in the ``learnware_list`` is compared with ``user_info`` based on their ``semantic_spec``. A learnware is added to the search result if a match is found. Two ``semantic_spec``\ s are considered matched when all the key words either match or are empty in ``user_info``. Different keys follow different matching rules. The ``__call__`` function for these searchers are the same: - **EasyExactSemanticSearcher/EasyFuzzSemanticSearcher.__call__(self, learnware_list: List[Learnware], user_info: BaseUserInfo)-> SearchResults** - - For keys ``Data``, ``Task``, ``Library`` and ``license``, two``semantic_spec`` keys are matched only if these values(only one value foreach key) of learnware ``semantic_spec`` exists in values(may be muliplevalues for one key) of user ``semantic_spec``. - - For the key ``Scenario``, two ``semantic_spec`` keys are matched iftheir values have nonempty intersections. - - For keys ``Name`` and ``Description``, the values are strings and caseis ignored. 
In ``EasyExactSemanticSearcher``, two ``semantic_spec`` keys are matched if these values of learnware ``semantic_spec`` is a substring of user ``semantic_spec``. In ``EasyFuzzSemanticSearcher``, it starts with the same kind of exact semantic search as ``EasyExactSemanticSearcher``. If the result is empty, the fuzz semantic searcher is activated: the ``learnware_list`` is sorted according to the fuzz score function ``fuzzpartial_ratio`` in ``rapidfuzz``.
+    - For the keys ``Data``, ``Task``, ``Library``, and ``license`` in ``semantic_spec``, a match occurs only when the value (only one value for each key) in a learnware's ``semantic_spec`` is also found in the values (which may be multiple for one key) in the user's ``semantic_spec``.
+    - For the key ``Scenario``, two ``semantic_spec`` keys are matched if their values have nonempty intersections.
+    - For the keys ``Name`` and ``Description``, the values are strings and case is ignored. In ``EasyExactSemanticSearcher``, two ``semantic_spec`` keys are matched if the values in the learnware ``semantic_spec`` are substrings of the corresponding values in the user ``semantic_spec``. ``EasyFuzzSemanticSearcher`` begins with the same exact semantic search as ``EasyExactSemanticSearcher``. If no results are found, it activates a fuzz semantic searcher: the ``learnware_list`` is then sorted according to the fuzz score function ``fuzz.partial_ratio`` provided by ``rapidfuzz``.
 
 The results are returned and stored in ``single_results`` of ``SearchResults``.
 
@@ -103,28 +105,28 @@ The results are returned and stored in ``single_results`` of ``SearchResults``.
 ``Statistical Specification Searcher``
 ''''''''''''''''''''''''''''''''''''''''''
 
-If the user's statistical specification ``stat_info`` is provided, the learnware market can perform a more accurate learnware selection using ``EasyStatSearcher``.
+If the user's statistical specification ``stat_info`` is provided, the learnware doc system can perform more targeted learnware identification using ``EasyStatSearcher``.
 
 - **EasyStatSearcher.__call__(self, learnware_list: List[Learnware], user_info: BaseUserInfo, max_search_num: int = 5, search_method: str = "greedy",) -> SearchResults**
 
     - It searches for helpful learnwares from ``learnware_list`` based on the ``stat_info`` in ``user_info``.
-    - The result ``SingleSearchItem`` and ``MultipleSearchItem`` are both stored in ``SearchResults``. In ``SingleSearchItem``, it searches for individual learnware solutions for the user's task, and it also assigns scores to indicate the compatibility of each learnware with the user's task. In ``MultipleSearchItem``, it searches for a mixture of learnwares that could solve the user task better; the mixture learnware list and a score for the mixture are returned.
-    - The parameter ``search_method`` provides two choice of search strategies for mixture learnwares: ``greedy`` and ``auto``. For the search method ``greedy``, each time it chooses a learnware to make their mixture closer to the user's ``stat_info``; for the search method ``auto``, it directly calculates the best mixture weight for the ``learnware_list``.
-    - For single learnware search, we only return the learnwares with a score larger than 0.6. For multiple learnware search, the parameter ``max_search_num`` specifies the maximum length of the returned mixture learnware list.
+    - ``SingleSearchItem`` and ``MultipleSearchItem`` are types of results stored in ``SearchResults``. 
``SingleSearchItem`` contains single recommended learnwares for the user's task, along with scores indicating each learnware's compatibility with the task. ``MultipleSearchItem`` includes a combination of learnwares, attempting to address the task better, and provides an overall score for this mixture.
+    - The parameter ``search_method`` offers two options for search strategies of mixture learnwares: ``greedy`` and ``auto``. With the ``greedy`` method, it incrementally adds learnwares that significantly reduce the distribution distance, thereby bringing the mixture closer to the user's ``stat_info``. With the search method ``auto``, it directly calculates the optimal mixture weights for the ``learnware_list``.
+    - For single learnware search, only learnwares with a score higher than 0.6 are returned. For multiple learnware search, the parameter ``max_search_num`` specifies the maximum number of learnwares in the returned mixture learnware list.
 
 
 ``Easy Checker``
 ++++++++++++++++++++
 
-``EasySemanticChecker`` and ``EasyStatChecker`` are used to check the validity of the learnwares. They are used as:
+``EasySemanticChecker`` and ``EasyStatChecker`` are used to verify the validity of the learnwares:
 
-- ``EasySemanticChecker`` mainly check the integrity and legitimacy of the ``semantic_spec`` in the learnware. A legal ``semantic_spec`` should include all the keys, and the type of each key should meet our requirements. For keys with type ``Class``, the values should be unique and in our ``valid_list``; for keys with type ``Tag``, the values should not be empty; for keys with type ``String``, a non-empty string is expected as the value; for a table learnware, the dimensions and description of inputs are needed; for ``classification`` or ``regression`` learnwares, the dimensions and description of outputs are indispensable. The learnwares that pass the ``EasySemanticChecker`` is marked as ``NONUSABLE_LEARNWARE``; otherwise, it is ``INVALID_LEARNWARE``, and error information will be returned.
-- ``EasyStatChecker`` mainly check the ``model`` and ``stat_spec`` of the learnwares. It includes the following steps:
+- ``EasySemanticChecker`` checks the integrity and legitimacy of the ``semantic_spec`` in a learnware. (1) A valid ``semantic_spec`` must include all necessary keys, with each key's type conforming to specified requirements. For ``Class`` type keys, values should be unique and in the ``valid_list``; for ``Tag`` type keys, values should not be empty; for ``String`` type keys, a non-empty string is expected. (2) Tabular learnwares should include input dimensions and feature descriptions within their ``semantic_spec``; (3) ``Classification`` or ``Regression`` learnwares should provide output dimensions and descriptions. Learnwares passing the ``EasySemanticChecker`` are marked as ``NONUSABLE_LEARNWARE``; otherwise, as ``INVALID_LEARNWARE``, with error information returned.
+- ``EasyStatChecker`` checks the ``model`` and ``stat_spec`` of the learnwares, involving:
 
-  - **Check model instantiation**: ``learnware.instantiate_model`` to instantiate the model and transform it to a ``BaseModel``.
-  - **Check input shape**: Check whether the shape of ``semantic_spec`` input(if it exists), ``learnware.input_shape``, and the shape of ``stat_spec`` are consistent, and then generate an example input with that shape.
-  - **Check model prediction**: Use the model to predict the label of the example input and record the output shape. 
  - **Check output shape**: For ``Classification``, ``Regression`` and ``Feature Extraction`` tasks, the output shape should be consistent with that in ``semantic_spec`` and ``learnware.output_shape``. Besides, for ``Regression`` tasks, the output should be a legal class in ``semantic_spec``.
+  - **Model instantiation check**: Utilizing ``learnware.instantiate_model`` to instantiate the model as a ``BaseModel``.
+  - **Input shape check**: Checking whether the ``semantic_spec`` input shape (if present), ``learnware.input_shape``, and ``stat_spec`` shape are consistent, and then generating an example input of that shape.
+  - **Model prediction check**: Using the model to predict the label of the example input and recording the model output.
+  - **Output shape check**: For ``Classification``, ``Regression``, and ``Feature Extraction`` tasks, the output's shape should align with ``semantic_spec`` and ``learnware.output_shape``. For ``Regression`` tasks, the output's shape should also be consistent with the output dimension provided in the ``semantic_spec``. For ``Classification`` tasks, the output should either contain valid classification labels or match the output dimension provided in the ``semantic_spec``.
 
 If any step above fails or meets an error, the learnware will be marked as ``INVALID_LEARNWARE``. The learnwares that pass the ``EasyStatChecker`` are marked as ``USABLE_LEARNWARE``.
 
@@ -133,29 +135,30 @@ Hetero Market
 ------------- 
 The Hetero Market encompasses ``HeteroMapTableOrganizer``, ``HeteroSearcher``, and the checker list ``[EasySemanticChecker, EasyStatChecker]``.
-It represents an extended version of the Easy Market, capable of accommodating table learnwares from diverse feature spaces (referred to as heterogeneous table learnwares), thereby broadening the applicable scope of the learnware paradigm.
-This market trains a heterogeneous engine by utilizing existing learnware specifications to merge distinct specification islands and assign new specifications, referred to as ``HeteroMapTableSpecification``, to learnwares.
-As more learnwares are submitted, the heterogeneous engine will undergo continuous updates, with the aim of constructing a more precise specification world.
+It represents a preliminary extension of the Easy Market, designed to support tabular tasks, with the aim of accommodating tabular learnwares from diverse feature spaces (referred to as heterogeneous table learnwares).
+This extension thereby broadens the search scope and facilitates learnware identification and reuse across the entire learnware collection.
+The Hetero Market utilizes existing learnware specifications to train a heterogeneous engine, which merges distinct specification islands and assigns new specifications, known as ``HeteroMapTableSpecification``, to learnwares.
+As more learnwares are submitted, this heterogeneous engine will continuously update, hopefully leading to a more precise specification world.
 
 ``HeteroMapTableOrganizer``
 +++++++++++++++++++++++++++
 
-``HeteroMapTableOrganizer`` overrides methods from ``EasyOrganizer`` and implements new methods to support the organization of heterogeneous table learnwares. Key features include:
+``HeteroMapTableOrganizer`` overrides methods from ``EasyOrganizer`` and implements new methods to support the management of heterogeneous table learnwares. Key features include:
 
- **reload_market**: Reloads the heterogeneous engine if there is one. Otherwise, initialize an engine with default configurations. 
Returns a flag indicating whether the market is reloaded successfully.
- **reset**: Resets the heterogeneous market with specific settings regarding the heterogeneous engine such as ``auto_update``, ``auto_update_limit`` and ``training_args`` configurations.
-- **add_learnware**: Add a learnware into the market, meanwhile assigning ``HeteroMapTableSpecification`` to the learnware using the heterogeneous engine. The engine's update process will be triggered if ``auto_update`` is set to True and the number of learnwares in the market with ``USABLE_LEARNWARE`` status exceeds ``auto_update_limit``. Return the ``learnware_id`` and ``learnwere_status``.
+- **add_learnware**: Add a learnware into the market, meanwhile generating ``HeteroMapTableSpecification`` for the learnware using the heterogeneous engine. The engine's update process will be triggered if ``auto_update`` is set to True and the number of learnwares in the market with ``USABLE_LEARNWARE`` status exceeds ``auto_update_limit``. Return the ``learnware_id`` and ``learnware_status``.
- **delete_learnware**: Removes the learnware with ``id`` from the market and also removes its new specification if there is one. Return a flag of whether the deletion is successful.
- **update_learnware**: Update the learnware's ``zip_path``, ``semantic_spec``, ``check_status`` and its new specification if there is one. Return a flag indicating whether it passed the ``checker``.
--**generate_hetero_map_spec**: Generate ``HeteroMapTableSpecification`` for users based on the information provided in ``user_info``.
+- **generate_hetero_map_spec**: Generate ``HeteroMapTableSpecification`` for users based on the user's statistical specification provided in ``user_info``.
- **train**: Build the heterogeneous engine using learnwares from the market that supports heterogeneous market training.
 
 
``HeteroSearcher``
++++++++++++++++++
 
-``HeteroSearcher`` builds upon ``EasySearcher`` with additional support for searching among heterogeneous table learnwares, returning helpful learnwares with feature space and label space different from the user's task requirements.
+``HeteroSearcher`` builds upon ``EasySearcher`` with additional support for searching among heterogeneous table learnwares, returning potentially helpful learnwares with feature and prediction spaces different from the user's task requirements.
 The semantic specification search and statistical specification search have been integrated into the same interface ``HeteroSearcher``.
 
 - **HeteroSearcher.__call__(self, user_info: BaseUserInfo, check_status: int = None, max_search_num: int = 5, search_method: str = "greedy") -> SearchResults**
diff --git a/docs/components/model.rst b/docs/components/model.rst
index 76952ea9..7c336744 100644
--- a/docs/components/model.rst
+++ b/docs/components/model.rst
@@ -5,19 +5,16 @@ Model
 
 A learnware is a well-performed trained model with a specification, where the model is an indispensable component of the learnware.
 
-
-In this section, we will first introduce the ``BaseModel``, which defines the standard format for models in the learnware package.
-Following that, we will introduce the ``ModelContainer``, which implements model deployment in conda virtual environments and Docker containers.
+In this section, we will introduce the model module implemented within the ``learnware`` package. We will first introduce the ``BaseModel``, which defines the standard format for models in the ``learnware`` package. 
Following that, we will introduce the ``ModelContainer``, which implements model deployment in conda virtual environments and Docker containers. BaseModel ====================================== -The ``BaseModel`` class is a fundamental component of the learnware package and serves as a standard interface for defining machine learning models. +The ``BaseModel`` class is a fundamental component of the learnware package which provides standardized interface for model training, prediction and fine-tuning. This class is created to make it easier for users to submit learnwares to the market. It helps ensure that submitted models follow a clear set of rules and requirements. -The model in a learnware should inherit the ``BaseModel`` class. -Here's a more detailed explanation of key components: +All user models should inherit the ``BaseModel`` class. Here's a more detailed explanation of key components: - ``input_shape``: Specify the shape of the input features your model expects. - ``output_shape``: Define the shape of the output predictions generated by your model. diff --git a/docs/components/spec.rst b/docs/components/spec.rst index ed116f0b..e3fe5f8d 100644 --- a/docs/components/spec.rst +++ b/docs/components/spec.rst @@ -3,11 +3,11 @@ Specification ================================ -Learnware specification is the core component of the learnware paradigm, linking all processes about learnwares, including uploading, organizing, searching, deploying, and reusing. +Learnware specification is the central component of the learnware paradigm, linking all processes related to learnwares, including uploading, organizing, searching, deploying, and reusing. -In this section, we will introduce the concept and design of learnware specification in the ``learnware`` package. -We will then explore ``regular specification``\ s tailored for different data types such as tables, images, and texts. -Lastly, we cover a ``system specification`` specifically assigned to table learnwares by the learnware market, aimed at accommodating all available table learnwares into a unified "specification world" despite their heterogeneity. +In this section, we will introduce the concept and design of learnware specification within the ``learnware`` package. +We will then explore ``regular specification``\ s covering data types including tables, images, and texts. +Lastly, we introduce a ``system specification`` specifically generated for tabular learnwares by the learnware doc system using its knowledge, enhancing learnware management and further characterizing their capabilities. Concepts & Types ================== @@ -18,18 +18,18 @@ The ``learnware`` package employs a highly extensible specification design, whic - **Semantic specification** describes the model's type and functionality through a set of descriptions and tags. Learnwares with similar semantic specifications reside in the same specification island - **Statistical specification** characterizes the statistical information contained in the model using various machine learning techniques. It plays a crucial role in locating the appropriate place for the model within the specification island. -When searching in the learnware market, the system first locates specification islands based on the semantic specification of the user's task, -then pinpoints highly beneficial learnwares on these islands based on the statistical specification of the user's task. 
+When searching in the learnware doc system, the system first locates specification islands based on the semantic specification of the user's task, +then pinpoints potentially beneficial learnwares on these islands based on the statistical specification of the user's task. Statistical Specification --------------------------- -We employ the ``Reduced Kernel Mean Embedding (RKME) Specification`` as the foundation for implementing statistical specification for diverse data types, +We employ the ``Reduced Kernel Mean Embedding (RKME) Specification`` as the basis for implementing statistical specification for diverse data types, with adjustments made according to the characteristics of each data type. -The RKME specification is a recent development in learnware specification design, which represents the distribution of a model's training data in a privacy-preserving manner. +The RKME specification is a recent development in learnware specification design, which captures the data distribution while not disclosing the raw data . -Within the ``learnware`` package, you will find two types of statistical specifications: ``regular specification`` and ``system specification``. The former is generated locally -by users to express their model's statistical information, while the learnware market assigns the latter to accommodate and organize heterogeneous learnwares. +There are two types of statistical specifications within the ``learnware`` package: ``regular specification`` and ``system specification``. The former is generated locally +by users to express their model's statistical information. In contrast, the latter is generated by the learnware doc system to enhance learnware management and further characterizing the learnwares' capabilities. Semantic Specification ----------------------- @@ -37,8 +37,8 @@ Semantic Specification The semantic specification consists of a "dict" structure that includes keywords "Data", "Task", "Library", "Scenario", "License", "Description", and "Name". In the case of table learnwares, users should additionally provide descriptions for each feature dimension and output dimension through the "Input" and "Output" keywords. -- If "data_type" is "Table", you need to specify the semantics of each dimension of the model's input data to make the uploaded learnware suitable for tasks with heterogeneous feature spaces. -- If "task_type" is "Classification", you need to provide the semantics of model output labels (prediction labels start from 0), making the uploaded learnware suitable for classification tasks with heterogeneous output spaces. +- If "data_type" is "Table", you need to specify the semantics of each dimension of the model's input data for compatibility with tasks in heterogeneous feature spaces. +- If "task_type" is "Classification", you need to provide the semantics of model output labels (prediction labels start from 0) for use in classification tasks with heterogeneous output spaces. - If "task_type" is "Regression", you need to specify the semantics of each dimension of the model output, making the uploaded learnware suitable for regression tasks with heterogeneous output spaces. Regular Specification @@ -56,7 +56,7 @@ as shown in the following code: regular_spec = generate_stat_spec(type=data_type, x=train_x) regular_spec.save("stat.json") -It is worth noting that the above code only runs on the user's local computer and does not interact with cloud servers or leak local private data. 
+It is worth noting that the above code only runs on the user's local computer and does not interact with cloud servers or leak local raw data. .. note:: @@ -65,14 +65,17 @@ It is worth noting that the above code only runs on the user's local computer an Table Specification -------------------------- -The ``regular specification`` for tabular learnware is essentially the RKME specification of the model's training table data. No additional adjustment is needed. +``RKMETableSpecification`` implements the RKME specification, which is the basis of tabular learnwares. It facilitates learnware identification and reuse for homogeneous tasks with identical input and output domains. Image Specification -------------------------- -Image data lives in a higher dimensional space than other data types. Unlike lower dimensional spaces, metrics defined based on Euclidean distances (or similar distances) will fail in higher dimensional spaces. This means that measuring the similarity between image samples becomes difficult. +Image data lives in a higher dimensional space than other data types. Unlike lower dimensional spaces, +metrics defined based on Euclidean distances (or similar distances) will fail in higher dimensional spaces. +This means that measuring the similarity between image samples becomes difficult. -To address these issues, we use the Neural Tangent Kernel (NTK) based on Convolutional Neural Networks (CNN) to measure the similarity of image samples. As we all know, CNN has greatly advanced the field of computer vision and is still a mainstream deep-learning technique. +The specification for image data ``RKMEImageSpecification`` introduces a new kernel function that transforms images implicitly before RKME calculation. +It employs the Neural Tangent Kernel (NTK) [1]_, a theoretical tool that characterizes the training dynamics of deep neural networks in the infinite width limit, to enhance the measurement of image similarity in high-dimensional spaces. Usage & Example ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -102,14 +105,14 @@ By randomly sampling a subset of the dataset, we can efficiently construct Image spec = generate_rkme_image_spec(X, sample_size=5000) spec.save("cifar10.json") -Privacy Protection +Raw Data Protection ^^^^^^^^^^^^^^^^^^^^^^^^^^ -In the third row of the figure, we show the eight pseudo-data with the largest weights`\beta` in the Image Specification generated on the CIFAR-10 dataset. -Notice that the Image Specification generated based on Neural Tangent Kernel (NTK) protects the user's privacy very well. +In the third row of the figure, we show the eight pseudo-data with the largest weights :math:`\beta` in the ``RKMEImageSpecification`` generated on the CIFAR-10 dataset. +Notice that the ``RKMEImageSpecification`` generated based on Neural Tangent Kernel (NTK) doesn't compromise raw data security. In contrast, we show the performance of the RBF kernel on image data in the first row of the figure below. -The RBF not only exposes the real data (plotted in the corresponding position in the second row) but also fails to fully utilize the weights :math:`\beta`. +The RBF not only exposes the original data (plotted in the corresponding position in the second row) but also fails to fully utilize the weights :math:`\beta`. .. 
image:: ../_static/img/image_spec.png :align: center @@ -122,19 +125,16 @@ Different from tabular data, each text input is a string of different length, so System Specification ====================================== -In contrast to ``regular specification``\ s, which are generated solely by users, -``system specification``\ s are higher-level statistical specifications assigned by learnware markets -to effectively accommodate and organize heterogeneous learnwares. -This implies that ``regular specification``\ s are usually applicable across different markets, while ``system specification``\ s are generally closely associated -with particular learnware market implementations. +In addition to ``regular specification``\ s, the learnware doc system leverages its knowledge to generate new ``system specification``\ s for learnwares. +The ``system specification`` module is automatically generated by the doc system. For newly inserted learnwares, the ``organizer`` generates new system specifications based on existing learnware statistical specifications to facilitate search operations and expand the search scope. -``system specification`` plays a critical role in heterogeneous markets such as the ``Hetero Market``: -- Learnware organizers use these specifications to connect isolated specification islands into unified "specification world"s. -- Learnware searchers perform helpful learnware recommendations among all table learnwares in the market, leveraging the ``system specification``\ s generated for users. +Currently, the ``learnware`` package has implemented the ``HeteroMapTableSpecification`` which enables learnwares organized by the ``Hetero Market`` to support tasks with varying feature and prediction spaces. +This specification is derived by mapping the ``RKMETableSpecification`` to a unified semantic embedding space, utilizing the heterogenous engine which is a tabular network trained on feature semantics of all tabular learnwares. +Please refer to `COMPONENTS: Hetero Market <../components/market.html#hetero-market>`_ for implementation details. -The ``learnware`` package now includes a type of ``system specification``, named ``HeteroMapTableSpecification``, made especially for the ``Hetero Market`` implementation. -This specification is automatically given to all table learnwares when they are added to the ``Hetero Market``. -It is also set up to be updated periodically, ensuring it remains accurate as the learnware market evolves and builds more precise specification worlds. -Please refer to `COMPONENTS: Hetero Market <../components/market.html#hetero-market>`_ for implementation details. \ No newline at end of file +References +----------- + +.. [1] Adrià Garriga-Alonso, Laurence Aitchison, and Carl Edward Rasmussen. Deep convolutional networks as shallow gaussian processes. In *International Conference on Learning Representations*, 2019. \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index 946a5c51..0ea88afb 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -7,9 +7,11 @@ ``Learnware`` Documentation ============================================================ -The ``learnware`` package provides a fundamental implementation of the central concepts and procedures for the learnware paradigm. -A learnware is a well-performed trained machine learning model with a specification that enables it to be adequately identified to reuse according to the requirement of future users who may know nothing about the learnware in advance. 
-The learnware paradigm is a new paradigm aimed at enabling users to reuse existed well-trained models to solve their AI tasks instead of starting from scratch. +The ``learnware`` package provides a fundamental implementation of the central concepts and procedures and encompasses all processes within the *learnware paradigm*, +including the submitting, usability testing, organization, identification, deployment and reuse of learnwares. +Its well-structured design ensures high scalability and facilitates the seamless integration of additional features and techniques in the future. +In addition, the ``learnware`` package serves as the core engine for the `Beimingwu System `_, which supports the computational and algorithmic aspects of ``Beimingwu`` +and offers rich algorithmic interfaces for learnware-related tasks and research experiments. .. _user_guide: diff --git a/docs/references/beimingwu.rst b/docs/references/beimingwu.rst index e5f626ba..a25a6b17 100644 --- a/docs/references/beimingwu.rst +++ b/docs/references/beimingwu.rst @@ -5,25 +5,25 @@ Beimingwu System `Beimingwu `_ is the first systematic open-source implementation of learnware dock system, providing a preliminary research platform for learnware studies. Developers worldwide can submit their models freely to the learnware dock. They can generate specifications for the model with the help of Beimingwu without disclosing their raw data, and then the model and specification can be assembled into a learnware, which will be accommodated in the learnware dock. Future users can solve their tasks by submitting their requirements and reusing helpful learnwares returned by Beimingwu, while also not disclosing their own data. It is anticipated that after Beimingwu accumulates millions of learnwares, an "emergent" behavior may occur: machine learning tasks that have never been specifically tackled may be solved by assembling and reusing some existing learnwares. -The ``learnware`` package is the cornerstone of the Beimingwu system, functioning as its core engine. -It offers a comprehensive suite of central APIs that encompass a wide range of functionalities, including the submission, verification, organization, search, and deployment of learnware. -This integration ensures a streamlined and efficient process, facilitating seamless interactions within the system. +The ``learnware`` package serves as the core engine for the ``Beimingwu`` system, which supports the computational and algorithmic aspects of ``Beimingwu``. +It offers a comprehensive suite of unified and scalable interfaces that encompass all processes within the learnware paradigm, including the submitting, usability testing, organization, management, identification, deployment and reuse of learnware. +This integration ensures a streamlined and efficient process, facilitating seamless interactions within the system and provides a foundation for future research in organization, identification and reuse algorithms. Core Features in the Beimingwu System ======================================= The Beimingwu learnware dock system, serving as a preliminary research platform for learnware, systematically implements the core processes of the learnware paradigm for the first time: -- ``Submitting Stage``: The system includes multiple detection mechanisms to ensure the quality of uploaded learnwares. 
Additionally, the system trains a heterogeneous engine based on existing learnware specifications in the system to merge different specification islands and assign new specifications to learnwares. With more learnwares are submitted, the heterogeneous engine will continue to update, achieving continuous iteration of learnware specifications and building a more precise specification world. -- ``Deploying Stage``: After users upload task requirements, the system automatically selects whether to recommend a single learnware or multiple learnware combinations and provides efficient deployment methods. Whether it's a single learnware or a combination of multiple learnwares, the system offers convenient learnware reuse tools. +- ``Submitting Stage``: The system includes multiple detection mechanisms to ensure the quality of uploaded learnwares. Additionally, the system trains a heterogeneous engine based on existing learnware specifications in the system to merge different specification islands and assign new specifications to learnwares. With the submission of more learnwares, the heterogeneous engine will continually update, aiming to construct a more precise specification world through the constant iteration of learnware specifications. +- ``Deploying Stage``: After users upload task requirements, the system automatically selects whether to recommend a single learnware or multiple learnware combinations and provides efficient deployment methods. Whether it's a single learnware or a combination of multiple learnwares, the system offers baseline learnware reuse methods in a uniform format for convenient usage. In addition, the Beimingwu system also has the following features: -- ``Learnware Specification Generation``: The Beimingwu system provides specification generation interfaces in the learnware package, supporting various data types (tables, images, and text) for efficient local generation. +- ``Learnware Specification Generation``: The Beimingwu system provides specification generation interfaces in the ``learnware`` package, supporting various data types (tables, images, and text) for efficient local generation. - ``Learnware Quality Inspection``: The Beimingwu system includes multiple detection mechanisms to ensure the quality of each learnware in the system. -- ``Diverse Learnware Search``: The Beimingwu system supports both semantic specifications and statistical specifications searches, covering data types such as tables, images, and text. In addition, for table-based tasks, the system also supports the search for heterogeneous table learnwares. -- ``Local Learnware Deployment``: The Beimingwu system provides interfaces for learnware deployment and learnware reuse in the learnware package, facilitating users' convenient and secure learnware deployment. -- ``Data Privacy Protection``: The Beimingwu system operations, including learnware upload, search, and deployment, do not require users to upload local data. All relevant statistical specifications are generated locally by users, ensuring data privacy. -- ``Open Source System``: The Beimingwu system's source code is open-source, including the learnware package and frontend/backend code. The learnware package is highly extensible, making it easy to integrate new specification designs, learnware system designs, and learnware reuse methods in the future. 
+- ``Diverse Learnware Search``: The Beimingwu system supports both semantic specifications and statistical specifications searches, covering data types such as tables, images, and text. In addition, for table-based tasks, the system preliminarily supports the search for heterogeneous table learnwares. +- ``Local Learnware Deployment``: The Beimingwu system provides a unified interface for learnware deployment and learnware reuse in the ``learnware`` package, facilitating users' convenient and secure deployment and reuse of arbitrary learnwares. +- ``Raw Data Protection``: The Beimingwu system operations, including learnware upload, search, and deployment, do not require users to upload raw data. All relevant statistical specifications are generated locally by users using open-source API. +- ``Open Source System``: The Beimingwu system's source code is open-source, including the learnware package and frontend/backend code. The ``learnware`` package is highly extensible, making it easy to integrate new specification designs, learnware system designs, and learnware reuse methods in the future. Building the learnware paradigm requires collective efforts from the community. As the first learnware dock system, Beimingwu is still in its early stages, with much room for improvement in related technologies. We sincerely invite the community to upload models, collaborate in system development, and engage in research and enhancements in learnware algorithms. Your valuable feedback is essential for the continuous improvement of the system. \ No newline at end of file diff --git a/docs/start/exp.rst b/docs/start/exp.rst index b6839299..b18888f3 100644 --- a/docs/start/exp.rst +++ b/docs/start/exp.rst @@ -4,7 +4,7 @@ Experiments and Examples ================================ -This chapter will introduce related experiments to illustrate the search and reuse performance of our learnware system. +In this section, we build various types of experimental scenarios and conduct extensive empirical study to evaluate the baseline algorithms, implemented and refined in the ``learnware`` package, for specification generation, learnware identification, and reuse on tabular, image, and text data. Environment ==================== diff --git a/docs/start/intro.rst b/docs/start/intro.rst index d1dcd6e5..b29553b7 100644 --- a/docs/start/intro.rst +++ b/docs/start/intro.rst @@ -5,9 +5,9 @@ Introduction *Learnware* was proposed by Professor Zhi-Hua Zhou in 2016 [1, 2]. In the *learnware paradigm*, developers worldwide can share models with the *learnware dock system*, which effectively searches for and reuse learnware(s) to help users solve machine learning tasks efficiently without starting from scratch. -The ``learnware`` package provides a fundamental implementation of the central concepts and procedures within the learnware paradigm. Its well-structured design ensures high scalability and facilitates the seamless integration of additional features and techniques in the future. +The ``learnware`` package provides a fundamental implementation of the central concepts and procedures and encompasses all processes within the *learnware paradigm*, including the submitting, usability testing, organization, identification, deployment and reuse of learnwares. Its well-structured design ensures high scalability and facilitates the seamless integration of additional features and techniques in the future. 
-In addition, the ``learnware`` package serves as the engine for the `Beimingwu System `_ and can be effectively employed for conducting experiments related to learnware. +In addition, the ``learnware`` package serves as the core engine for the `Beimingwu System `_, which supports the computational and algorithmic aspects of ``Beimingwu`` and offers rich algorithmic interfaces for learnware-related tasks and research experiments. | [1] Zhi-Hua Zhou. Learnware: on the future of machine learning. *Frontiers of Computer Science*, 2016, 10(4): 589–590 | [2] Zhi-Hua Zhou. Machine Learning: Development and Future. *Communications of CCF*, 2017, vol.13, no.1 (2016 CNCC keynote) @@ -32,31 +32,30 @@ The Benefits of Learnware Paradigm Machine learning has achieved great success in many fields but still faces various challenges, such as the need for extensive training data and advanced training techniques, the difficulty of continuous learning, the risk of catastrophic forgetting, and the leakage of data privacy. -Although there are many efforts focusing on one of these issues separately, they are entangled, and solving one problem may exacerbate others. The learnware paradigm aims to address many of these challenges through a unified framework. - -+-----------------------+-----------------------------------------------------------------------------------------------+ -| Benefit | Description | -+=======================+===============================================================================================+ -| Lack of training data | Strong models can be built with small data by adapting well-performed learnwares. | -+-----------------------+-----------------------------------------------------------------------------------------------+ -| Lack of training | Ordinary users can obtain strong models by leveraging well-performed learnwares instead of | -| skills | building models from scratch. | -+-----------------------+-----------------------------------------------------------------------------------------------+ -| Catastrophic | Accepted learnwares are always stored in the learnware market, retaining old knowledge. | -| forgetting | | -+-----------------------+-----------------------------------------------------------------------------------------------+ -| Continual learning | The learnware market continually enriches its knowledge with constant submissions of | -| | well-performed learnwares. | -+-----------------------+-----------------------------------------------------------------------------------------------+ -| Data privacy/ | Developers only submit models, not data, preserving data privacy/proprietary. | -| proprietary | | -+-----------------------+-----------------------------------------------------------------------------------------------+ -| Unplanned tasks | Open to all legal developers, the learnware market can accommodate helpful learnwares for | -| | various tasks. | -+-----------------------+-----------------------------------------------------------------------------------------------+ -| Carbon emission | Assembling small models may offer good-enough performance, reducing interest in training | -| | large models and the carbon footprint. | -+-----------------------+-----------------------------------------------------------------------------------------------+ +Although many efforts focus on one of these issues separately, these efforts pay less attention to the fact that most issues are entangled in practice. 
The learnware paradigm aims to tackle many of these challenges through a unified framework: + ++-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Challenges | Learnware Paradigm Solutions | ++=======================+================================================================================================================================================================================+ +| Lack of training data | Strong models can be built with a small amount of data by refining well-performing learnwares. | ++-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Lack of training | Users across all levels of expertise can adequately utilize numerous high-quality and potentially helpful learnwares | +| skills | identified by the system for their specific tasks. | ++-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Catastrophic | Learnwares which pass the usability checks are always stored in the learnware doc system, retaining old knowledge. | +| forgetting | | ++-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Continual learning | The learnware doc system continually expands its knowledge base with constant submissions of | +| | well-performed learnwares. | ++-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Data privacy/ | Developers worldwide freely share their high-performing models, without revealing their training data. | +| proprietary | | ++-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Unplanned tasks | Open to all legal developers, the learnware doc system accommodate helpful learnwares for | +| | various tasks, especially for unplanned, specialized, data-sensitive scenarios. | ++-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Carbon emission | By assembling the most suitable small learnwares, local deployment becomes feasible, offering a practical alternative to large cloud-based models and their carbon footprints. | ++-----------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ How to Solve Future Tasks with Learnware Paradigm? ---------------------------------------------------- @@ -64,13 +63,13 @@ How to Solve Future Tasks with Learnware Paradigm? .. 
image:: ../_static/img/learnware_paradigm.jpg :align: center -Instead of building a model from scratch, users can submit their requirements to the learnware market, which then identifies and deploys helpful learnware(s) based on the specifications. Users can apply the learnware directly, adapt it using their data, or exploit it in other ways to improve their models. This process is more efficient and less expensive than building a model from scratch. +When a user is going to solve a new machine learning task, she can submit her requirements to the learnware doc system, and then the system will identify and assemble some helpful learnware(s) from numerous learnwares to return to the user based on the learnware specifications. She can apply the learnware(s) directly, adapt them by her own data, or exploit it in other ways to improve her own model. No matter which learnware reuse mechanism is adopted, the whole process can be much less expensive and more efficient than building a model from scratch by herself. Procedure of Learnware Paradigm ================================== -- ``Submitting Stage``: Developers voluntarily submit various learnwares to the learnware market, and the system conducts quality checks and further organization of these learnwares. -- ``Deploying Stage``: When users submit task requirements, the learnware market automatically selects whether to recommend a single learnware or a combination of multiple learnwares and provides efficient deployment methods. Whether it's a single learnware or a combination of multiple learnwares, the system offers convenient learnware reuse interfaces. +- ``Submitting Stage``: Developers voluntarily submit various learnwares to the learnware doc system, and the system conducts quality checks and further organization of these learnwares. +- ``Deploying Stage``: The user submits her task requirement to the learnware doc system, and the system will identify and return some helpful learnwares to the user based on specifications, which can be further reused on user data. .. image:: ../_static/img/learnware_market.svg :align: center @@ -86,15 +85,15 @@ The architecture is designed based on the guidelines including *decoupling*, *au - At the workflow level, the ``learnware`` package consists of ``Submitting Stage`` and ``Deploying Stage``. -+---------------------+-------------------------------------------------------------------------------------------------------------------+ -| Module | Workflow | -+=====================+===================================================================================================================+ -| ``Submitting Stage``| The learnware developers submit learnwares to the learnware market, which conducts usability checks and further | -| | organization of these learnwares. | -+---------------------+-------------------------------------------------------------------------------------------------------------------+ -| ``Deploying Stage`` | The `learnware` package identifies learnwares according to users’ task requirements and provides efficient | -| | reuse and deployment methods. 
| -+---------------------+-------------------------------------------------------------------------------------------------------------------+ ++----------------------+---------------------------------------------------------------------------------------------------------------------+ +| Module | Workflow | ++======================+=====================================================================================================================+ +| ``Submitting Stage`` | The learnware developers submit learnwares to the learnware doc system, which conducts usability checks and further | +| | organization of these learnwares. | ++----------------------+---------------------------------------------------------------------------------------------------------------------+ +| ``Deploying Stage`` | The `learnware` package identifies learnwares according to users’ task requirements and provides efficient | +| | reuse and deployment methods. | ++----------------------+---------------------------------------------------------------------------------------------------------------------+ - At the module level, the ``learnware`` package is a platform that consists of ``Learnware``, ``Market``, ``Specification``, ``Model``, ``Reuse``, and ``Interface`` modules. diff --git a/docs/start/quick.rst b/docs/start/quick.rst index dc4fb594..d5b00cb0 100644 --- a/docs/start/quick.rst +++ b/docs/start/quick.rst @@ -7,8 +7,7 @@ Quick Start Introduction ==================== -This ``Quick Start`` guide aims to illustrate the straightforward process of establishing a full ``Learnware`` workflow -and utilizing ``Learnware`` to handle user tasks. +This ``Quick Start`` guide aims to illustrate the straightforward process of establishing a full ``Learnware`` workflow and utilizing ``Learnware`` to handle user tasks. Installation @@ -47,9 +46,8 @@ Learnware Package Workflow Users can start a ``Learnware`` workflow according to the following steps: Initialize a Learnware Market -------------------------------- +------------------------------ -The ``EasyMarket`` class provides the core functions of a ``Learnware Market``. You can initialize a basic ``Learnware Market`` named "demo" using the code snippet below: .. code-block:: python @@ -63,12 +61,9 @@ You can initialize a basic ``Learnware Market`` named "demo" using the code snip Upload Leanware ------------------------------- -Before uploading your learnware to the ``Learnware Market``, -you'll need to create a semantic specification, ``semantic_spec``. This involves selecting or inputting values for predefined semantic tags -to describe the features of your task and model. +Before uploading your learnware to the ``Learnware Market``, you'll need to create a semantic specification, ``semantic_spec``. This involves selecting or inputting values for semantic tags to describe the features of your task and model. -For instance, the following codes illustrates the semantic specification for a Scikit-Learn type model. -This model is tailored for education scenarios and performs classification tasks on tabular data: +For instance, the following code illustrates the semantic specification for a Scikit-Learn type model. This model is tailored for education scenarios and performs classification tasks on tabular data: .. 
code-block:: python @@ -83,8 +78,7 @@ This model is tailored for education scenarios and performs classification tasks license="MIT", ) -After defining the semantic specification, -you can upload your learnware using a single line of code: +After preparing the semantic specification, you can insert your learnware into the ``Learnware Market`` using a single line of code: .. code-block:: python @@ -96,8 +90,7 @@ Here, ``zip_path`` is the directory of your learnware ``zip`` package. Semantic Specification Search ------------------------------- -To find learnwares that align with your task's purpose, you'll need to provide a semantic specification, ``user_semantic``, that outlines your task's characteristics. -The ``Learnware Market`` will then perform an initial search using ``user_semantic``, identifying potentially useful learnwares with models that solve tasks similar to your requirements. +To identify learnwares that align with your task's purpose, you'll need to provide a semantic specification, ``user_semantic``, that outlines your task's characteristics. The ``Learnware Market`` will then perform an initial search based on ``user_semantic``, which filters learnwares by considering the semantic information of your task. .. code-block:: python @@ -105,7 +98,7 @@ The ``Learnware Market`` will then perform an initial search using ``user_semant user_info = BaseUserInfo(id="user", semantic_spec=semantic_spec) # search_learnware: performs semantic specification search when user_info doesn't include a statistical specification - search_result = easy_market.search_learnware(user_info) + search_result = demo_market.search_learnware(user_info) single_result = search_results.get_single_results() # single_result: the List of Tuple[Score, Learnware] returned by semantic specification search @@ -115,9 +108,7 @@ The ``Learnware Market`` will then perform an initial search using ``user_semant Statistical Specification Search --------------------------------- -If you decide in favor of porviding your own statistical specification file, ``stat.json``, -the ``Learnware Market`` can further refine the selection of learnwares from the previous step. -This second-stage search leverages statistical information to identify one or more learnwares that are most likely to be beneficial for your task. +If you generate and provide a statistical specification file ``rkme.json``, the ``Learnware Market`` will conduct learnware identification based on statistical information, and return more targeted models. Using the API we provided, you can easily generate this statistical specification locally. For example, the code below executes learnware search when using Reduced Kernel Mean Embedding (RKME) as the statistical specification: @@ -132,7 +123,7 @@ For example, the code below executes learnware search when using Reduced Kernel user_info = BaseUserInfo( semantic_spec=user_semantic, stat_info={"RKMETableSpecification": user_spec} ) - search_result = easy_market.search_learnware(user_info) + search_result = demo_market.search_learnware(user_info) single_result = search_results.get_single_results() multiple_result = search_results.get_multiple_results() @@ -153,31 +144,30 @@ For example, the code below executes learnware search when using Reduced Kernel Reuse Learnwares ------------------------------- -With the list of learnwares, ``mixture_learnware_list``, returned from the previous step, you can readily apply them to make predictions on your own data, bypassing the need to train a model from scratch. 
-We offer provide two methods for reusing a given list of learnwares: ``JobSelectorReuser`` and ``AveragingReuser``. -Just substitute ``test_x`` in the code snippet below with your own testing data, and you're all set to reuse learnwares: +We offer two data-free methods ``JobSelectorReuser`` and ``AveragingReuser`` for reusing a given list of learnwares. Please substitute ``test_x`` in the code snippet below with your own testing data: .. code-block:: python from learnware.reuse import JobSelectorReuser, AveragingReuser - # using jobselector reuser to reuse the searched learnwares to make prediction + # Use job selector reuser to reuse the searched learnwares to make prediction reuse_job_selector = JobSelectorReuser(learnware_list=mixture_item.learnwares) job_selector_predict_y = reuse_job_selector.predict(user_data=test_x) - # using averaging ensemble reuser to reuse the searched learnwares to make prediction + # Use averaging ensemble reuser to reuse the searched learnwares to make prediction reuse_ensemble = AveragingReuser(learnware_list=mixture_item.learnwares) ensemble_predict_y = reuse_ensemble.predict(user_data=test_x) -We also provide two method when the user has labeled data for reusing a given list of learnwares: ``EnsemblePruningReuser`` and ``FeatureAugmentReuser``. -Just substitute ``test_x`` in the code snippet below with your own testing data, and substitute ``train_X, train_y`` with your own training labeled data, and you're all set to reuse learnwares: +We also provide two data-dependent methods: ``EnsemblePruningReuser`` and ``FeatureAugmentReuser``, when the user has minor labeled data for refining a given list of learnwares. Here's an example for adopting multiple returned learnwares by labeled data to solve classification tasks: .. code-block:: python from learnware.reuse import EnsemblePruningReuser, FeatureAugmentReuser # Use ensemble pruning reuser to reuse the searched learnwares to make prediction + # (train_x, train_y) is the small amount of labeled data + # `mode` has two options "classification" and "regression" reuse_ensemble = EnsemblePruningReuser(learnware_list=mixture_item.learnwares, mode="classification") reuse_ensemble.fit(train_X, train_y) ensemble_pruning_predict_y = reuse_ensemble.predict(user_data=data_X) @@ -190,6 +180,5 @@ Just substitute ``test_x`` in the code snippet below with your own testing data, Auto Workflow Example ============================ -The ``Learnware`` also offers automated workflow examples. -This includes preparing learnwares, uploading and deleting learnwares from the market, and searching for learnwares using both semantic and statistical specifications. +The ``Learnware`` also offers automated workflow examples. This includes preparing learnwares, uploading and deleting learnwares from the market, and searching for learnwares using both semantic and statistical specifications. To experience the basic workflow of the Learnware Market, please refer to `Learnware Examples `_. diff --git a/docs/workflows/client.rst b/docs/workflows/client.rst index 0482a3c4..45a34f4e 100644 --- a/docs/workflows/client.rst +++ b/docs/workflows/client.rst @@ -6,7 +6,7 @@ Learnware Client Introduction ==================== -``Learnware Client`` is a ``Python API`` that provides a convenient interface for interacting with the ``BeimingWu`` system. You can easily use the client to upload, download, delete, update, and search learnwares. 
+``Learnware Client`` is a ``Python API`` that provides a convenient interface for interacting with the ``Beimingwu`` system. You can easily use the client to upload, download, delete, update, and search learnwares. Prepare access token @@ -36,10 +36,12 @@ Where email is the registered mailbox of the system and token is the token obtai Upload Leanware ------------------------------- -Before uploading a learnware, you'll need to prepare the semantic specification of your learnware. Let's take the classification task for tabular data as an example. You can create a semantic specification by a helper function ``create_semantic_specification``. +Before uploading a learnware, you'll need to prepare the semantic specification of your learnware. Let's take the classification task for tabular data as an example. You can create a semantic specification by a helper function ``generate_semantic_spec``. .. code-block:: python + from learnware.specification import generate_semantics_spec + # Prepare input description when data_type="Table" input_description = { "Dimension": 5, @@ -63,7 +65,7 @@ Before uploading a learnware, you'll need to prepare the semantic specification } # Create semantic specification - semantic_spec = client.create_semantic_specification( + semantic_spec = generate_semantic_spec( name="learnware_example", description="Just a example for uploading a learnware", data_type="Table", @@ -75,7 +77,7 @@ Before uploading a learnware, you'll need to prepare the semantic specification output_description=output_description, ) -Ensure that the input parameters for the semantic specification fall within the specified ranges provided by ``client.list_semantic_specification_values(key)``: +Please ensure that the input parameters for the semantic specification fall within the specified ranges provided by ``client.list_semantic_specification_values(key)``: * "data_type" must be within the range of ``key=SemanticSpecificationKey.DATA_TYPE``. * "task_type" must be within the range of ``key=SemanticSpecificationKey.TASK_TYPE``. @@ -87,7 +89,7 @@ Ensure that the input parameters for the semantic specification fall within the Finally, the semantic specification and the zip package path of the learnware were filled in to upload the learnware. -Remember to verify the learnware before uploading it, as shown in the following code example: +Remember to validate your learnware before uploading it, as shown in the following code example: .. code-block:: python @@ -104,7 +106,7 @@ Remember to verify the learnware before uploading it, as shown in the following learnware_zip_path=zip_path, semantic_specification=semantic_spec ) -After uploading the learnware successfully, you can see it in ``My Learnware``, the background will check it. Click on the learnware, which can be viewed in the ``Verify Status``. After the check passes, the Unverified tag of the learnware will disappear, and the uploaded learnware will appear in the system. +After uploading the learnware successfully, you can see it in ``Personal Information - My Learnware``, the background will check it. Click on the learnware, which can be viewed in the ``Verify Status``. After the check passes, the Unverified tag of the learnware will disappear, and the uploaded learnware will appear in the system. Update Learnware ------------------------------- @@ -153,37 +155,40 @@ The ``delete_learnware`` method is used to delete a learnware from the server. 
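+For quick reference, the snippet below is a minimal sketch of removing an uploaded learnware through the client; ``learnware_id`` is a placeholder for your own learnware's identifier, and the exact call signature is an assumption based on the surrounding documentation.
+
+.. code-block:: python
+
+    # Delete a previously uploaded learnware by its identifier
+    client.delete_learnware(learnware_id)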
Semantic Specification Search ------------------------------- -You can search the learnware in the system through the semantic specification, and all the learnware conforming to the semantic specification will be returned through the API. For example, the following code will give you all the learnware in the system whose task type is classified: +You can search for learnware(s) in the system through semantic specifications, and all learnwares that meet the semantic specifications will be returned via the API. For example, the following code retrieves all learnware in the system with a task type of "Classification": .. code-block:: python from learnware.market import BaseUserInfo - user_semantic = client.create_semantic_specification( + user_semantic = generate_semantic_spec( task_type="Classification" ) user_info = BaseUserInfo(semantic_spec=user_semantic) - learnware_list = client.search_learnware(user_info, page_size=None) - + search_result = client.search_learnware(user_info) + Statistical Specification Search --------------------------------- -You can also search the learnware in the system through the statistical specification, and all the learnware with similar distribution will be returned through the API. Using the ``generate_stat_spec`` function mentioned above, you can easily get the ``stat_spec`` for your current task, and then get the learnware that meets the statistical specification for the same type of data in the system by using the following code: +Moreover, you can also search for learnware(s) in the learnware dock system through statistical specifications, and more targeted learnwares for your task will be returned through the API. Using the ``generate_stat_spec`` function mentioned above, you can generate your task's statistical specification ``stat_spec``. Then, you can use the following code to easily obtain suitable learnware(s) identified by the system for your specific task: .. code-block:: python user_info = BaseUserInfo(stat_info={stat_spec.type: stat_spec}) - learnware_list = client.search_learnware(user_info, page_size=None) + search_result = client.search_learnware(user_info) Combine Semantic and Statistical Search ---------------------------------------- -By combining statistical and semantic specifications, you can perform more detailed searches, such as the following code that searches tabular data for pieces of learnware that satisfy your semantic specifications: + +By combining both semantic and statistical specifications, you can perform more accurate searches. For instance, the code below demonstrates how to search for learnware(s) in tabular data that satisfy both the semantic and statistical specifications: .. 
code-block:: python - user_semantic = client.create_semantic_specification( + from learnware.specification import generate_stat_spec + + user_semantic = generate_semantic_spec( task_type="Classification", scenarios=["Business"], ) @@ -191,11 +196,12 @@ By combining statistical and semantic specifications, you can perform more detai user_info = BaseUserInfo( semantic_spec=user_semantic, stat_info={rkme_table.type: rkme_table} ) - learnware_list = client.search_learnware(user_info, page_size=None) + search_result = client.search_learnware(user_info) + Heterogeneous Table Search ---------------------------------------- -When you provide a statistical specification for tabular data, the task type is "Classification" or "Regression", and your semantic specification includes descriptions for each dimension, the system will automatically enable heterogeneous table search. It won't only search in the tabular learnwares with same dimensions. The following code will perform heterogeneous table search through the API: +For tabular tasks, if the task type is "Classification" or "Regression", and you have provided a statistical specification along with descriptions for each feature dimension in the semantic specification, the system will enable heterogeneous table search. This is designed to support searching models from different feature spaces preliminarily. The following code example shows how to perform a heterogeneous table search via the API: .. code-block:: python @@ -206,7 +212,7 @@ When you provide a statistical specification for tabular data, the task type is "1": "leaf length", }, } - user_semantic = client.create_semantic_specification( + user_semantic = generate_semantic_spec( task_type="Classification", scenarios=["Business"], input_description=input_description, @@ -215,19 +221,18 @@ When you provide a statistical specification for tabular data, the task type is user_info = BaseUserInfo( semantic_spec=user_semantic, stat_info={rkme_table.type: rkme_table} ) - learnware_list = client.search_learnware(user_info) + search_result = client.search_learnware(user_info) Download and Use Learnware ------------------------------- -When the search is complete, you can download the learnware and configure the environment through the following code: +After the learnware search is completed, you can locally load and use the learnwares through the learnware IDs in ``search_result``, as shown in the following example: .. code-block:: python - for temp_learnware in learnware_list: - learnware_id = temp_learnware["learnware_id"] - - # you can use the learnware to make prediction now - learnware = client.load_learnware( - learnware_id=learnware_id, runnable_option="conda" - ) \ No newline at end of file + learnware_id = search_result["single"]["learnware_ids"][0] + learnware = client.load_learnware( + learnware_id=learnware_id, runnable_option="conda" + ) + # test_x is the user's data for prediction + predict_y = learnware.predict(test_x) \ No newline at end of file diff --git a/docs/workflows/reuse.rst b/docs/workflows/reuse.rst index ef729d03..2c81bc50 100644 --- a/docs/workflows/reuse.rst +++ b/docs/workflows/reuse.rst @@ -2,9 +2,10 @@ Learnwares Reuse ========================================== -``Learnware Reuser`` is a ``Python API`` that offers a variety of convenient tools for learnware reuse. 
Users can reuse a single learnware, combination of multiple learnwares, -and heterogeneous learnwares using these tools efficiently, thereby saving the laborious time and effort of building models from scratch. There are mainly two types of -reuse tools, based on whether user has gathered a small amount of labeled data beforehand: (1) data-free reuser and (2) data-dependent reuser. +``Learnware Reuser`` is a core module providing various basic reuse methods for convenient learnware reuse. +Users can efficiently reuse a single learnware, combination of multiple learnwares, +and heterogeneous learnwares using these methods. +There are two main categories of reuse methods: (1) data-free reusers which reuse learnwares directly and (2) data-dependent reusers which reuse learnwares with a small amount of labeled data. .. note:: @@ -40,7 +41,7 @@ Data-Free Reuser # predict_y is the prediction result of the reused learnwares predict_y = reuse_job_selector.predict(user_data=test_x) -- ``AveragingReuser`` uses an ensemble method to make predictions. The ``mode`` parameter specifies the specific ensemble method: +- ``AveragingReuser`` uses an ensemble method to make predictions. The ``mode`` parameter specifies the type of ensemble method: .. code:: python @@ -61,9 +62,9 @@ Data-Free Reuser Data-Dependent Reuser ------------------------------------ -When users have a small amount of labeled data, they can also adapt/polish the received learnware(s) by reusing them with the labeled data, gaining even better performance. +When users have minor labeled data, they can also adapt the received learnware(s) by reusing them with the labeled data. -- ``EnsemblePruningReuser`` selectively ensembles a subset of learnwares to choose the ones that are most suitable for the user's task: +- ``EnsemblePruningReuser`` selects a subset of suitable learnwares using a multi-objective evolutionary algorithm and uses an average ensemble for prediction: .. code:: python @@ -79,7 +80,7 @@ When users have a small amount of labeled data, they can also adapt/polish the r reuse_ensemble_pruning.fit(val_X, val_y) predict_y = reuse_job_selector.predict(user_data=test_x) -- ``FeatureAugmentReuser`` helps users reuse learnwares by augmenting features. This reuser regards each received learnware as a feature augmentor, taking its output as a new feature and then build a simple model on the augmented feature set(``logistic regression`` for classification tasks and ``ridge regression`` for regression tasks): +- ``FeatureAugmentReuser`` assists in reusing learnwares by augmenting features. It concatenates the output of the original learnware with the user's task features, creating enhanced labeled data, on which a simple model is then trained (logistic regression for classification tasks and ridge regression for regression tasks): .. code:: python @@ -99,12 +100,14 @@ When users have a small amount of labeled data, they can also adapt/polish the r Hetero Reuse ==================== -When heterogeneous learnware search is activated(see `WORKFLOWS: Hetero Search <../workflows/search.html#hetero-search>`_), users would receive heterogeneous learnwares which are identified from the whole "specification world". -Though these recommended learnwares are trained from tasks with different feature/label spaces from the user's task, they can still be helpful and perform well beyond their original purpose. -Normally these learnwares are hard to be used, leave alone polished by users, due to the feature/label space heterogeneity. 
However with the help of ``HeteroMapAlignLearnware`` class which align heterogeneous learnware -with the user's task, users can easily reuse them with the same set of reuse methods mentioned above. +When heterogeneous learnware search is activated, +users receive potentially helpful heterogeneous learnwares which are identified from the whole "specification world"(see `WORKFLOWS: Hetero Search <../workflows/search.html#hetero-search>`_). +Normally, these learnwares cannot be directly applied to their tasks due to discrepancies in input and prediction spaces. +Nevertheless, the ``learnware`` package facilitates the reuse of heterogeneous learnwares through ``HeteroMapAlignLearnware``, +which aligns the input and output domain of learnwares to match those of the users' tasks. +These feature-aligned learnwares can then be utilized with either data-free reusers or data-dependent reusers. -During the alignment process of heterogeneous learnware, the statistical specifications of the learnware and the user's task ``(user_spec)`` are used for input space alignment, +During the alignment process of a heterogeneous learnware, the statistical specifications of the learnware and the user's task ``(user_spec)`` are used for input space alignment, and a small amount of labeled data ``(val_x, val_y)`` is mandatory to be used for output space alignment. This can be done by the following code: .. code:: python @@ -120,7 +123,7 @@ and a small amount of labeled data ``(val_x, val_y)`` is mandatory to be used fo predict_y = hetero_learnware.predict(user_data=test_x) To reuse multiple heterogeneous learnwares, -combine ``HeteroMapAlignLearnware`` with the homogeneous reuse methods ``AveragingReuser`` and ``EnsemblePruningReuser`` mentioned above will do the trick: +combine ``HeteroMapAlignLearnware`` with the homogeneous reuse methods ``AveragingReuser`` and ``EnsemblePruningReuser`` mentioned above: .. code:: python @@ -157,7 +160,7 @@ Run the following codes to try run a learnware with ``Model Container``: learnware = env_container.get_learnwares_with_container()[0] print(learnware.predict(test_x)) -The ``mode`` parameter has two options, each for a specific learnware environment loading method: +The ``mode`` parameter includes two options, each corresponding to a specific learnware environment loading method: - ``'conda'``: Install a separate conda virtual environment for each learnware (automatically deleted after execution); run each learnware independently within its virtual environment. - ``'docker'``: Install a conda virtual environment inside a Docker container (automatically destroyed after execution); run each learnware independently within the container (requires Docker privileges). diff --git a/docs/workflows/search.rst b/docs/workflows/search.rst index d4491c5c..506752b2 100644 --- a/docs/workflows/search.rst +++ b/docs/workflows/search.rst @@ -2,75 +2,81 @@ Learnwares Search ============================================================ -``Learnware Searcher`` is a key component of ``Learnware Market`` that identifies and recommends helpful learnwares to users according to their ``UserInfo``. Based on whether the returned learnware dimensions are consistent with user tasks, the searchers can be divided into two categories: homogeneous searchers and heterogeneous searchers. +``Learnware Searcher`` is a key module within the ``Learnware Market`` that identifies and recommends helpful learnwares to users according to their user information. 
The ``learnware`` package currently provides two types of learnware searchers:
 
-All the searchers are implemented as a subclass of ``BaseSearcher``. When initializing, you should assign a ``organizer`` to it. The introduction of ``organizer`` is shown in `COMPONENTS: Market - Framework <../components/market.html>`_. Then these searchers can be called with ``UserInfo`` and return ``SearchResults``.
+
+- homogeneous searchers conduct homogeneous learnware identification and return helpful learnware(s) within the same feature space as the user's task;
+- heterogeneous searchers preliminarily support heterogeneous learnware identification for tabular tasks, broadening the search scope and returning targeted learnware(s) from different feature spaces.
+
+All the searchers are implemented as a subclass of ``BaseSearcher``. When initializing, an ``organizer`` should be assigned to it.
+The introduction of ``organizer`` is shown in `COMPONENTS: Market - Framework <../components/market.html>`_.
+Then, these searchers can be invoked with user information provided in ``BaseUserInfo``, and they will return ``SearchResults`` containing identification results.
 
 Homo Search
 ======================
 
-The homogeneous search of helpful learnwares can be divided into two stages: semantic specification search and statistical specification search. Both of them needs ``BaseUserInfo`` as input. The following codes shows how to use the searcher to search for helpful learnwares from a market ``easy_market`` for a user. The introduction of ``EasyMarket`` is in `COMPONENTS: Market <../components/market.html>`_.
+The homogeneous search of helpful learnwares can be divided into two stages: semantic specification search and statistical specification search. Both of them need ``BaseUserInfo`` as input.
+The following code shows how to use the searcher to search for helpful learnwares from a market ``easy_market`` for a user.
+The introduction of ``EasyMarket`` is in `COMPONENTS: Market <../components/market.html>`_.
 
 .. 
code-block:: python + from learnware.market import BaseUserInfo, instantiate_learnware_market + from learnware.specification import generate_semantic_spec, generate_stat_spec + + easy_market = instantiate_learnware_market(market_id="demo", name="easy", rebuild=True) + # generate BaseUserInfo(semantic_spec + stat_info) - user_semantic = { - "Data": {"Values": ["Table"], "Type": "Class"}, - "Task": {"Values": ["Regression"], "Type": "Class"}, - "Library": {"Values": ["Scikit-learn"], "Type": "Class"}, - "Scenario": {"Values": ["Business"], "Type": "Tag"}, - "Description": {"Values": "", "Type": "String"}, - "Name": {"Values": "", "Type": "String"}, - "Input": {"Dimension": 82, "Description": {},}, - "Output": {"Dimension": 1, "Description": {},}, - "License": {"Values": ["MIT"], "Type": "Class"}, - } - user_spec = generate_rkme_table_spec(X=x) + user_semantic = generate_semantic_spec( + task_type="Classification", + scenarios=["Business"], + ) + rkme_table = generate_stat_spec(type="table", X=train_x) user_info = BaseUserInfo( - semantic_spec=user_semantic, - stat_info={"RKMETableSpecification": user_spec} + semantic_spec=user_semantic, stat_info={rkme_table.type: rkme_table} ) - - # search the market for the user search_result = easy_market.search_learnware(user_info) - # search result: single_result - single_result = search_result.get_single_results() - print(f"single model num: {len(single_result)}, - max_score: {single_result[0].score}, - min_score: {single_result[-1].score}" - ) - - # search result: multiple_result - multiple_result = search_result.get_multiple_results() - mixture_id = " ".join([learnware.id for learnware in multiple_result[0].learnwares]) - print(f"mixture_score: {multiple_result[0].score}, mixture_learnwares: {mixture_id}") +In the above code, ``search_result`` is of type dict, with the following specific structure (``"single"`` and ``"multiple"`` correspond to the search results for a single learnware and multiple learnwares, respectively): + +.. code-block:: python + + search_result = { + "single": { + "learnware_ids": List[str], + "semantic_specifications": List[dict], + "matching": List[float], + }, + "multiple": { + "learnware_ids": List[str], + "semantic_specifications": List[dict], + "matching": float, + }, + } Hetero Search ====================== -For table-based user tasks, -homogeneous searchers like ``EasySearcher`` fail to recommend learnwares when no table learnware matches the user task's feature dimension, returning empty results. -To enhance functionality, the ``learnware`` package includes the heterogeneous learnware search feature, whose processions is as follows: +For tabular tasks, homogeneous searchers like ``EasySearcher`` may fail to recommend learnwares if no table learnware shares the same feature space as the user's task, resulting in empty returns. The ``learnware`` package preliminarily supports the search of learnwares from different feature spaces through heterogeneous searchers. The process is as follows: -- Learnware markets such as ``Hetero Market`` integrate different specification islands into a unified "specification world" by assigning system-level specifications to all learnwares. This allows heterogeneous searchers like ``HeteroSearcher`` to find helpful learnwares from all available table learnwares. -- Searchers assign system-level specifications to users based on ``UserInfo``'s statistical specification, using methods provided by corresponding organizers. 
In ``Hetero Market``, for example, ``HeteroOrganizer.generate_hetero_map_spec`` generates system-level specifications for users. -- Finally searchers conduct statistical specification search across the "specification world". User's system-level specification will guide the searcher in pinpointing helpful heterogeneous learnwares. +- Learnware markets such as ``Hetero Market`` integrate different tabular specification islands into a unified "specification world" by generating new system specifications for learnwares. This allows heterogeneous searchers like ``HeteroSearcher`` to recommend tabular learnwares from the entire learnware collection. +- Based on their statistical specifications, users receive new specifications assigned by searchers, which employ methods from the respective organizers. For instance, in ``Hetero Market``, ``HeteroOrganizer.generate_hetero_map_spec`` is used to generate new specifications for users. +- Finally searchers conduct statistical specification search across the unified "specification world" based on users' new specifications and return potentially targeted heterogeneous learnwares. -To activate heterogeneous learnware search, ``UserInfo`` should contain both semantic and statistical specifications. What's more, the semantic specification should meet the following requirements: +To activate heterogeneous learnware search, ``UserInfo`` needs to include both semantic and statistical specifications. Furthermore, the semantic specification should meet the following requirements: - The task type should be ``Classification`` or ``Regression``. - The data type should be ``Table``. -- It should include description for at least one feature dimension. -- The feature dimension stated here should match with the feature dimension in the statistical specification. +- There should be a description for at least one feature dimension. +- The feature dimension mentioned here must align with that in the statistical specification. + +The code below demonstrates how to search for potentially useful heterogeneous learnwares from a market ``hetero_market`` for a user. +For more information about ``HeteroMarket``, see `COMPONENTS: Hetero Market <../components/market.html#hetero-market>`_. -The following codes shows how to search for helpful heterogeneous learnwares from a market -``hetero_market`` for a user. The introduction of ``HeteroMarket`` is in `COMPONENTS: Hetero Market <../components/market.html#hetero-market>`_. .. code-block:: python # initiate a Hetero Market - hetero_market = initiate_learnware_market(market_id="test_hetero", name="hetero") + hetero_market = initiate_learnware_market(market_id="demo", name="hetero", rebuild=True) # user_semantic should meet the above requirements input_description = { diff --git a/docs/workflows/upload.rst b/docs/workflows/upload.rst index 382843a0..5a0d9d76 100644 --- a/docs/workflows/upload.rst +++ b/docs/workflows/upload.rst @@ -26,7 +26,7 @@ Model Invocation File ``__init__.py`` To ensure that the uploaded learnware can be used by subsequent users, you need to provide interfaces for model fitting ``fit(X, y)``, prediction ``predict(X)``, and fine-tuning ``finetune(X, y)`` in ``__init__.py``. Among these interfaces, only the ```predict(X)``` interface is mandatory, while the others depend on the functionality of your model. -Below is a reference template for the ```__init__.py``` file. 
Please make sure that the input parameter format (the number of parameters and parameter names) for each interface in your model invocation file matches the template below. +Below is a reference template for the ``__init__.py`` file. Please make sure that the input parameter format (the number of parameters and parameter names) for each interface in your model invocation file matches the template below. .. code-block:: python @@ -250,7 +250,7 @@ For more details, please refer to :ref:`semantic specification Date: Sun, 28 Jan 2024 17:05:03 +0800 Subject: [PATCH 021/108] [DOC] polish details --- docs/_static/img/learnware_market.svg | 1571 ++++++++++++++++- docs/components/learnware.rst | 2 +- docs/components/market.rst | 2 +- docs/components/spec.rst | 2 +- learnware/reuse/ensemble_pruning.py | 2 +- .../specification/regular/image/cnn_gp.py | 2 +- 6 files changed, 1575 insertions(+), 6 deletions(-) diff --git a/docs/_static/img/learnware_market.svg b/docs/_static/img/learnware_market.svg index 68d0a87a..ea1ad43b 100644 --- a/docs/_static/img/learnware_market.svg +++ b/docs/_static/img/learnware_market.svg @@ -1 +1,1570 @@ -Task CTask 1Task 2“ab”Spam?0011Model 1Model 2spamemailemailab>0.8ab0.8spamemailour>1.2our1.2Model CspecificationspecificationLearnwareCLearnware2Learnware1specificationLearnware MarketSubmitting StageDeploying StageDevelopersStandard learnware formatRequire-mentReturnSearchSubmitApply returned models directlyAdapt modelson user data(optional) \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/components/learnware.rst b/docs/components/learnware.rst index 71096042..af38b098 100644 --- a/docs/components/learnware.rst +++ b/docs/components/learnware.rst @@ -137,4 +137,4 @@ on ``user_data``. References ----------- -.. [1] Yu-Chang Wu, Yi-Xiao He, Chao Qian, and Zhi-Hua Zhou. Multi-objective evolutionary ensemble pruning guided by margin distribution. In *Proceedings of the 17th International Conference on Parallel Problem Solving from Nature*, 2022. \ No newline at end of file +.. [1] Yu-Chang Wu, Yi-Xiao He, Chao Qian, and Zhi-Hua Zhou. Multi-objective evolutionary ensemble pruning guided by margin distribution. In: *Proceedings of the 17th International Conference on Parallel Problem Solving from Nature (PPSN'22)*, 2022, pp.427-441. \ No newline at end of file diff --git a/docs/components/market.rst b/docs/components/market.rst index c394edd5..896b5ec0 100644 --- a/docs/components/market.rst +++ b/docs/components/market.rst @@ -56,7 +56,7 @@ They share the same ``checker`` module and have different implementations of ``o Easy Market ------------- -Easy market is a basic realization of the learnware doc system. It consists of ``EasyOrganizer``, ``EasySearcher``, and the checker list ``[EasySemanticChecker, EasyStatChecker]``. +Easy market is a basic realization of the learnware market. It consists of ``EasyOrganizer``, ``EasySearcher``, and the checker list ``[EasySemanticChecker, EasyStatChecker]``. ``Easy Organizer`` diff --git a/docs/components/spec.rst b/docs/components/spec.rst index e3fe5f8d..10fec885 100644 --- a/docs/components/spec.rst +++ b/docs/components/spec.rst @@ -137,4 +137,4 @@ Please refer to `COMPONENTS: Hetero Market <../components/market.html#hetero-ma References ----------- -.. [1] Adrià Garriga-Alonso, Laurence Aitchison, and Carl Edward Rasmussen. Deep convolutional networks as shallow gaussian processes. In *International Conference on Learning Representations*, 2019. \ No newline at end of file +.. [1] Adrià Garriga-Alonso, Laurence Aitchison, and Carl Edward Rasmussen. Deep convolutional networks as shallow gaussian processes. In: *International Conference on Learning Representations (ICLR'19)*, 2019. 
\ No newline at end of file diff --git a/learnware/reuse/ensemble_pruning.py b/learnware/reuse/ensemble_pruning.py index 3ad0e950..7afc7f59 100644 --- a/learnware/reuse/ensemble_pruning.py +++ b/learnware/reuse/ensemble_pruning.py @@ -15,7 +15,7 @@ class EnsemblePruningReuser(BaseReuser): """ Baseline Multiple Learnware Reuser uing Marign Distribution guided multi-objective evolutionary Ensemble Pruning (MDEP) Method. - References: [1] Yu-Chang Wu, Yi-Xiao He, Chao Qian, and Zhi-Hua Zhou. Multi-objective Evolutionary Ensemble Pruning Guided by Margin Distribution. In: Proceedings of the 17th International Conference on Parallel Problem Solving from Nature (PPSN'22), Dortmund, Germany, 2022. + References: [1] Yu-Chang Wu, Yi-Xiao He, Chao Qian, and Zhi-Hua Zhou. Multi-objective evolutionary ensemble pruning guided by margin distribution. In: Proceedings of the 17th International Conference on Parallel Problem Solving from Nature (PPSN'22), 2022, pp.427-441. """ def __init__(self, learnware_list: List[Learnware] = None, mode: str = "classification"): diff --git a/learnware/specification/regular/image/cnn_gp.py b/learnware/specification/regular/image/cnn_gp.py index 85d8cfd5..a429e1b2 100644 --- a/learnware/specification/regular/image/cnn_gp.py +++ b/learnware/specification/regular/image/cnn_gp.py @@ -11,7 +11,7 @@ Github Repository: https://github.com/cambridge-mlg/cnn-gp -References: [1] A. Garriga-Alonso, L. Aitchison, and C. E. Rasmussen. Deep Convolutional Networks as shallow Gaussian Processes. In: International Conference on Learning Representations (ICLR'19), 2019. +References: [1] Adrià Garriga-Alonso, Laurence Aitchison, and Carl Edward Rasmussen. Deep convolutional networks as shallow gaussian processes. In: International Conference on Learning Representations (ICLR'19), 2019. """ From f38d40cb90a5714d7b826b891ee48e13fe76bd61 Mon Sep 17 00:00:00 2001 From: liuht Date: Mon, 29 Jan 2024 12:56:24 +0800 Subject: [PATCH 022/108] [DOC] update system features doc --- docs/references/beimingwu.rst | 2 +- docs/start/intro.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/references/beimingwu.rst b/docs/references/beimingwu.rst index a25a6b17..a82b5c75 100644 --- a/docs/references/beimingwu.rst +++ b/docs/references/beimingwu.rst @@ -22,7 +22,7 @@ In addition, the Beimingwu system also has the following features: - ``Learnware Specification Generation``: The Beimingwu system provides specification generation interfaces in the ``learnware`` package, supporting various data types (tables, images, and text) for efficient local generation. - ``Learnware Quality Inspection``: The Beimingwu system includes multiple detection mechanisms to ensure the quality of each learnware in the system. - ``Diverse Learnware Search``: The Beimingwu system supports both semantic specifications and statistical specifications searches, covering data types such as tables, images, and text. In addition, for table-based tasks, the system preliminarily supports the search for heterogeneous table learnwares. -- ``Local Learnware Deployment``: The Beimingwu system provides a unified interface for learnware deployment and learnware reuse in the ``learnware`` package, facilitating users' convenient and secure deployment and reuse of arbitrary learnwares. +- ``Local Learnware Deployment``: The Beimingwu system provides a unified interface for learnware deployment and learnware reuse in the ``learnware`` package, facilitating users' convenient deployment and reuse of arbitrary learnwares. 
- ``Raw Data Protection``: The Beimingwu system operations, including learnware upload, search, and deployment, do not require users to upload raw data. All relevant statistical specifications are generated locally by users using open-source API. - ``Open Source System``: The Beimingwu system's source code is open-source, including the learnware package and frontend/backend code. The ``learnware`` package is highly extensible, making it easy to integrate new specification designs, learnware system designs, and learnware reuse methods in the future. diff --git a/docs/start/intro.rst b/docs/start/intro.rst index b29553b7..d15ed3fe 100644 --- a/docs/start/intro.rst +++ b/docs/start/intro.rst @@ -30,7 +30,7 @@ Why do we need Learnware? The Benefits of Learnware Paradigm ------------------------------------- -Machine learning has achieved great success in many fields but still faces various challenges, such as the need for extensive training data and advanced training techniques, the difficulty of continuous learning, the risk of catastrophic forgetting, and the leakage of data privacy. +Machine learning has achieved great success in many fields but still faces various challenges, such as the need for extensive training data and advanced training techniques, the difficulty of continuous learning, the risk of catastrophic forgetting, and the risk of data privacy breach. Although many efforts focus on one of these issues separately, these efforts pay less attention to the fact that most issues are entangled in practice. The learnware paradigm aims to tackle many of these challenges through a unified framework: From 7042ad713b95bdedbbb2079a593ce52407c26792 Mon Sep 17 00:00:00 2001 From: liuht Date: Mon, 29 Jan 2024 20:14:32 +0800 Subject: [PATCH 023/108] [DOC] add the arXiv reference --- README.md | 11 +++++++++-- README_zh.md | 11 +++++++++-- docs/about/about.rst | 4 ++-- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 2ec33645..1489aa24 100644 --- a/README.md +++ b/README.md @@ -407,6 +407,13 @@ If you use our project in your research or work, we kindly request that you cite number = {1}, pages = {1--12}, } + +@article{tan2024beimingwu, + author = {Zhi-Hao Tan and Jian-Dong Liu and Xiao-Dong Bi and Peng Tan and Qin-Cheng Zheng and Hai-Tian Liu and Yi Xie and Xiao-Chuan Zou and Yang Yu and Zhi-Hua Zhou}, + title = {Beimingwu: A Learnware Dock System}, + journal = {arXiv:2401.14427}, + year = {2024} +} ``` Please acknowledge the use of our project by citing these papers in your work. Thank you for your support! @@ -415,8 +422,8 @@ Please acknowledge the use of our project by citing these papers in your work. T ## How to Contribute -Learnware is still young and may contain bugs and issues. We highly value and encourage contributions from the community. For detailed development guidelines, please consult our [Developer Guide](https://learnware.readthedocs.io/en/latest/about/dev.html). We kindly request that contributors adhere to the provided commit format and pre-commit configuration when participating in the project. Your valuable contributions are greatly appreciated. +`learnware` is still in its early stages and may contain bugs and issues. We highly value and encourage contributions from the community, and we sincerely invite everyone to research and improve learnware algorithms, develop and explore within the learnware paradigm. 
For detailed development guidelines, please consult our [Developer Guide](https://learnware.readthedocs.io/en/latest/about/dev.html). We kindly request that contributors adhere to the provided commit format and pre-commit configuration when participating in the project. Your valuable contributions are greatly appreciated. ## About Us -The Learnware repository is developed and maintained by the LAMDA Beimingwu R&D Team. To learn more about our team, please visit the [Team Overview](https://docs.bmwu.cloud/en/about-us.html). +The `learnware` repository is developed and maintained by the LAMDA Beimingwu R&D (Research and Development) Team. To learn more about our team, please visit the [Team Overview](https://docs.bmwu.cloud/en/about-us.html). diff --git a/README_zh.md b/README_zh.md index cb899c21..834a2a50 100644 --- a/README_zh.md +++ b/README_zh.md @@ -401,7 +401,7 @@ feature_augment_predict_y = reuse_feature_augment.predict(user_data=test_x) # 引用 -如果你在研究或工作中使用了我们的项目,请引用下述论文,感谢你的支持! +如果您在研究或工作中使用了我们的项目,请引用下述论文,感谢你的支持! ```bibtex @article{zhou2022learnware, @@ -413,13 +413,20 @@ feature_augment_predict_y = reuse_feature_augment.predict(user_data=test_x) number = {1}, pages = {1--12}, } + +@article{tan2024beimingwu, + author = {Zhi-Hao Tan and Jian-Dong Liu and Xiao-Dong Bi and Peng Tan and Qin-Cheng Zheng and Hai-Tian Liu and Yi Xie and Xiao-Chuan Zou and Yang Yu and Zhi-Hua Zhou}, + title = {Beimingwu: A Learnware Dock System}, + journal = {arXiv:2401.14427}, + year = {2024} +} ``` # 关于 ## 如何贡献 -`learnware` 还很年轻,可能存在错误和问题。我们非常欢迎大家为 `learnware` 做出贡献。我们为所有的开发者提供了详细的[项目开发指南](https://learnware.readthedocs.io/en/latest/about/dev.html),并设置了相应的 commit 格式和 pre-commit 配置,请大家遵守。非常感谢大家的参与和支持! +`learnware` 还很年轻,可能存在错误和问题。我们非常欢迎大家为 `learnware` 做出贡献,研究和改进学件相关算法,或是围绕学件范式进行开发和探索。我们为所有的开发者提供了详细的[项目开发指南](https://learnware.readthedocs.io/en/latest/about/dev.html),并设置了相应的 commit 格式和 pre-commit 配置,请大家遵守。非常感谢大家的参与和支持! ## 关于我们 diff --git a/docs/about/about.rst b/docs/about/about.rst index 8d9bb8f1..bc67f3ff 100644 --- a/docs/about/about.rst +++ b/docs/about/about.rst @@ -2,7 +2,7 @@ About Us ================ -The R&D Team of ``learnware`` is from `Nanjing University's LAMDA Research Institute `_, with Prof. `Zhi-Hua Zhou `_ serving as the Founding Director. +The R&D (Research and Development) Team of ``learnware`` is from `Nanjing University's LAMDA Research Institute `_, with Prof. `Zhi-Hua Zhou `_ serving as the Founding Director. LAMDA is affiliated with the National Key Laboratory for Novel Software Technology, the Department of Computer Science & Technology and the School of Artificial Intelligence, Nanjing University, China. It locates at Computer Science and Technology Building in the Xianlin campus of Nanjing University, mainly in Rm910. @@ -13,6 +13,6 @@ The ``learnware`` package is currently maintained by the LAMDA Beimingwu R&D Tea Contact ========== -``learnware`` is still young. It may contain bugs and issues. Contributions are welcome. If you encounter any problems or have any suggestions while using the ``learnware`` package, please contact us: +``learnware`` is still in its early stages and may contain bugs and issues. Contributions are welcome. 
If you encounter any problems or have any suggestions while using the ``learnware`` package, please contact us: - Email: bmwu-support@lamda.nju.edu.cn \ No newline at end of file From 5dc310e8c88427bd78e0a13685eb78cd99381311 Mon Sep 17 00:00:00 2001 From: liuht Date: Mon, 29 Jan 2024 21:05:31 +0800 Subject: [PATCH 024/108] [DOC] modify auto workflow example in quick.rst --- docs/start/quick.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/start/quick.rst b/docs/start/quick.rst index d5b00cb0..1bab4f3d 100644 --- a/docs/start/quick.rst +++ b/docs/start/quick.rst @@ -180,5 +180,5 @@ We also provide two data-dependent methods: ``EnsemblePruningReuser`` and ``Feat Auto Workflow Example ============================ -The ``Learnware`` also offers automated workflow examples. This includes preparing learnwares, uploading and deleting learnwares from the market, and searching for learnwares using both semantic and statistical specifications. -To experience the basic workflow of the Learnware Market, please refer to `Learnware Examples `_. +The `learnware` package also offers automated workflow examples. This includes preparing learnwares, uploading and deleting learnwares from the market, and searching for learnwares using both semantic and statistical specifications. +To experience the basic workflow of the `learnware` package, the users can run `test/test_workflow/test_workflow.py` to try the basic workflow of `learnware`. From 002fe33ee54a5383873e1ed385ab82f7157dd717 Mon Sep 17 00:00:00 2001 From: liuht Date: Mon, 29 Jan 2024 22:27:41 +0800 Subject: [PATCH 025/108] [DOC] update references --- README.md | 2 +- README_zh.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1489aa24..515e98d0 100644 --- a/README.md +++ b/README.md @@ -409,7 +409,7 @@ If you use our project in your research or work, we kindly request that you cite } @article{tan2024beimingwu, - author = {Zhi-Hao Tan and Jian-Dong Liu and Xiao-Dong Bi and Peng Tan and Qin-Cheng Zheng and Hai-Tian Liu and Yi Xie and Xiao-Chuan Zou and Yang Yu and Zhi-Hua Zhou}, + author = {Tan, Zhi-Hao and Liu, Jian-Dong and Bi, Xiao-Dong and Tan, Peng and Zheng, Qin-Cheng and Liu, Hai-Tian and Xie, Yi and Zou, Xiao-Chuan and Yu, Yang and Zhou, Zhi-Hua}, title = {Beimingwu: A Learnware Dock System}, journal = {arXiv:2401.14427}, year = {2024} diff --git a/README_zh.md b/README_zh.md index 834a2a50..5a066560 100644 --- a/README_zh.md +++ b/README_zh.md @@ -415,7 +415,7 @@ feature_augment_predict_y = reuse_feature_augment.predict(user_data=test_x) } @article{tan2024beimingwu, - author = {Zhi-Hao Tan and Jian-Dong Liu and Xiao-Dong Bi and Peng Tan and Qin-Cheng Zheng and Hai-Tian Liu and Yi Xie and Xiao-Chuan Zou and Yang Yu and Zhi-Hua Zhou}, + author = {Tan, Zhi-Hao and Liu, Jian-Dong and Bi, Xiao-Dong and Tan, Peng and Zheng, Qin-Cheng and Liu, Hai-Tian and Xie, Yi and Zou, Xiao-Chuan and Yu, Yang and Zhou, Zhi-Hua}, title = {Beimingwu: A Learnware Dock System}, journal = {arXiv:2401.14427}, year = {2024} From bbc4b2643ff95dce09462fcebce24656f894f08a Mon Sep 17 00:00:00 2001 From: bxdd Date: Tue, 30 Jan 2024 23:08:51 +0800 Subject: [PATCH 026/108] Update README.md --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index 031fcf06..eed86a47 100644 --- a/README.md +++ b/README.md @@ -406,6 +406,15 @@ If you use our project in your research or work, we kindly request that you cite number = {1}, pages = {1--12}, } + 
+@misc{tan2024beimingwu, + title={Beimingwu: A Learnware Dock System}, + author={Zhi-Hao, Tan and Jian-Dong, Liu and Xiao-Dong, Bi and Peng, Tan and Qin-Cheng, Zheng and Hai-Tian, Liu and Yi, Xie and Xiao-Chuan, Zou and Yang, Yu and Zhi-Hua, Zhou}, + year={2024}, + eprint={2401.14427}, + archivePrefix={arXiv}, + primaryClass={cs.SE} +} ``` Please acknowledge the use of our project by citing these papers in your work. Thank you for your support! From d832c3f453ea04f9ca0c1e444e8bb389e1e317ab Mon Sep 17 00:00:00 2001 From: bxdd Date: Tue, 30 Jan 2024 23:11:51 +0800 Subject: [PATCH 027/108] Update Citation --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index eed86a47..297f89a6 100644 --- a/README.md +++ b/README.md @@ -404,16 +404,16 @@ If you use our project in your research or work, we kindly request that you cite year = {2024}, volume = {67}, number = {1}, - pages = {1--12}, + pages = {1--12} } @misc{tan2024beimingwu, - title={Beimingwu: A Learnware Dock System}, - author={Zhi-Hao, Tan and Jian-Dong, Liu and Xiao-Dong, Bi and Peng, Tan and Qin-Cheng, Zheng and Hai-Tian, Liu and Yi, Xie and Xiao-Chuan, Zou and Yang, Yu and Zhi-Hua, Zhou}, - year={2024}, - eprint={2401.14427}, - archivePrefix={arXiv}, - primaryClass={cs.SE} + title={Beimingwu: A Learnware Dock System}, + author={Zhi-Hao, Tan and Jian-Dong, Liu and Xiao-Dong, Bi and Peng, Tan and Qin-Cheng, Zheng and Hai-Tian, Liu and Yi, Xie and Xiao-Chuan, Zou and Yang, Yu and Zhi-Hua, Zhou}, + year={2024}, + eprint={2401.14427}, + archivePrefix={arXiv}, + primaryClass={cs.SE} } ``` From d422bcaed6d95392278ada3724dee677fcba2027 Mon Sep 17 00:00:00 2001 From: bxdd Date: Tue, 30 Jan 2024 23:52:04 +0800 Subject: [PATCH 028/108] [DOC, FIX] Update Citation --- README.md | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 297f89a6..fad55638 100644 --- a/README.md +++ b/README.md @@ -397,28 +397,24 @@ The results are depicted in the following table and figure. Similarly, even when If you use our project in your research or work, we kindly request that you cite the following papers: ```bibtex -@article{zhou2022learnware, +@article{zhou2024learnware, + title = {Learnware: small models do big}, author = {Zhou, Zhi-Hua and Tan, Zhi-Hao}, - title = {Learnware: Small Models Do Big}, - journal = {SCIENCE CHINA Information Sciences}, - year = {2024}, + journal = {Science China Information Sciences}, volume = {67}, number = {1}, - pages = {1--12} + pages = {112102}, + year = {2024} } -@misc{tan2024beimingwu, - title={Beimingwu: A Learnware Dock System}, - author={Zhi-Hao, Tan and Jian-Dong, Liu and Xiao-Dong, Bi and Peng, Tan and Qin-Cheng, Zheng and Hai-Tian, Liu and Yi, Xie and Xiao-Chuan, Zou and Yang, Yu and Zhi-Hua, Zhou}, - year={2024}, - eprint={2401.14427}, - archivePrefix={arXiv}, - primaryClass={cs.SE} +@article{tan2024beimingwu, + title = {Beimingwu: A Learnware Dock System}, + author = {Tan, Zhi-Hao and Liu, Jian-Dong and Bi, Xiao-Dong and Tan, Peng and Zheng, Qin-Cheng and Liu, Hai-Tian and Xie, Yi and Zou, Xiao-Chuan and Yu, Yang and Zhou, Zhi-Hua}, + journal = {arXiv preprint arXiv:2401.14427}, + year = {2024} } ``` -Please acknowledge the use of our project by citing these papers in your work. Thank you for your support! 
- # About ## How to Contribute From 61469eeceaa8a9fd6ed8f443851d49a26f484f71 Mon Sep 17 00:00:00 2001 From: bxdd Date: Wed, 31 Jan 2024 00:01:06 +0800 Subject: [PATCH 029/108] [DOC, FIX] Update Citation --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fad55638..4239dfed 100644 --- a/README.md +++ b/README.md @@ -398,7 +398,7 @@ If you use our project in your research or work, we kindly request that you cite ```bibtex @article{zhou2024learnware, - title = {Learnware: small models do big}, + title = {Learnware: Small models do big}, author = {Zhou, Zhi-Hua and Tan, Zhi-Hao}, journal = {Science China Information Sciences}, volume = {67}, @@ -408,7 +408,7 @@ If you use our project in your research or work, we kindly request that you cite } @article{tan2024beimingwu, - title = {Beimingwu: A Learnware Dock System}, + title = {Beimingwu: A learnware dock system}, author = {Tan, Zhi-Hao and Liu, Jian-Dong and Bi, Xiao-Dong and Tan, Peng and Zheng, Qin-Cheng and Liu, Hai-Tian and Xie, Yi and Zou, Xiao-Chuan and Yu, Yang and Zhou, Zhi-Hua}, journal = {arXiv preprint arXiv:2401.14427}, year = {2024} From 39c9fe97e205b04ca5820b42614e5b8e64875241 Mon Sep 17 00:00:00 2001 From: bxdd Date: Wed, 31 Jan 2024 00:09:37 +0800 Subject: [PATCH 030/108] [FIX, DOC] Update ZH Citation --- README_zh.md | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/README_zh.md b/README_zh.md index 48576c69..3f9c66d7 100644 --- a/README_zh.md +++ b/README_zh.md @@ -401,14 +401,21 @@ feature_augment_predict_y = reuse_feature_augment.predict(user_data=test_x) 如果你在研究或工作中使用了我们的项目,请引用下述论文,感谢你的支持! ```bibtex -@article{zhou2022learnware, +@article{zhou2024learnware, + title = {Learnware: Small models do big}, author = {Zhou, Zhi-Hua and Tan, Zhi-Hao}, - title = {Learnware: Small Models Do Big}, - journal = {SCIENCE CHINA Information Sciences}, - year = {2024}, + journal = {Science China Information Sciences}, volume = {67}, number = {1}, - pages = {1--12}, + pages = {112102}, + year = {2024} +} + +@article{tan2024beimingwu, + title = {Beimingwu: A learnware dock system}, + author = {Tan, Zhi-Hao and Liu, Jian-Dong and Bi, Xiao-Dong and Tan, Peng and Zheng, Qin-Cheng and Liu, Hai-Tian and Xie, Yi and Zou, Xiao-Chuan and Yu, Yang and Zhou, Zhi-Hua}, + journal = {arXiv preprint arXiv:2401.14427}, + year = {2024} } ``` @@ -420,4 +427,4 @@ feature_augment_predict_y = reuse_feature_augment.predict(user_data=test_x) ## 关于我们 -`learnware` 由 LAMDA 北冥坞研发团队开发和维护,更多信息可参考:[团队简介](https://docs.bmwu.cloud/zh-CN/about-us.html)。 \ No newline at end of file +`learnware` 由 LAMDA 北冥坞研发团队开发和维护,更多信息可参考:[团队简介](https://docs.bmwu.cloud/zh-CN/about-us.html)。 From 2c4a00a5eb892a86c53e090c66c9e8b7925f7ffd Mon Sep 17 00:00:00 2001 From: Gene Date: Thu, 1 Feb 2024 14:47:40 +0800 Subject: [PATCH 031/108] [DOC] update citation in readme --- README.md | 7 ------- README_zh.md | 7 ------- 2 files changed, 14 deletions(-) diff --git a/README.md b/README.md index eed126d8..0116a3c3 100644 --- a/README.md +++ b/README.md @@ -414,13 +414,6 @@ If you use our project in your research or work, we kindly request that you cite journal = {arXiv preprint arXiv:2401.14427}, year = {2024} } - -@article{tan2024beimingwu, - author = {Tan, Zhi-Hao and Liu, Jian-Dong and Bi, Xiao-Dong and Tan, Peng and Zheng, Qin-Cheng and Liu, Hai-Tian and Xie, Yi and Zou, Xiao-Chuan and Yu, Yang and Zhou, Zhi-Hua}, - title = {Beimingwu: A Learnware Dock System}, - journal = {arXiv:2401.14427}, - year = 
{2024} -} ``` # About diff --git a/README_zh.md b/README_zh.md index 6db72b32..4a363725 100644 --- a/README_zh.md +++ b/README_zh.md @@ -420,13 +420,6 @@ feature_augment_predict_y = reuse_feature_augment.predict(user_data=test_x) journal = {arXiv preprint arXiv:2401.14427}, year = {2024} } - -@article{tan2024beimingwu, - author = {Tan, Zhi-Hao and Liu, Jian-Dong and Bi, Xiao-Dong and Tan, Peng and Zheng, Qin-Cheng and Liu, Hai-Tian and Xie, Yi and Zou, Xiao-Chuan and Yu, Yang and Zhou, Zhi-Hua}, - title = {Beimingwu: A Learnware Dock System}, - journal = {arXiv:2401.14427}, - year = {2024} -} ``` # 关于 From f9e7365fc7ba178cebd4cab1be73a3ebcdfd07d2 Mon Sep 17 00:00:00 2001 From: GeneLiuXe <356340460@qq.com> Date: Thu, 29 Feb 2024 16:30:14 +0800 Subject: [PATCH 032/108] [DOC] add the github link --- docs/about/dev.rst | 2 +- docs/start/exp.rst | 2 +- docs/start/install.rst | 2 +- setup.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/about/dev.rst b/docs/about/dev.rst index 24146053..3ffbe776 100644 --- a/docs/about/dev.rst +++ b/docs/about/dev.rst @@ -10,7 +10,7 @@ As a developer, you often want make changes to ``learnware`` and hope it would r .. code-block:: bash - $ git clone https://gitlink.org.cn/beimingwu/learnware.git && cd Learnware + $ git clone https://github.com/Learnware-LAMDA/Learnware.git && cd Learnware $ pip install -e .[dev] .. note:: diff --git a/docs/start/exp.rst b/docs/start/exp.rst index b18888f3..2d7c05cc 100644 --- a/docs/start/exp.rst +++ b/docs/start/exp.rst @@ -168,7 +168,7 @@ The results are depicted in the following table and figure. Similarly, even when Get Start Examples ========================= -Examples for `Tabular, Text` and `Image` data sets are available at `Learnware Examples `_. You can run { workflow.py } directly to reproduce related experiments. +Examples for `Tabular, Text` and `Image` data sets are available at `Learnware Examples `_. You can run { workflow.py } directly to reproduce related experiments. We utilize the `fire` module to construct our experiments. Table Examples diff --git a/docs/start/install.rst b/docs/start/install.rst index 02ca018a..683b5d5a 100644 --- a/docs/start/install.rst +++ b/docs/start/install.rst @@ -36,7 +36,7 @@ Also, Users can install ``learnware`` by the source code according to the follow .. code-block:: bash - $ git clone https://gitlink.org.cn/beimingwu/learnware.git && cd Learnware + $ git clone https://github.com/Learnware-LAMDA/Learnware.git && cd Learnware $ pip install -e .[dev] .. 
note:: diff --git a/setup.py b/setup.py index 9f4e9985..7a485838 100644 --- a/setup.py +++ b/setup.py @@ -91,7 +91,7 @@ def get_version(rel_path: str) -> str: name=NAME, version=VERSION, license="Apache-2.0 Licence", - url="https://www.gitlink.org.cn/beimingwu/learnware", + url="https://github.com/Learnware-LAMDA/learnware", packages=find_packages(), include_package_data=True, description=DESCRIPTION, From 2ec441fa8d327b84efb7e52b8686d4ffae796f06 Mon Sep 17 00:00:00 2001 From: Asymptotez <201220101@smail.nju.edu.cn> Date: Mon, 25 Nov 2024 18:04:24 +0800 Subject: [PATCH 033/108] [ENH] build the framework of llm market --- learnware/market/llm/__init__.py | 13 +++++++++++++ learnware/market/module.py | 15 ++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 learnware/market/llm/__init__.py diff --git a/learnware/market/llm/__init__.py b/learnware/market/llm/__init__.py new file mode 100644 index 00000000..ff094e81 --- /dev/null +++ b/learnware/market/llm/__init__.py @@ -0,0 +1,13 @@ +from ...logger import get_module_logger +from ...utils import is_torch_available + +logger = get_module_logger("llm") + +if not is_torch_available(verbose=False): + LLMSearcher = None + logger.error("LLMSearcher are not available because 'torch' is not installed!") +else: + # TODO + pass + +__all__ = ["LLMSearcher"] \ No newline at end of file diff --git a/learnware/market/module.py b/learnware/market/module.py index cdc13e78..5586d996 100644 --- a/learnware/market/module.py +++ b/learnware/market/module.py @@ -2,7 +2,7 @@ from .classes import CondaChecker from .easy import EasyOrganizer, EasySearcher, EasySemanticChecker, EasyStatChecker from .heterogeneous import HeteroMapTableOrganizer, HeteroSearcher - +from .llm import LLMSearcher def get_market_component( name, market_id, rebuild, organizer_kwargs=None, searcher_kwargs=None, checker_kwargs=None, conda_checker=False @@ -36,6 +36,19 @@ def get_market_component( "searcher": hetero_searcher, "checker_list": hetero_checker_list, } + elif name == "llm": + llm_organizer = HeteroMapTableOrganizer(market_id=market_id, rebuild=rebuild, **organizer_kwargs) + llm_searcher = LLMSearcher(organizer=llm_organizer) + llm_checker_list = [ + EasySemanticChecker(), + EasyStatChecker() if conda_checker is False else CondaChecker(EasyStatChecker()), + ] + + market_component = { + "organizer": llm_organizer, + "searcher": llm_searcher, + "checker_list": llm_checker_list, + } else: raise ValueError(f"name {name} is not supported for market") From 58f3c87012223cf9fb147525d184da224a72257b Mon Sep 17 00:00:00 2001 From: liuht Date: Mon, 25 Nov 2024 18:38:44 +0800 Subject: [PATCH 034/108] [ENH] add llm specifications --- learnware/config.py | 1 + learnware/market/utils.py | 2 + .../specification/regular/text/task_vector.py | 49 +++++++++++++++++++ learnware/specification/system/llm_vector.py | 26 ++++++++++ learnware/tests/benchmarks/__init__.py | 16 ++++++ 5 files changed, 94 insertions(+) create mode 100644 learnware/specification/regular/text/task_vector.py create mode 100644 learnware/specification/system/llm_vector.py diff --git a/learnware/config.py b/learnware/config.py index a94e3b6f..24443146 100644 --- a/learnware/config.py +++ b/learnware/config.py @@ -92,6 +92,7 @@ def get_platform(): "Feature Extraction", "Segmentation", "Object Detection", + "Text Generation", "Others", ], "Type": "Class", # Choose only one class diff --git a/learnware/market/utils.py b/learnware/market/utils.py index 79411ba3..6ad9358f 100644 --- 
a/learnware/market/utils.py
+++ b/learnware/market/utils.py
@@ -5,6 +5,8 @@ def parse_specification_type(
         "RKMETableSpecification",
         "RKMETextSpecification",
         "RKMEImageSpecification",
+        "LLMGeneralCapabilitySpecification",
+        "TaskVectorSpecification"
     ],
 ):
     for spec in spec_list:
diff --git a/learnware/specification/regular/text/task_vector.py b/learnware/specification/regular/text/task_vector.py
new file mode 100644
index 00000000..c21afa11
--- /dev/null
+++ b/learnware/specification/regular/text/task_vector.py
@@ -0,0 +1,49 @@
+from __future__ import annotations
+
+import os
+from typing import Any, Dict, List, Union
+
+from ..base import RegularStatSpecification
+from ....config import C
+from ....logger import get_module_logger
+from ....utils import allocate_cuda_idx, choose_device
+
+logger = get_module_logger("RKMETextSpecification", "INFO")
+
+
+class TaskVectorSpecification(RegularStatSpecification):
+    """Task Vector Specification for Large Language Model"""
+
+    def __init__(self, cuda_idx: int = None, **kwargs):
+        """Initializing Task Vector Specification's parameters.
+
+        Parameters
+        ----------
+        cuda_idx : int
+            A flag indicating whether use CUDA during RKME computation. -1 indicates CUDA not used. None indicates automatically choose device
+        """
+        self.task_vector = None
+        self._cuda_idx = allocate_cuda_idx() if cuda_idx is None else cuda_idx
+        self._device = choose_device(cuda_idx=self._cuda_idx)
+
+        self.model_config = None
+
+        super(TaskVectorSpecification, self).__init__(type=self.__class__.__name__)
+
+    def _generate_models(self):
+        """Initialize foundational model (e.g. RoBERTa) used for task vector generation.
+        """
+        pass
+
+    def generate_stat_spec_from_data(
+        self,
+        X: List[str],
+        verbose: bool = True,
+        **kwargs
+    ):
+        pass
+
+    def dist(self, VectorSpec2: TaskVectorSpecification) -> float:
+        """Compute cosine similarity between two LLM task vectors.
+        """
+        pass
\ No newline at end of file
diff --git a/learnware/specification/system/llm_vector.py b/learnware/specification/system/llm_vector.py
new file mode 100644
index 00000000..ef40d014
--- /dev/null
+++ b/learnware/specification/system/llm_vector.py
@@ -0,0 +1,26 @@
+from __future__ import annotations
+import numpy as np
+
+from .base import SystemStatSpecification
+from ...tests.benchmarks import LLMBenchmark
+from ...logger import get_module_logger
+
+logger = get_module_logger("llm_base_vector_spec")
+
+
+class LLMGeneralCapabilitySpecification(SystemStatSpecification):
+    """Large Language Model Base Vector Specification"""
+
+    def __init__(self):
+        self.score_vector = None
+        super(LLMGeneralCapabilitySpecification, self).__init__(type=self.__class__.__name__)
+
+    def generate_stat_spec_from_system(self, model: TorchModel) -> np.ndarray:
+        # model: foundation model
+        # List[str]: each str is a dataset name
+        dataset_names = LLMBenchmark().get_general_capability_datasets()
+
+        pass
+
+    def get_spec(self) -> np.ndarray:
+        return self.score_vector
\ No newline at end of file
diff --git a/learnware/tests/benchmarks/__init__.py b/learnware/tests/benchmarks/__init__.py
index 436d5aee..a5f48fa2 100644
--- a/learnware/tests/benchmarks/__init__.py
+++ b/learnware/tests/benchmarks/__init__.py
@@ -182,3 +182,19 @@ def get_benchmark(self, benchmark_config: Union[str, BenchmarkConfig]) -> Benchm
             train_y_paths=train_y_paths,
             extra_info_path=extra_info_path,
         )
+
+
+class LLMBenchmark:
+    def __init__(self):
+        self.benchmark_configs = benchmark_configs
+
+    def list_general_capability_benchmarks(self) -> Dict:
+        pass
+
+    def list_specific_capability_benchmarks(self) -> Dict:
+        pass
+
+    def get_benchmark(self, benchmark_name: str) -> Dict[str, List[str]]:
+        # preprocess hugging face datasets into list of strings
+        # need to specify hugging face save path with root dir C.cache_dir
+        pass
\ No newline at end of file

From f9e7365fc7ba178cebd4cab1be73a3ebcdfd07d2 Mon Sep 17 00:00:00 2001
From: liuht
Date: Tue, 26 Nov 2024 16:18:47 +0800
Subject: [PATCH 035/108] [FIX] add import

---
 learnware/specification/__init__.py         | 8 +++++++-
 learnware/specification/module.py           | 17 ++++++++++++++++-
 learnware/specification/regular/__init__.py | 3 ++-
 .../specification/regular/text/__init__.py  | 6 ++++--
 learnware/specification/system/__init__.py  | 6 ++++--
 5 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/learnware/specification/__init__.py b/learnware/specification/__init__.py
index 82246ff3..f8a95b3d 100644
--- a/learnware/specification/__init__.py
+++ b/learnware/specification/__init__.py
@@ -5,9 +5,10 @@
     RKMEStatSpecification,
     RKMETableSpecification,
     RKMETextSpecification,
+    TaskVectorSpecification,
     rkme_solve_qp,
 )
-from .system import HeteroMapTableSpecification
+from .system import HeteroMapTableSpecification, LLMGeneralCapabilitySpecification
 from ..utils import is_torch_available
 
 if not is_torch_available(verbose=False):
@@ -15,12 +16,14 @@
     generate_rkme_table_spec = None
     generate_rkme_image_spec = None
     generate_rkme_text_spec = None
+    generate_task_vector_spec = None
     generate_semantic_spec = None
 else:
     from .module import (
         generate_rkme_image_spec,
         generate_rkme_table_spec,
         generate_rkme_text_spec,
+        generate_task_vector_spec,
         generate_semantic_spec,
         generate_stat_spec,
     )
@@ -33,11 +36,14 @@
     "RKMEStatSpecification",
     "RKMETableSpecification",
     "RKMETextSpecification",
+    "TaskVectorSpecification",
     "HeteroMapTableSpecification",
+    "LLMGeneralCapabilitySpecification",
     "rkme_solve_qp",
"generate_rkme_image_spec", "generate_rkme_table_spec", "generate_rkme_text_spec", + "generate_task_vector_spec", "generate_semantic_spec", "generate_stat_spec", ] diff --git a/learnware/specification/module.py b/learnware/specification/module.py index 9ad3d8a3..127044a1 100644 --- a/learnware/specification/module.py +++ b/learnware/specification/module.py @@ -4,7 +4,7 @@ import pandas as pd import torch -from .regular import RKMEImageSpecification, RKMETableSpecification, RKMETextSpecification +from .regular import RKMEImageSpecification, RKMETableSpecification, RKMETextSpecification, TaskVectorSpecification from .utils import convert_to_numpy from ..config import C @@ -175,6 +175,21 @@ def generate_rkme_text_spec( return rkme_text_spec +def generate_task_vector_spec( + X: List[str], + verbose: bool = True, + **kwargs +) -> TaskVectorSpecification: + # Check input type + if not isinstance(X, list) or not all(isinstance(item, str) for item in X): + raise TypeError("Input data must be a list of strings.") + + # Generate task vector spec + task_vector_spec = TaskVectorSpecification() + task_vector_spec.generate_stat_spec_from_data(X, verbose, **kwargs) + return task_vector_spec + + def generate_stat_spec( type: str, X: Union[np.ndarray, pd.DataFrame, torch.Tensor, List[str]], *args, **kwargs ) -> Union[RKMETableSpecification, RKMEImageSpecification, RKMETextSpecification]: diff --git a/learnware/specification/regular/__init__.py b/learnware/specification/regular/__init__.py index 51c79219..c6c69f9b 100644 --- a/learnware/specification/regular/__init__.py +++ b/learnware/specification/regular/__init__.py @@ -1,7 +1,7 @@ from .base import RegularStatSpecification from .image import RKMEImageSpecification from .table import RKMEStatSpecification, RKMETableSpecification, rkme_solve_qp -from .text import RKMETextSpecification +from .text import RKMETextSpecification, TaskVectorSpecification __all__ = [ "RegularStatSpecification", @@ -10,4 +10,5 @@ "RKMETableSpecification", "rkme_solve_qp", "RKMETextSpecification", + "TaskVectorSpecification" ] diff --git a/learnware/specification/regular/text/__init__.py b/learnware/specification/regular/text/__init__.py index 18f2c2dd..157fac58 100644 --- a/learnware/specification/regular/text/__init__.py +++ b/learnware/specification/regular/text/__init__.py @@ -5,8 +5,10 @@ if not is_torch_available(verbose=False): RKMETextSpecification = None - logger.error("RKMETextSpecification is not available because 'torch' is not installed!") + TaskVectorSpecification = None + logger.error("RKMETextSpecification and TaskVectorSpecification are not available because 'torch' is not installed!") else: from .rkme import RKMETextSpecification + from .task_vector import TaskVectorSpecification -__all__ = ["RKMETextSpecification"] +__all__ = ["RKMETextSpecification", "TaskVectorSpecification"] diff --git a/learnware/specification/system/__init__.py b/learnware/specification/system/__init__.py index d89292a2..5677074e 100644 --- a/learnware/specification/system/__init__.py +++ b/learnware/specification/system/__init__.py @@ -6,8 +6,10 @@ if not is_torch_available(verbose=False): HeteroMapTableSpecification = None - logger.error("HeteroMapTableSpecification is not available because 'torch' is not installed!") + LLMGeneralCapabilitySpecification = None + logger.error("HeteroMapTableSpecification and LLMGeneralCapabilitySpecification are not available because 'torch' is not installed!") else: from .hetero_table import HeteroMapTableSpecification + from .llm_vector import 
LLMGeneralCapabilitySpecification -__all__ = ["SystemStatSpecification", "HeteroMapTableSpecification"] +__all__ = ["SystemStatSpecification", "HeteroMapTableSpecification", "LLMGeneralCapabilitySpecification"] From a7c8f97f21188991db8fa63ef0cdf8e9094ffcbf Mon Sep 17 00:00:00 2001 From: Asymptotez <201220101@smail.nju.edu.cn> Date: Thu, 28 Nov 2024 17:42:55 +0800 Subject: [PATCH 036/108] [MNT] add checker for TaskVectorSpecification --- learnware/market/easy/checker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learnware/market/easy/checker.py b/learnware/market/easy/checker.py index 3b22c9bb..fea721f5 100644 --- a/learnware/market/easy/checker.py +++ b/learnware/market/easy/checker.py @@ -133,7 +133,7 @@ def __call__(self, learnware): return self.INVALID_LEARNWARE, message inputs = np.random.randn(10, *input_shape) - elif spec_type == "RKMETextSpecification": + elif spec_type == "RKMETextSpecification" or spec_type == "TaskVectorSpecification": inputs = EasyStatChecker._generate_random_text_list(10) elif spec_type == "RKMEImageSpecification": From 43597ff2629ba1bd960ac0a7372d410eb82ce1d9 Mon Sep 17 00:00:00 2001 From: liuht Date: Fri, 29 Nov 2024 18:02:45 +0800 Subject: [PATCH 037/108] [ENH] add llm searcher --- learnware/market/llm/__init__.py | 5 +- learnware/market/llm/searcher.py | 64 ++++++++++++++++++++ learnware/market/llm/utils.py | 39 ++++++++++++ learnware/market/module.py | 2 +- learnware/market/utils.py | 2 +- learnware/model/__init__.py | 8 ++- learnware/model/torch_model.py | 32 ++++++++++ learnware/specification/system/llm_vector.py | 5 +- 8 files changed, 149 insertions(+), 8 deletions(-) create mode 100644 learnware/market/llm/searcher.py create mode 100644 learnware/market/llm/utils.py create mode 100644 learnware/model/torch_model.py diff --git a/learnware/market/llm/__init__.py b/learnware/market/llm/__init__.py index ff094e81..4f9f3399 100644 --- a/learnware/market/llm/__init__.py +++ b/learnware/market/llm/__init__.py @@ -1,13 +1,12 @@ from ...logger import get_module_logger from ...utils import is_torch_available -logger = get_module_logger("llm") +logger = get_module_logger("market_llm") if not is_torch_available(verbose=False): LLMSearcher = None logger.error("LLMSearcher are not available because 'torch' is not installed!") else: - # TODO - pass + from .searcher import LLMSearcher __all__ = ["LLMSearcher"] \ No newline at end of file diff --git a/learnware/market/llm/searcher.py b/learnware/market/llm/searcher.py new file mode 100644 index 00000000..2a35c4c5 --- /dev/null +++ b/learnware/market/llm/searcher.py @@ -0,0 +1,64 @@ +from typing import Optional + +from .utils import is_llm +from ..base import BaseUserInfo, SearchResults +from ..easy import EasySearcher +from ..utils import parse_specification_type +from ...logger import get_module_logger + + +logger = get_module_logger("llm_searcher") + + +class LLMSearcher(EasySearcher): + def search_learnware(self, user_info: BaseUserInfo) -> SearchResults: + """Search helpful learnwares from learnware_list based on task vector specification + + Parameters + ---------- + user_info : BaseUserInfo + - user_info with semantic specifications and task vector specification + + Returns + ------- + Tuple[List[float], List[Learnware]] + the first is the sorted list of task vector similarities + the second is the sorted list of Learnware (single) by the task vector similarities + """ + pass + + def __call__( + self, + user_info: BaseUserInfo, + check_status: Optional[int] = None + ) -> SearchResults: + 
"""Search learnwares based on user_info from learnwares with check_status. + Employs LLM learnware search if specific requirements are met, otherwise resorts to homogeneous search methods. + + Parameters + ---------- + user_info : BaseUserInfo + user_info contains semantic_spec and stat_info + check_status : int, optional + - None: search from all learnwares + - Others: search from learnwares with check_status + + Returns + ------- + Tuple[List[float], List[Learnware]] + the first is the sorted list of rkme dist + the second is the sorted list of Learnware (single) by the rkme dist + """ + learnware_list = self.learnware_organizer.get_learnwares(check_status=check_status) + semantic_search_result = self.semantic_searcher(learnware_list, user_info) + + learnware_list = [search_item.learnware for search_item in semantic_search_result.get_single_results()] + if len(learnware_list) == 0: + return SearchResults() + + if parse_specification_type(stat_specs=user_info.stat_info) is not None: + if is_llm(stat_specs=user_info.stat_info, semantic_spec=user_info.semantic_spec): + return self.search_learnware(user_info) + return self.stat_searcher(learnware_list, user_info, max_search_num=1, search_method="greedy") + else: + return semantic_search_result \ No newline at end of file diff --git a/learnware/market/llm/utils.py b/learnware/market/llm/utils.py new file mode 100644 index 00000000..a14921f4 --- /dev/null +++ b/learnware/market/llm/utils.py @@ -0,0 +1,39 @@ +from ...logger import get_module_logger + +logger = get_module_logger("llm_utils") + + +def is_llm(stat_specs: dict, semantic_spec: dict, verbose=True) -> bool: + """Check if user_info satifies all the criteria required for enabling llm learnware search + + Parameters + ---------- + user_info : BaseUserInfo + user_info contains semantic_spec and stat_info + + Returns + ------- + bool + A flag indicating whether llm search is enabled for user_info + """ + try: + if "TaskVectorSpecification" not in stat_specs: + if verbose: + logger.warning("TaskVectorSpecification is not provided in stat_info.") + return False + + semantic_data_type = semantic_spec["Data"]["Values"] + if len(semantic_data_type) > 0 and semantic_data_type != ["Text"]: + logger.warning("User doesn't provide correct data type, it must be Text.") + return False + + semantic_task_type = semantic_spec["Task"]["Values"] + if len(semantic_task_type) > 0 and semantic_task_type != ["Text Generation"]: + logger.warning("User doesn't provide correct task type, it must be Text Generation.") + return False + + return True + except Exception as err: + if verbose: + logger.warning("Invalid llm search information provided.") + return False diff --git a/learnware/market/module.py b/learnware/market/module.py index 5586d996..e110e1cb 100644 --- a/learnware/market/module.py +++ b/learnware/market/module.py @@ -37,7 +37,7 @@ def get_market_component( "checker_list": hetero_checker_list, } elif name == "llm": - llm_organizer = HeteroMapTableOrganizer(market_id=market_id, rebuild=rebuild, **organizer_kwargs) + llm_organizer = EasyOrganizer(market_id=market_id, rebuild=rebuild) llm_searcher = LLMSearcher(organizer=llm_organizer) llm_checker_list = [ EasySemanticChecker(), diff --git a/learnware/market/utils.py b/learnware/market/utils.py index 6ad9358f..f50e6a7b 100644 --- a/learnware/market/utils.py +++ b/learnware/market/utils.py @@ -3,10 +3,10 @@ def parse_specification_type( spec_list=[ "HeteroMapTableSpecification", "RKMETableSpecification", + "TaskVectorSpecification" 
"RKMETextSpecification", "RKMEImageSpecification", "LLMGeneralCapabilitySpecification", - "TaskVectorSpecification" ], ): for spec in spec_list: diff --git a/learnware/model/__init__.py b/learnware/model/__init__.py index d237fd17..a1dd4259 100644 --- a/learnware/model/__init__.py +++ b/learnware/model/__init__.py @@ -1,3 +1,9 @@ from .base import BaseModel +from ..utils import is_torch_available -__all__ = ["BaseModel"] +if not is_torch_available(verbose=False): + TorchModel = None +else: + from .torch_model import TorchModel + +__all__ = ["BaseModel", "TorchModel"] diff --git a/learnware/model/torch_model.py b/learnware/model/torch_model.py new file mode 100644 index 00000000..79fa1ac7 --- /dev/null +++ b/learnware/model/torch_model.py @@ -0,0 +1,32 @@ +import os +import torch +import numpy as np +from torch import nn + +class TorchModel: + def __init__( + self, + model: nn.Module, + input_shape, + output_shape, + device=None, + ): + self._model = model + self.input_shape = input_shape + self.output_shape = output_shape + + @property + def model(self) -> nn.Module: + """ + fetch the inner model + """ + return self._model + + def predict(self, X: np.ndarray) -> np.ndarray: + pass + + def fit(self, X: np.ndarray, y: np.ndarray): + pass + + def finetune(self, X: np.ndarray, y: np.ndarray): + pass diff --git a/learnware/specification/system/llm_vector.py b/learnware/specification/system/llm_vector.py index ef40d014..7f50b79e 100644 --- a/learnware/specification/system/llm_vector.py +++ b/learnware/specification/system/llm_vector.py @@ -2,14 +2,15 @@ import numpy as np from .base import SystemStatSpecification +from ...model import TorchModel from ...tests.benchmarks import LLMBenchmark from ...logger import get_module_logger -logger = get_module_logger("llm_base_vector_spec") +logger = get_module_logger("llm_general_capability_spec") class LLMGeneralCapabilitySpecification(SystemStatSpecification): - """Large Language Model Base Vector Specification""" + """Large Language Model General Capability Specification""" def __init__(self): self.score_vector = None From 5f572830b6400a95086fd9508447fa51201b909b Mon Sep 17 00:00:00 2001 From: liuht Date: Fri, 6 Dec 2024 18:03:13 +0800 Subject: [PATCH 038/108] [MNT | ENH] refactor searchers into BasicSearcher and CombinedSearcher --- learnware/market/__init__.py | 21 ++- learnware/market/anchor/searcher.py | 4 +- learnware/market/base.py | 50 +++++++ learnware/market/easy/__init__.py | 6 +- learnware/market/easy/searcher.py | 153 +++++++++++---------- learnware/market/heterogeneous/__init__.py | 8 +- learnware/market/heterogeneous/searcher.py | 65 ++++++--- learnware/market/heterogeneous/utils.py | 51 ------- learnware/market/llm/__init__.py | 8 +- learnware/market/llm/searcher.py | 66 +++++---- learnware/market/llm/utils.py | 39 ------ learnware/market/module.py | 50 ++++++- learnware/model/torch_model.py | 8 +- learnware/tests/benchmarks/__init__.py | 4 +- 14 files changed, 285 insertions(+), 248 deletions(-) delete mode 100644 learnware/market/heterogeneous/utils.py delete mode 100644 learnware/market/llm/utils.py diff --git a/learnware/market/__init__.py b/learnware/market/__init__.py index 6e9e718e..184f9481 100644 --- a/learnware/market/__init__.py +++ b/learnware/market/__init__.py @@ -1,10 +1,19 @@ from .anchor import AnchoredOrganizer, AnchoredSearcher, AnchoredUserInfo from .base import BaseChecker, BaseOrganizer, BaseSearcher, BaseUserInfo, LearnwareMarket from .classes import CondaChecker -from .easy import EasyOrganizer, EasySearcher, 
EasySemanticChecker, EasyStatChecker +from .easy import ( + EasyOrganizer, + EasyExactSemanticSearcher, + EasyFuzzSemanticSearcher, + EasyStatSearcher, + CombinedSearcher, + EasySemanticChecker, + EasyStatChecker, +) from .evolve import EvolvedOrganizer from .evolve_anchor import EvolvedAnchoredOrganizer -from .heterogeneous import HeteroMapTableOrganizer, HeteroSearcher +from .heterogeneous import HeteroMapTableOrganizer, HeteroStatSearcher +from .llm import LLMStatSearcher from .module import instantiate_learnware_market __all__ = [ @@ -18,12 +27,16 @@ "LearnwareMarket", "CondaChecker", "EasyOrganizer", - "EasySearcher", + "EasyExactSemanticSearcher", + "EasyFuzzSemanticSearcher", + "EasyStatSearcher", + "CombinedSearcher", "EasySemanticChecker", "EasyStatChecker", "EvolvedOrganizer", "EvolvedAnchoredOrganizer", "HeteroMapTableOrganizer", - "HeteroSearcher", + "HeteroStatSearcher", + "LLMStatSearcher", "instantiate_learnware_market", ] diff --git a/learnware/market/anchor/searcher.py b/learnware/market/anchor/searcher.py index c8fe2a26..2ef2a5cd 100644 --- a/learnware/market/anchor/searcher.py +++ b/learnware/market/anchor/searcher.py @@ -1,14 +1,14 @@ from typing import Any, List, Tuple from .user_info import AnchoredUserInfo -from ..easy.searcher import EasySearcher +from ..easy.searcher import BasicSearcher from ...learnware import Learnware from ...logger import get_module_logger logger = get_module_logger("anchor_searcher") -class AnchoredSearcher(EasySearcher): +class AnchoredSearcher(BasicSearcher): def search_anchor_learnware(self, user_info: AnchoredUserInfo) -> Tuple[Any, List[Learnware]]: """Search anchor Learnwares from anchor_learnware_list based on user_info diff --git a/learnware/market/base.py b/learnware/market/base.py index 3b53798c..4ec78eff 100644 --- a/learnware/market/base.py +++ b/learnware/market/base.py @@ -203,6 +203,8 @@ def search_learnware(self, user_info: BaseUserInfo, check_status: int = None, ** SearchResults Search results """ + # searcher = self.searcher_selector.select_searcher(user_info) + # return searcher(user_info, check_status, **kwargs) return self.learnware_searcher(user_info, check_status, **kwargs) def delete_learnware(self, id: str, **kwargs) -> bool: @@ -501,6 +503,54 @@ def __call__(self, user_info: BaseUserInfo, check_status: int = None) -> SearchR raise NotImplementedError("'__call__' method is not implemented in BaseSearcher") +class BasicSearcher(BaseSearcher): + def __init__(self, organizer: BaseOrganizer, **kwargs): + super(BasicSearcher, self).__init__(organizer, **kwargs) + + def is_applicable_user(self, user_info: BaseUserInfo, **kwargs) -> bool: + """Check if the user_info is applicable for this searcher + + Parameters + ---------- + user_info : BaseUserInfo + user_info contains semantic_spec and stat_info + + Returns + ------- + bool + A flag indicating whether the user_info is applicable for this searcher + """ + raise NotImplementedError("'is_applicable_user' method is not implemented in BasicSearcher") + + def is_applicable_learnware(self, learnware: Learnware, **kwargs) -> bool: + """Check if the learnware is applicable for this searcher + + Parameters + ---------- + learnware : Learnware + learnware to be checked + + Returns + ------- + bool + A flag indicating whether the learnware is applicable for this searcher + """ + raise NotImplementedError("'is_applicable_learnware' method is not implemented in BasicSearcher") + + def __call__(self, user_info: BaseUserInfo, check_status: int = None) -> SearchResults: + """Search 
learnwares based on user_info from learnwares with check_status + + Parameters + ---------- + user_info : BaseUserInfo + user_info contains semantic_spec and stat_info + check_status : int, optional + - None: search from all learnwares + - Others: search from learnwares with check_status + """ + raise NotImplementedError("'__call__' method is not implemented in BasicSearcher") + + class BaseChecker: INVALID_LEARNWARE = -1 NONUSABLE_LEARNWARE = 0 diff --git a/learnware/market/easy/__init__.py b/learnware/market/easy/__init__.py index bbedeefb..7305a984 100644 --- a/learnware/market/easy/__init__.py +++ b/learnware/market/easy/__init__.py @@ -5,16 +5,16 @@ logger = get_module_logger("market_easy") if not is_torch_available(verbose=False): - EasySearcher = None EasySemanticChecker = None EasyStatChecker = None EasyExactSemanticSearcher = None EasyFuzzSemanticSearcher = None EasyStatSearcher = None + CombinedSearcher = None logger.error("EasySeacher and EasyChecker are not available because 'torch' is not installed!") else: from .checker import EasySemanticChecker, EasyStatChecker - from .searcher import EasyExactSemanticSearcher, EasyFuzzSemanticSearcher, EasySearcher, EasyStatSearcher + from .searcher import EasyExactSemanticSearcher, EasyFuzzSemanticSearcher, EasyStatSearcher, CombinedSearcher __all__ = [ "EasyOrganizer", @@ -22,6 +22,6 @@ "EasyStatChecker", "EasyExactSemanticSearcher", "EasyFuzzSemanticSearcher", - "EasySearcher", "EasyStatSearcher", + "CombinedSearcher", ] diff --git a/learnware/market/easy/searcher.py b/learnware/market/easy/searcher.py index dcfb7dfa..3f2e10a8 100644 --- a/learnware/market/easy/searcher.py +++ b/learnware/market/easy/searcher.py @@ -5,8 +5,15 @@ import torch from rapidfuzz import fuzz -from .organizer import EasyOrganizer -from ..base import BaseSearcher, BaseUserInfo, MultipleSearchItem, SearchResults, SingleSearchItem +from ..base import ( + BaseOrganizer, + BaseSearcher, + BasicSearcher, + BaseUserInfo, + MultipleSearchItem, + SearchResults, + SingleSearchItem, +) from ..utils import parse_specification_type from ...learnware import Learnware from ...logger import get_module_logger @@ -15,7 +22,13 @@ logger = get_module_logger("easy_seacher") -class EasyExactSemanticSearcher(BaseSearcher): +class EasyExactSemanticSearcher(BasicSearcher): + def is_applicable_learnware(self, learnware: Learnware) -> bool: + return True + + def is_applicable_user(self, user_info: BaseUserInfo) -> bool: + return True + def _learnware_id_search(self, learnware_id: str, learnware_list: List[Learnware]) -> List[Learnware]: match_learnwares = [] for learnware in learnware_list: @@ -78,7 +91,13 @@ def __call__(self, learnware_list: List[Learnware], user_info: BaseUserInfo) -> return SearchResults(single_results=[SingleSearchItem(learnware=_learnware) for _learnware in match_learnwares]) -class EasyFuzzSemanticSearcher(BaseSearcher): +class EasyFuzzSemanticSearcher(BasicSearcher): + def is_applicable_learnware(self, learnware: Learnware) -> bool: + return True + + def is_applicable_user(self, user_info: BaseUserInfo) -> bool: + return True + def _learnware_id_search(self, learnware_id: str, learnware_list: List[Learnware]) -> List[Learnware]: match_learnwares = [] for learnware in learnware_list: @@ -203,7 +222,22 @@ def __call__( return SearchResults(single_results=[SingleSearchItem(learnware=_learnware) for _learnware in final_result]) -class EasyStatSearcher(BaseSearcher): +class EasyStatSearcher(BasicSearcher): + STAT_TYPES = ["RKMETableSpecification", 
"RKMEImageSpecification", "RKMETextSpecification"] + + def is_applicable_learnware(self, learnware: Learnware) -> bool: + return any(spec_type in learnware.specification.stat_spec for spec_type in self.STAT_TYPES) + + def is_applicable_user(self, user_info: BaseUserInfo) -> bool: + for spec_type in self.STAT_TYPES: + if spec_type in user_info.stat_info: + user_rkme = user_info.stat_info[spec_type] + + if np.isfinite(float(user_rkme.dist(user_rkme))): + return True + + return False + def _convert_dist_to_score( self, dist_list: List[float], dist_ratio: float = 0.1, min_score: float = 0.92, improve_score: float = 0.7 ) -> List[float]: @@ -430,43 +464,6 @@ def _filter_by_rkme_spec_single( idx = idx + 1 return sorted_score_list[:idx], learnware_list[:idx] - def _filter_by_rkme_spec_metadata( - self, - learnware_list: List[Learnware], - user_rkme: Union[RKMETableSpecification, RKMEImageSpecification, RKMETextSpecification], - ) -> List[Learnware]: - """Filter learnwares whose rkme metadata different from user_rkme - - Parameters - ---------- - learnware_list : List[Learnware] - The list of learnwares whose mixture approximates the user's rkme - user_rkme : Union[RKMETableSpecification, RKMEImageSpecification, RKMETextSpecification] - User RKME statistical specification - - Returns - ------- - List[Learnware] - Learnwares whose rkme dimensions equal user_rkme in user_info - """ - filtered_learnware_list = [] - user_rkme_dim = str(list(user_rkme.get_z().shape)[1:]) - - for learnware in learnware_list: - if self.stat_spec_type not in learnware.specification.stat_spec: - continue - rkme = learnware.specification.get_stat_spec_by_name(self.stat_spec_type) - if self.stat_spec_type == "RKMETextSpecification" and not set(user_rkme.language).issubset( - set(rkme.language) - ): - continue - - rkme_dim = str(list(rkme.get_z().shape)[1:]) - if rkme_dim == user_rkme_dim: - filtered_learnware_list.append(learnware) - - return filtered_learnware_list - def _search_by_rkme_spec_mixture_greedy( self, learnware_list: List[Learnware], @@ -586,15 +583,9 @@ def __call__( max_search_num: int = 5, search_method: str = "greedy", ) -> SearchResults: - self.stat_spec_type = parse_specification_type(stat_specs=user_info.stat_info) - if self.stat_spec_type is None: - raise KeyError("No supported stat specification is given in the user info") - + self.stat_spec_type = parse_specification_type(stat_specs=user_info.stat_info, spec_list=self.STAT_TYPES) user_rkme = user_info.stat_info[self.stat_spec_type] - if not np.isfinite(float(user_rkme.dist(user_rkme))): - raise ValueError("The distance between uploaded statistical specifications is not finite!") - learnware_list = self._filter_by_rkme_spec_metadata(learnware_list, user_rkme) logger.info(f"After filter by rkme dimension, learnware_list length is {len(learnware_list)}") sorted_dist_list, single_learnware_list = self._search_by_rkme_spec_single(learnware_list, user_rkme) @@ -664,48 +655,62 @@ def __call__( return search_results -class EasySearcher(BaseSearcher): - def __init__(self, organizer: EasyOrganizer): - self.semantic_searcher = EasyFuzzSemanticSearcher(organizer) - self.stat_searcher = EasyStatSearcher(organizer) - super(EasySearcher, self).__init__(organizer) - - def reset(self, organizer): +class CombinedSearcher(BaseSearcher): + def __init__( + self, + organizer: BaseOrganizer, + semantic_searcher_list: List[BasicSearcher], + stat_searcher_list: List[BasicSearcher], + ): + self.semantic_searcher_list = semantic_searcher_list + self.stat_searcher_list = 
stat_searcher_list + super(CombinedSearcher, self).__init__(organizer) + + def reset(self, organizer: BaseOrganizer): self.learnware_organizer = organizer - self.semantic_searcher.reset(organizer) - self.stat_searcher.reset(organizer) + for searcher in self.semantic_searcher_list + self.stat_searcher_list: + searcher.reset(organizer) def __call__( self, user_info: BaseUserInfo, check_status: int = None, max_search_num: int = 5, search_method: str = "greedy" ) -> SearchResults: - """Search learnwares based on user_info from learnwares with check_status + """ + Search learnwares based on user_info, iterating over semantic and stat searchers to find applicable results. Parameters ---------- user_info : BaseUserInfo - user_info contains semantic_spec and stat_info - max_search_num : int - The maximum number of the returned learnwares + The user information for searching learnwares. + max_search_num : int, optional + The maximum number of the returned learnwares. check_status : int, optional - None: search from all learnwares - - Others: search from learnwares with check_status + - Others: search from learnwares with check_status. Returns ------- - Tuple[List[float], List[Learnware], float, List[Learnware]] - the first is the sorted list of rkme dist - the second is the sorted list of Learnware (single) by the rkme dist - the third is the score of Learnware (mixture) - the fourth is the list of Learnware (mixture), the size is search_num + SearchResults + The search results, including sorted lists of learnwares and associated scores. """ learnware_list = self.learnware_organizer.get_learnwares(check_status=check_status) - semantic_search_result = self.semantic_searcher(learnware_list, user_info) - learnware_list = [search_item.learnware for search_item in semantic_search_result.get_single_results()] + for semantic_searcher in self.semantic_searchers: + if semantic_searcher.is_applicable(user_info): + filtered_learnware_list = [ + learnware for learnware in learnware_list if semantic_searcher.is_applicable_learnware(learnware) + ] + semantic_search_result = semantic_searcher(filtered_learnware_list, user_info) + learnware_list = [search_item.learnware for search_item in semantic_search_result.get_single_results()] + break + if len(learnware_list) == 0: return SearchResults() - if parse_specification_type(stat_specs=user_info.stat_info) is not None: - return self.stat_searcher(learnware_list, user_info, max_search_num, search_method) - else: - return semantic_search_result + for stat_searcher in self.stat_searchers: + if stat_searcher.is_applicable(user_info): + filtered_learnware_list = [ + learnware for learnware in learnware_list if stat_searcher.is_applicable_learnware(learnware) + ] + return stat_searcher(filtered_learnware_list, user_info, max_search_num, search_method) + + return semantic_search_result diff --git a/learnware/market/heterogeneous/__init__.py b/learnware/market/heterogeneous/__init__.py index 4162f1d2..3c228a24 100644 --- a/learnware/market/heterogeneous/__init__.py +++ b/learnware/market/heterogeneous/__init__.py @@ -5,10 +5,10 @@ if not is_torch_available(verbose=False): HeteroMapTableOrganizer = None - HeteroSearcher = None - logger.error("HeteroMapTableOrganizer and HeteroSearcher are not available because 'torch' is not installed!") + HeteroStatSearcher = None + logger.error("HeteroMapTableOrganizer and HeteroStatSearcher are not available because 'torch' is not installed!") else: from .organizer import HeteroMapTableOrganizer - from .searcher import HeteroSearcher + 
from .searcher import HeteroStatSearcher -__all__ = ["HeteroMapTableOrganizer", "HeteroSearcher"] +__all__ = ["HeteroMapTableOrganizer", "HeteroStatSearcher"] diff --git a/learnware/market/heterogeneous/searcher.py b/learnware/market/heterogeneous/searcher.py index 5a10ac0c..0ff94ec0 100644 --- a/learnware/market/heterogeneous/searcher.py +++ b/learnware/market/heterogeneous/searcher.py @@ -1,15 +1,54 @@ from typing import Optional -from .utils import is_hetero from ..base import BaseUserInfo, SearchResults -from ..easy import EasySearcher -from ..utils import parse_specification_type +from ..easy import EasyStatSearcher from ...logger import get_module_logger logger = get_module_logger("hetero_searcher") -class HeteroSearcher(EasySearcher): +class HeteroStatSearcher(EasyStatSearcher): + SPEC_TYPES = ["HeteroMapTableSpecification"] + + def is_applicable_user(self, user_info: BaseUserInfo, verbose: bool = True) -> bool: + stat_specs = user_info.stat_info + semantic_spec = user_info.semantic_spec + try: + table_stat_spec = stat_specs["RKMETableSpecification"] + table_input_shape = table_stat_spec.get_z().shape[1] + + semantic_data_type = semantic_spec["Data"]["Values"] + if len(semantic_data_type) > 0 and semantic_data_type != ["Table"]: + logger.warning("User doesn't provide correct data type, it must be Table.") + return False + + semantic_task_type = semantic_spec["Task"]["Values"] + if len(semantic_task_type) > 0 and semantic_task_type not in [["Classification"], ["Regression"]]: + logger.warning( + "User doesn't provide correct task type, it must be either Classification or Regression." + ) + return False + + semantic_input_description = semantic_spec["Input"] + semantic_description_dim = int(semantic_input_description["Dimension"]) + semantic_decription_feature_num = len(semantic_input_description["Description"]) + + if semantic_decription_feature_num <= 0: + if verbose: + logger.warning("At least one of Input.Description in semantic spec should be provides.") + return False + + if table_input_shape != semantic_description_dim: + if verbose: + logger.warning("User data feature dimensions mismatch with semantic specification.") + return False + + return True + except Exception as err: + if verbose: + logger.warning("Invalid heterogeneous search information provided.") + return False + def __call__( self, user_info: BaseUserInfo, @@ -38,17 +77,7 @@ def __call__( the third is the score of Learnware (mixture) the fourth is the list of Learnware (mixture), the size is search_num """ - learnware_list = self.learnware_organizer.get_learnwares(check_status=check_status) - semantic_search_result = self.semantic_searcher(learnware_list, user_info) - - learnware_list = [search_item.learnware for search_item in semantic_search_result.get_single_results()] - if len(learnware_list) == 0: - return SearchResults() - - if parse_specification_type(stat_specs=user_info.stat_info) is not None: - if is_hetero(stat_specs=user_info.stat_info, semantic_spec=user_info.semantic_spec): - user_hetero_spec = self.learnware_organizer.generate_hetero_map_spec(user_info) - user_info.update_stat_info(user_hetero_spec.type, user_hetero_spec) - return self.stat_searcher(learnware_list, user_info, max_search_num, search_method) - else: - return semantic_search_result + user_hetero_spec = self.learnware_organizer.generate_hetero_map_spec(user_info) + user_info.update_stat_info(user_hetero_spec.type, user_hetero_spec) + + return super().__call__(user_info, check_status, max_search_num, search_method) diff --git 
a/learnware/market/heterogeneous/utils.py b/learnware/market/heterogeneous/utils.py deleted file mode 100644 index 860159e3..00000000 --- a/learnware/market/heterogeneous/utils.py +++ /dev/null @@ -1,51 +0,0 @@ -from ...logger import get_module_logger - -logger = get_module_logger("hetero_utils") - - -def is_hetero(stat_specs: dict, semantic_spec: dict, verbose=True) -> bool: - """Check if user_info satifies all the criteria required for enabling heterogeneous learnware search - - Parameters - ---------- - user_info : BaseUserInfo - user_info contains semantic_spec and stat_info - - Returns - ------- - bool - A flag indicating whether heterogeneous search is enabled for user_info - """ - try: - table_stat_spec = stat_specs["RKMETableSpecification"] - table_input_shape = table_stat_spec.get_z().shape[1] - - semantic_data_type = semantic_spec["Data"]["Values"] - if len(semantic_data_type) > 0 and semantic_data_type != ["Table"]: - logger.warning("User doesn't provide correct data type, it must be Table.") - return False - - semantic_task_type = semantic_spec["Task"]["Values"] - if len(semantic_task_type) > 0 and semantic_task_type not in [["Classification"], ["Regression"]]: - logger.warning("User doesn't provide correct task type, it must be either Classification or Regression.") - return False - - semantic_input_description = semantic_spec["Input"] - semantic_description_dim = int(semantic_input_description["Dimension"]) - semantic_decription_feature_num = len(semantic_input_description["Description"]) - - if semantic_decription_feature_num <= 0: - if verbose: - logger.warning("At least one of Input.Description in semantic spec should be provides.") - return False - - if table_input_shape != semantic_description_dim: - if verbose: - logger.warning("User data feature dimensions mismatch with semantic specification.") - return False - - return True - except Exception as err: - if verbose: - logger.warning("Invalid heterogeneous search information provided.") - return False diff --git a/learnware/market/llm/__init__.py b/learnware/market/llm/__init__.py index 4f9f3399..7b1624a0 100644 --- a/learnware/market/llm/__init__.py +++ b/learnware/market/llm/__init__.py @@ -4,9 +4,9 @@ logger = get_module_logger("market_llm") if not is_torch_available(verbose=False): - LLMSearcher = None - logger.error("LLMSearcher are not available because 'torch' is not installed!") + LLMStatSearcher = None + logger.error("LLMStatSearcher is not available because 'torch' is not installed!") else: - from .searcher import LLMSearcher + from .searcher import LLMStatSearcher -__all__ = ["LLMSearcher"] \ No newline at end of file +__all__ = ["LLMStatSearcher"] diff --git a/learnware/market/llm/searcher.py b/learnware/market/llm/searcher.py index 2a35c4c5..ff3c1185 100644 --- a/learnware/market/llm/searcher.py +++ b/learnware/market/llm/searcher.py @@ -1,39 +1,47 @@ from typing import Optional -from .utils import is_llm -from ..base import BaseUserInfo, SearchResults -from ..easy import EasySearcher -from ..utils import parse_specification_type +from ..base import BaseUserInfo, SearchResults, BasicSearcher from ...logger import get_module_logger - logger = get_module_logger("llm_searcher") -class LLMSearcher(EasySearcher): - def search_learnware(self, user_info: BaseUserInfo) -> SearchResults: - """Search helpful learnwares from learnware_list based on task vector specification +class LLMStatSearcher(BasicSearcher): + SPEC_TYPES = ["TaskVectorSpecification"] - Parameters - ---------- - user_info : BaseUserInfo - - 
user_info with semantic specifications and task vector specification + def is_applicable_user(self, user_info: BaseUserInfo, verbose: bool = True) -> bool: + stat_specs = user_info.stat_info + semantic_spec = user_info.semantic_spec + try: + if "TaskVectorSpecification" not in stat_specs: + if verbose: + logger.warning("TaskVectorSpecification is not provided in stat_info.") + return False - Returns - ------- - Tuple[List[float], List[Learnware]] - the first is the sorted list of task vector similarities - the second is the sorted list of Learnware (single) by the task vector similarities - """ - pass + semantic_data_type = semantic_spec["Data"]["Values"] + if len(semantic_data_type) > 0 and semantic_data_type != ["Text"]: + logger.warning("User doesn't provide correct data type, it must be Text.") + return False + + semantic_task_type = semantic_spec["Task"]["Values"] + if len(semantic_task_type) > 0 and semantic_task_type != ["Text Generation"]: + logger.warning("User doesn't provide correct task type, it must be Text Generation.") + return False + + return True + except Exception as err: + if verbose: + logger.warning("Invalid llm search information provided.") + return False def __call__( self, user_info: BaseUserInfo, - check_status: Optional[int] = None + check_status: Optional[int] = None, + max_search_num: int = 5, + search_method: str = "greedy", ) -> SearchResults: - """Search learnwares based on user_info from learnwares with check_status. - Employs LLM learnware search if specific requirements are met, otherwise resorts to homogeneous search methods. + """Employ LLM learnware search based on user_info from learnwares with check_status. Parameters ---------- @@ -49,16 +57,4 @@ def __call__( the first is the sorted list of rkme dist the second is the sorted list of Learnware (single) by the rkme dist """ - learnware_list = self.learnware_organizer.get_learnwares(check_status=check_status) - semantic_search_result = self.semantic_searcher(learnware_list, user_info) - - learnware_list = [search_item.learnware for search_item in semantic_search_result.get_single_results()] - if len(learnware_list) == 0: - return SearchResults() - - if parse_specification_type(stat_specs=user_info.stat_info) is not None: - if is_llm(stat_specs=user_info.stat_info, semantic_spec=user_info.semantic_spec): - return self.search_learnware(user_info) - return self.stat_searcher(learnware_list, user_info, max_search_num=1, search_method="greedy") - else: - return semantic_search_result \ No newline at end of file + pass diff --git a/learnware/market/llm/utils.py b/learnware/market/llm/utils.py deleted file mode 100644 index a14921f4..00000000 --- a/learnware/market/llm/utils.py +++ /dev/null @@ -1,39 +0,0 @@ -from ...logger import get_module_logger - -logger = get_module_logger("llm_utils") - - -def is_llm(stat_specs: dict, semantic_spec: dict, verbose=True) -> bool: - """Check if user_info satifies all the criteria required for enabling llm learnware search - - Parameters - ---------- - user_info : BaseUserInfo - user_info contains semantic_spec and stat_info - - Returns - ------- - bool - A flag indicating whether llm search is enabled for user_info - """ - try: - if "TaskVectorSpecification" not in stat_specs: - if verbose: - logger.warning("TaskVectorSpecification is not provided in stat_info.") - return False - - semantic_data_type = semantic_spec["Data"]["Values"] - if len(semantic_data_type) > 0 and semantic_data_type != ["Text"]: - logger.warning("User doesn't provide correct data type, it must be 
Text.") - return False - - semantic_task_type = semantic_spec["Task"]["Values"] - if len(semantic_task_type) > 0 and semantic_task_type != ["Text Generation"]: - logger.warning("User doesn't provide correct task type, it must be Text Generation.") - return False - - return True - except Exception as err: - if verbose: - logger.warning("Invalid llm search information provided.") - return False diff --git a/learnware/market/module.py b/learnware/market/module.py index e110e1cb..7ed92e4c 100644 --- a/learnware/market/module.py +++ b/learnware/market/module.py @@ -1,8 +1,16 @@ from .base import LearnwareMarket from .classes import CondaChecker -from .easy import EasyOrganizer, EasySearcher, EasySemanticChecker, EasyStatChecker -from .heterogeneous import HeteroMapTableOrganizer, HeteroSearcher -from .llm import LLMSearcher +from .easy import ( + EasyOrganizer, + EasyFuzzSemanticSearcher, + EasyStatSearcher, + CombinedSearcher, + EasySemanticChecker, + EasyStatChecker, +) +from .heterogeneous import HeteroMapTableOrganizer, HeteroStatSearcher +from .llm import LLMStatSearcher + def get_market_component( name, market_id, rebuild, organizer_kwargs=None, searcher_kwargs=None, checker_kwargs=None, conda_checker=False @@ -13,19 +21,37 @@ def get_market_component( if name == "easy": easy_organizer = EasyOrganizer(market_id=market_id, rebuild=rebuild) - easy_searcher = EasySearcher(organizer=easy_organizer) + + semantic_searcher_list = [EasyFuzzSemanticSearcher(organizer=easy_organizer)] + stat_searcher_list = [EasyStatSearcher(organizer=easy_organizer)] + easy_searcher = CombinedSearcher( + organizer=easy_organizer, + semantic_searcher_list=semantic_searcher_list, + stat_searcher_list=stat_searcher_list, + ) + easy_checker_list = [ EasySemanticChecker(), EasyStatChecker() if conda_checker is False else CondaChecker(EasyStatChecker()), ] + market_component = { "organizer": easy_organizer, "searcher": easy_searcher, "checker_list": easy_checker_list, } + elif name == "hetero": hetero_organizer = HeteroMapTableOrganizer(market_id=market_id, rebuild=rebuild, **organizer_kwargs) - hetero_searcher = HeteroSearcher(organizer=hetero_organizer) + + semantic_searcher_list = [EasyFuzzSemanticSearcher(organizer=hetero_organizer)] + stat_searcher_list = [HeteroStatSearcher(organizer=hetero_organizer)] + hetero_searcher = CombinedSearcher( + organizer=hetero_organizer, + semantic_searcher_list=semantic_searcher_list, + stat_searcher_list=stat_searcher_list, + ) + hetero_checker_list = [ EasySemanticChecker(), EasyStatChecker() if conda_checker is False else CondaChecker(EasyStatChecker()), @@ -36,9 +62,18 @@ def get_market_component( "searcher": hetero_searcher, "checker_list": hetero_checker_list, } + elif name == "llm": - llm_organizer = EasyOrganizer(market_id=market_id, rebuild=rebuild) - llm_searcher = LLMSearcher(organizer=llm_organizer) + llm_organizer = HeteroMapTableOrganizer(market_id=market_id, rebuild=rebuild, **organizer_kwargs) + + semantic_searcher_list = [EasyFuzzSemanticSearcher(organizer=llm_organizer)] + stat_searcher_list = [LLMStatSearcher(organizer=llm_organizer)] + llm_searcher = CombinedSearcher( + organizer=llm_organizer, + semantic_searcher_list=semantic_searcher_list, + stat_searcher_list=stat_searcher_list, + ) + llm_checker_list = [ EasySemanticChecker(), EasyStatChecker() if conda_checker is False else CondaChecker(EasyStatChecker()), @@ -49,6 +84,7 @@ def get_market_component( "searcher": llm_searcher, "checker_list": llm_checker_list, } + else: raise ValueError(f"name {name} is not 
supported for market") diff --git a/learnware/model/torch_model.py b/learnware/model/torch_model.py index 79fa1ac7..66370191 100644 --- a/learnware/model/torch_model.py +++ b/learnware/model/torch_model.py @@ -1,15 +1,13 @@ -import os -import torch import numpy as np from torch import nn + class TorchModel: def __init__( self, model: nn.Module, - input_shape, - output_shape, - device=None, + input_shape: tuple, + output_shape: tuple, ): self._model = model self.input_shape = input_shape diff --git a/learnware/tests/benchmarks/__init__.py b/learnware/tests/benchmarks/__init__.py index a5f48fa2..f857413b 100644 --- a/learnware/tests/benchmarks/__init__.py +++ b/learnware/tests/benchmarks/__init__.py @@ -3,7 +3,7 @@ import tempfile import zipfile from dataclasses import dataclass -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Union, Dict import numpy as np @@ -197,4 +197,4 @@ def list_specific_capability_benchmarks(self) -> Dict: def get_benchmark(self, benchmark_name: str) -> Dict[str, List[str]]: # preprocess hugging face datasets into list of strings # need to specify hugging face save path with root dir C.cache_dir - pass \ No newline at end of file + pass From 098481a01967d98c11dccf471dce15bd792dcbc8 Mon Sep 17 00:00:00 2001 From: liuht Date: Fri, 6 Dec 2024 18:34:36 +0800 Subject: [PATCH 039/108] [FIX] fix bugs in market module --- learnware/market/module.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/learnware/market/module.py b/learnware/market/module.py index 7ed92e4c..a9b36e68 100644 --- a/learnware/market/module.py +++ b/learnware/market/module.py @@ -22,8 +22,8 @@ def get_market_component( if name == "easy": easy_organizer = EasyOrganizer(market_id=market_id, rebuild=rebuild) - semantic_searcher_list = [EasyFuzzSemanticSearcher(organizer=easy_organizer)] - stat_searcher_list = [EasyStatSearcher(organizer=easy_organizer)] + semantic_searcher_list = [EasyFuzzSemanticSearcher()] + stat_searcher_list = [EasyStatSearcher()] easy_searcher = CombinedSearcher( organizer=easy_organizer, semantic_searcher_list=semantic_searcher_list, @@ -45,7 +45,7 @@ def get_market_component( hetero_organizer = HeteroMapTableOrganizer(market_id=market_id, rebuild=rebuild, **organizer_kwargs) semantic_searcher_list = [EasyFuzzSemanticSearcher(organizer=hetero_organizer)] - stat_searcher_list = [HeteroStatSearcher(organizer=hetero_organizer)] + stat_searcher_list = [HeteroStatSearcher(), EasyStatSearcher()] hetero_searcher = CombinedSearcher( organizer=hetero_organizer, semantic_searcher_list=semantic_searcher_list, @@ -67,7 +67,7 @@ def get_market_component( llm_organizer = HeteroMapTableOrganizer(market_id=market_id, rebuild=rebuild, **organizer_kwargs) semantic_searcher_list = [EasyFuzzSemanticSearcher(organizer=llm_organizer)] - stat_searcher_list = [LLMStatSearcher(organizer=llm_organizer)] + stat_searcher_list = [LLMStatSearcher(), HeteroStatSearcher(), EasyStatSearcher()] llm_searcher = CombinedSearcher( organizer=llm_organizer, semantic_searcher_list=semantic_searcher_list, From 71aa76e04b05d30c581d3b6be2dd45765bd5bae4 Mon Sep 17 00:00:00 2001 From: liuht Date: Thu, 12 Dec 2024 22:33:12 +0800 Subject: [PATCH 040/108] [MNT] add LLMBenchmark; extend LearnwareBenchmark to LearnwareBenchmarkManager --- examples/dataset_image_workflow/workflow.py | 4 +- examples/dataset_table_workflow/base.py | 4 +- examples/dataset_text_workflow/workflow.py | 4 +- learnware/market/__init__.py | 4 +- learnware/market/anchor/searcher.py | 4 +- 
learnware/market/base.py | 10 +- learnware/market/easy/__init__.py | 6 +- learnware/market/easy/searcher.py | 28 +++--- learnware/market/llm/searcher.py | 4 +- learnware/market/module.py | 10 +- learnware/model/torch_model.py | 2 +- learnware/specification/system/llm_vector.py | 7 +- learnware/tests/benchmarks/__init__.py | 93 ++++++++++--------- learnware/tests/benchmarks/config.py | 10 +- .../tests/benchmarks/llm_process_funcs.py | 0 15 files changed, 101 insertions(+), 89 deletions(-) create mode 100644 learnware/tests/benchmarks/llm_process_funcs.py diff --git a/examples/dataset_image_workflow/workflow.py b/examples/dataset_image_workflow/workflow.py index 685a47fa..2cb29ef1 100644 --- a/examples/dataset_image_workflow/workflow.py +++ b/examples/dataset_image_workflow/workflow.py @@ -18,7 +18,7 @@ from learnware.market import BaseUserInfo, instantiate_learnware_market from learnware.reuse import AveragingReuser, EnsemblePruningReuser, JobSelectorReuser from learnware.specification import generate_stat_spec -from learnware.tests.benchmarks import LearnwareBenchmark +from learnware.tests.benchmarks import LearnwareBenchmarkManager from learnware.utils import choose_device logger = get_module_logger("image_workflow", level="INFO") @@ -57,7 +57,7 @@ def _plot_labeled_peformance_curves(self, all_user_curves_data): def _prepare_market(self, rebuild=False): client = LearnwareClient() - self.image_benchmark = LearnwareBenchmark().get_benchmark(image_benchmark_config) + self.image_benchmark = LearnwareBenchmarkManager().get_benchmark(image_benchmark_config) self.image_market = instantiate_learnware_market(market_id=self.image_benchmark.name, rebuild=rebuild) self.user_semantic = client.get_semantic_specification(self.image_benchmark.learnware_ids[0]) self.user_semantic["Name"]["Values"] = "" diff --git a/examples/dataset_table_workflow/base.py b/examples/dataset_table_workflow/base.py index 6f6559cd..3d26fd60 100644 --- a/examples/dataset_table_workflow/base.py +++ b/examples/dataset_table_workflow/base.py @@ -14,7 +14,7 @@ from learnware.logger import get_module_logger from learnware.market import instantiate_learnware_market from learnware.reuse.utils import fill_data_with_mean -from learnware.tests.benchmarks import LearnwareBenchmark +from learnware.tests.benchmarks import LearnwareBenchmarkManager logger = get_module_logger("base_table", level="INFO") @@ -63,7 +63,7 @@ def get_train_subsets(n_labeled_list, n_repeat_list, train_x, train_y): def _prepare_market(self, benchmark_config, name, rebuild, retrain): client = LearnwareClient() - self.benchmark = LearnwareBenchmark().get_benchmark(benchmark_config) + self.benchmark = LearnwareBenchmarkManager().get_benchmark(benchmark_config) self.market = instantiate_learnware_market( market_id=self.benchmark.name, name=name, diff --git a/examples/dataset_text_workflow/workflow.py b/examples/dataset_text_workflow/workflow.py index 42bc315f..04e9ef70 100644 --- a/examples/dataset_text_workflow/workflow.py +++ b/examples/dataset_text_workflow/workflow.py @@ -17,7 +17,7 @@ from learnware.market import BaseUserInfo, instantiate_learnware_market from learnware.reuse import AveragingReuser, EnsemblePruningReuser, JobSelectorReuser from learnware.specification import RKMETextSpecification -from learnware.tests.benchmarks import LearnwareBenchmark +from learnware.tests.benchmarks import LearnwareBenchmarkManager logger = get_module_logger("text_workflow", level="INFO") @@ -72,7 +72,7 @@ def _plot_labeled_peformance_curves(self, all_user_curves_data): def 
_prepare_market(self, rebuild=False): client = LearnwareClient() - self.text_benchmark = LearnwareBenchmark().get_benchmark(text_benchmark_config) + self.text_benchmark = LearnwareBenchmarkManager().get_benchmark(text_benchmark_config) self.text_market = instantiate_learnware_market(market_id=self.text_benchmark.name, rebuild=rebuild) self.user_semantic = client.get_semantic_specification(self.text_benchmark.learnware_ids[0]) self.user_semantic["Name"]["Values"] = "" diff --git a/learnware/market/__init__.py b/learnware/market/__init__.py index 184f9481..6cb5ee58 100644 --- a/learnware/market/__init__.py +++ b/learnware/market/__init__.py @@ -6,7 +6,7 @@ EasyExactSemanticSearcher, EasyFuzzSemanticSearcher, EasyStatSearcher, - CombinedSearcher, + SeqCombinedSearcher, EasySemanticChecker, EasyStatChecker, ) @@ -30,7 +30,7 @@ "EasyExactSemanticSearcher", "EasyFuzzSemanticSearcher", "EasyStatSearcher", - "CombinedSearcher", + "SeqCombinedSearcher", "EasySemanticChecker", "EasyStatChecker", "EvolvedOrganizer", diff --git a/learnware/market/anchor/searcher.py b/learnware/market/anchor/searcher.py index 2ef2a5cd..5d6eef22 100644 --- a/learnware/market/anchor/searcher.py +++ b/learnware/market/anchor/searcher.py @@ -1,14 +1,14 @@ from typing import Any, List, Tuple from .user_info import AnchoredUserInfo -from ..easy.searcher import BasicSearcher +from ..base import AtomicSearcher from ...learnware import Learnware from ...logger import get_module_logger logger = get_module_logger("anchor_searcher") -class AnchoredSearcher(BasicSearcher): +class AnchoredSearcher(AtomicSearcher): def search_anchor_learnware(self, user_info: AnchoredUserInfo) -> Tuple[Any, List[Learnware]]: """Search anchor Learnwares from anchor_learnware_list based on user_info diff --git a/learnware/market/base.py b/learnware/market/base.py index 4ec78eff..2b87f564 100644 --- a/learnware/market/base.py +++ b/learnware/market/base.py @@ -503,9 +503,9 @@ def __call__(self, user_info: BaseUserInfo, check_status: int = None) -> SearchR raise NotImplementedError("'__call__' method is not implemented in BaseSearcher") -class BasicSearcher(BaseSearcher): +class AtomicSearcher(BaseSearcher): def __init__(self, organizer: BaseOrganizer, **kwargs): - super(BasicSearcher, self).__init__(organizer, **kwargs) + super(AtomicSearcher, self).__init__(organizer, **kwargs) def is_applicable_user(self, user_info: BaseUserInfo, **kwargs) -> bool: """Check if the user_info is applicable for this searcher @@ -520,7 +520,7 @@ def is_applicable_user(self, user_info: BaseUserInfo, **kwargs) -> bool: bool A flag indicating whether the user_info is applicable for this searcher """ - raise NotImplementedError("'is_applicable_user' method is not implemented in BasicSearcher") + raise NotImplementedError("'is_applicable_user' method is not implemented in AtomicSearcher") def is_applicable_learnware(self, learnware: Learnware, **kwargs) -> bool: """Check if the learnware is applicable for this searcher @@ -535,7 +535,7 @@ def is_applicable_learnware(self, learnware: Learnware, **kwargs) -> bool: bool A flag indicating whether the learnware is applicable for this searcher """ - raise NotImplementedError("'is_applicable_learnware' method is not implemented in BasicSearcher") + raise NotImplementedError("'is_applicable_learnware' method is not implemented in AtomicSearcher") def __call__(self, user_info: BaseUserInfo, check_status: int = None) -> SearchResults: """Search learnwares based on user_info from learnwares with check_status @@ -548,7 +548,7 @@ def 
__call__(self, user_info: BaseUserInfo, check_status: int = None) -> SearchR - None: search from all learnwares - Others: search from learnwares with check_status """ - raise NotImplementedError("'__call__' method is not implemented in BasicSearcher") + raise NotImplementedError("'__call__' method is not implemented in AtomicSearcher") class BaseChecker: diff --git a/learnware/market/easy/__init__.py b/learnware/market/easy/__init__.py index 7305a984..1ec3058f 100644 --- a/learnware/market/easy/__init__.py +++ b/learnware/market/easy/__init__.py @@ -10,11 +10,11 @@ EasyExactSemanticSearcher = None EasyFuzzSemanticSearcher = None EasyStatSearcher = None - CombinedSearcher = None + SeqCombinedSearcher = None logger.error("EasySeacher and EasyChecker are not available because 'torch' is not installed!") else: from .checker import EasySemanticChecker, EasyStatChecker - from .searcher import EasyExactSemanticSearcher, EasyFuzzSemanticSearcher, EasyStatSearcher, CombinedSearcher + from .searcher import EasyExactSemanticSearcher, EasyFuzzSemanticSearcher, EasyStatSearcher, SeqCombinedSearcher __all__ = [ "EasyOrganizer", @@ -23,5 +23,5 @@ "EasyExactSemanticSearcher", "EasyFuzzSemanticSearcher", "EasyStatSearcher", - "CombinedSearcher", + "SeqCombinedSearcher", ] diff --git a/learnware/market/easy/searcher.py b/learnware/market/easy/searcher.py index 3f2e10a8..3c5a808b 100644 --- a/learnware/market/easy/searcher.py +++ b/learnware/market/easy/searcher.py @@ -8,7 +8,7 @@ from ..base import ( BaseOrganizer, BaseSearcher, - BasicSearcher, + AtomicSearcher, BaseUserInfo, MultipleSearchItem, SearchResults, @@ -22,12 +22,14 @@ logger = get_module_logger("easy_seacher") -class EasyExactSemanticSearcher(BasicSearcher): +class EasyExactSemanticSearcher(AtomicSearcher): def is_applicable_learnware(self, learnware: Learnware) -> bool: - return True + learnware_semantic_spec = learnware.specification.get_semantic_spec + return learnware_semantic_spec def is_applicable_user(self, user_info: BaseUserInfo) -> bool: - return True + user_semantic_spec = user_info.get_semantic_spec() + return user_semantic_spec def _learnware_id_search(self, learnware_id: str, learnware_list: List[Learnware]) -> List[Learnware]: match_learnwares = [] @@ -91,12 +93,14 @@ def __call__(self, learnware_list: List[Learnware], user_info: BaseUserInfo) -> return SearchResults(single_results=[SingleSearchItem(learnware=_learnware) for _learnware in match_learnwares]) -class EasyFuzzSemanticSearcher(BasicSearcher): +class EasyFuzzSemanticSearcher(AtomicSearcher): def is_applicable_learnware(self, learnware: Learnware) -> bool: - return True + learnware_semantic_spec = learnware.specification.get_semantic_spec + return learnware_semantic_spec def is_applicable_user(self, user_info: BaseUserInfo) -> bool: - return True + user_semantic_spec = user_info.get_semantic_spec() + return user_semantic_spec def _learnware_id_search(self, learnware_id: str, learnware_list: List[Learnware]) -> List[Learnware]: match_learnwares = [] @@ -222,7 +226,7 @@ def __call__( return SearchResults(single_results=[SingleSearchItem(learnware=_learnware) for _learnware in final_result]) -class EasyStatSearcher(BasicSearcher): +class EasyStatSearcher(AtomicSearcher): STAT_TYPES = ["RKMETableSpecification", "RKMEImageSpecification", "RKMETextSpecification"] def is_applicable_learnware(self, learnware: Learnware) -> bool: @@ -655,16 +659,16 @@ def __call__( return search_results -class CombinedSearcher(BaseSearcher): +class SeqCombinedSearcher(BaseSearcher): def 
__init__( self, organizer: BaseOrganizer, - semantic_searcher_list: List[BasicSearcher], - stat_searcher_list: List[BasicSearcher], + semantic_searcher_list: List[AtomicSearcher], + stat_searcher_list: List[AtomicSearcher], ): self.semantic_searcher_list = semantic_searcher_list self.stat_searcher_list = stat_searcher_list - super(CombinedSearcher, self).__init__(organizer) + super(SeqCombinedSearcher, self).__init__(organizer) def reset(self, organizer: BaseOrganizer): self.learnware_organizer = organizer diff --git a/learnware/market/llm/searcher.py b/learnware/market/llm/searcher.py index ff3c1185..ca4c7769 100644 --- a/learnware/market/llm/searcher.py +++ b/learnware/market/llm/searcher.py @@ -1,12 +1,12 @@ from typing import Optional -from ..base import BaseUserInfo, SearchResults, BasicSearcher +from ..base import BaseUserInfo, SearchResults, AtomicSearcher from ...logger import get_module_logger logger = get_module_logger("llm_searcher") -class LLMStatSearcher(BasicSearcher): +class LLMStatSearcher(AtomicSearcher): SPEC_TYPES = ["TaskVectorSpecification"] def is_applicable_user(self, user_info: BaseUserInfo, verbose: bool = True) -> bool: diff --git a/learnware/market/module.py b/learnware/market/module.py index a9b36e68..8c87e5cd 100644 --- a/learnware/market/module.py +++ b/learnware/market/module.py @@ -4,7 +4,7 @@ EasyOrganizer, EasyFuzzSemanticSearcher, EasyStatSearcher, - CombinedSearcher, + SeqCombinedSearcher, EasySemanticChecker, EasyStatChecker, ) @@ -24,7 +24,7 @@ def get_market_component( semantic_searcher_list = [EasyFuzzSemanticSearcher()] stat_searcher_list = [EasyStatSearcher()] - easy_searcher = CombinedSearcher( + easy_searcher = SeqCombinedSearcher( organizer=easy_organizer, semantic_searcher_list=semantic_searcher_list, stat_searcher_list=stat_searcher_list, @@ -46,7 +46,7 @@ def get_market_component( semantic_searcher_list = [EasyFuzzSemanticSearcher(organizer=hetero_organizer)] stat_searcher_list = [HeteroStatSearcher(), EasyStatSearcher()] - hetero_searcher = CombinedSearcher( + hetero_searcher = SeqCombinedSearcher( organizer=hetero_organizer, semantic_searcher_list=semantic_searcher_list, stat_searcher_list=stat_searcher_list, @@ -66,9 +66,9 @@ def get_market_component( elif name == "llm": llm_organizer = HeteroMapTableOrganizer(market_id=market_id, rebuild=rebuild, **organizer_kwargs) - semantic_searcher_list = [EasyFuzzSemanticSearcher(organizer=llm_organizer)] + semantic_searcher_list = [EasyFuzzSemanticSearcher()] stat_searcher_list = [LLMStatSearcher(), HeteroStatSearcher(), EasyStatSearcher()] - llm_searcher = CombinedSearcher( + llm_searcher = SeqCombinedSearcher( organizer=llm_organizer, semantic_searcher_list=semantic_searcher_list, stat_searcher_list=stat_searcher_list, diff --git a/learnware/model/torch_model.py b/learnware/model/torch_model.py index 66370191..798c06dc 100644 --- a/learnware/model/torch_model.py +++ b/learnware/model/torch_model.py @@ -14,7 +14,7 @@ def __init__( self.output_shape = output_shape @property - def model(self) -> nn.Module: + def nn_model(self) -> nn.Module: """ fetch the inner model """ diff --git a/learnware/specification/system/llm_vector.py b/learnware/specification/system/llm_vector.py index 7f50b79e..749a6653 100644 --- a/learnware/specification/system/llm_vector.py +++ b/learnware/specification/system/llm_vector.py @@ -3,7 +3,7 @@ from .base import SystemStatSpecification from ...model import TorchModel -from ...tests.benchmarks import LLMBenchmark +from ...tests.benchmarks import LearnwareBenchmarkManager from 
...logger import get_module_logger logger = get_module_logger("llm_general_capability_spec") @@ -18,10 +18,9 @@ def __init__(self): def generate_stat_spec_from_system(self, model: TorchModel) -> np.ndarray: # model: foundation model - # List[str]: each str is a dataset name - dataset_names = LLMBenchmark().get_general_capability_datasets() + dataset_names = LearnwareBenchmarkManager().list_benchmarks() pass def get_spec(self) -> np.ndarray: - return self.score_vector \ No newline at end of file + return self.score_vector diff --git a/learnware/tests/benchmarks/__init__.py b/learnware/tests/benchmarks/__init__.py index f857413b..c59418bf 100644 --- a/learnware/tests/benchmarks/__init__.py +++ b/learnware/tests/benchmarks/__init__.py @@ -3,11 +3,11 @@ import tempfile import zipfile from dataclasses import dataclass -from typing import List, Optional, Tuple, Union, Dict +from typing import List, Optional, Tuple, Union, Callable import numpy as np -from .config import BenchmarkConfig, benchmark_configs +from .config import BenchmarkConfig, LLMBenchmarkConfig, benchmark_configs from ..data import GetData from ...config import C @@ -71,7 +71,19 @@ def get_train_data( return ret -class LearnwareBenchmark: +@dataclass +class LLMBenchmark: + name: str + preprocess_function: Optional[Callable] = None + + def get_train_val_data(self) -> List[str]: + pass + + def get_test_data(self) -> List[str]: + pass + + +class LearnwareBenchmarkManager: def __init__(self): self.benchmark_configs = benchmark_configs @@ -148,53 +160,44 @@ def _load_cache_data(self, benchmark_config: BenchmarkConfig, data_type: str) -> return X_paths, y_paths - def get_benchmark(self, benchmark_config: Union[str, BenchmarkConfig]) -> Benchmark: + def get_benchmark(self, benchmark_config: Union[str, BenchmarkConfig, LLMBenchmarkConfig]) -> Benchmark: if isinstance(benchmark_config, str): benchmark_config = self.benchmark_configs[benchmark_config] - if not isinstance(benchmark_config, BenchmarkConfig): + if not isinstance(benchmark_config, (BenchmarkConfig, LLMBenchmarkConfig)): raise ValueError( "benchmark_config must be a BenchmarkConfig object or a string in benchmark_configs.keys()!" 
) - # Load test data - test_X_paths, test_y_paths = self._load_cache_data(benchmark_config, "test") - - # Load train data - train_X_paths, train_y_paths = None, None - if benchmark_config.train_data_path is not None: - train_X_paths, train_y_paths = self._load_cache_data(benchmark_config, "train") - - # Load extra info - extra_info_path = None - if benchmark_config.extra_info_path is not None: - extra_info_path = os.path.join(C.cache_path, benchmark_config.name, "extra_info") - if not os.path.exists(extra_info_path): - self._download_data(benchmark_config.extra_info_path, extra_info_path) - - return Benchmark( - name=benchmark_config.name, - user_num=benchmark_config.user_num, - learnware_ids=benchmark_config.learnware_ids, - test_X_paths=test_X_paths, - test_y_paths=test_y_paths, - train_X_paths=train_X_paths, - train_y_paths=train_y_paths, - extra_info_path=extra_info_path, - ) - - -class LLMBenchmark: - def __init__(self): - self.benchmark_configs = benchmark_configs - - def list_general_capability_benchmarks(self) -> Dict: - pass - - def list_specific_capability_benchmarks(self) -> Dict: - pass + if isinstance(benchmark_config, LLMBenchmarkConfig): + return LLMBenchmark( + name=benchmark_config.name, + preprocess_function=benchmark_config.preprocess_function, + ) - def get_benchmark(self, benchmark_name: str) -> Dict[str, List[str]]: - # preprocess hugging face datasets into list of strings - # need to specify hugging face save path with root dir C.cache_dir - pass + elif isinstance(benchmark_config, BenchmarkConfig): + # Load test data + test_X_paths, test_y_paths = self._load_cache_data(benchmark_config, "test") + + # Load train data + train_X_paths, train_y_paths = None, None + if benchmark_config.train_data_path is not None: + train_X_paths, train_y_paths = self._load_cache_data(benchmark_config, "train") + + # Load extra info + extra_info_path = None + if benchmark_config.extra_info_path is not None: + extra_info_path = os.path.join(C.cache_path, benchmark_config.name, "extra_info") + if not os.path.exists(extra_info_path): + self._download_data(benchmark_config.extra_info_path, extra_info_path) + + return Benchmark( + name=benchmark_config.name, + user_num=benchmark_config.user_num, + learnware_ids=benchmark_config.learnware_ids, + test_X_paths=test_X_paths, + test_y_paths=test_y_paths, + train_X_paths=train_X_paths, + train_y_paths=train_y_paths, + extra_info_path=extra_info_path, + ) diff --git a/learnware/tests/benchmarks/config.py b/learnware/tests/benchmarks/config.py index 3921900f..c81575bd 100644 --- a/learnware/tests/benchmarks/config.py +++ b/learnware/tests/benchmarks/config.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Callable, Union @dataclass @@ -12,4 +12,10 @@ class BenchmarkConfig: extra_info_path: Optional[str] = None -benchmark_configs: Dict[str, BenchmarkConfig] = {} +@dataclass +class LLMBenchmarkConfig: + name: str + preprocess_function: Optional[Callable] = None + + +benchmark_configs: Dict[str, Union[BenchmarkConfig, LLMBenchmarkConfig]] = {} diff --git a/learnware/tests/benchmarks/llm_process_funcs.py b/learnware/tests/benchmarks/llm_process_funcs.py new file mode 100644 index 00000000..e69de29b From 5bfa023eec2e92222b6cb76c1e4c08b7059632d3 Mon Sep 17 00:00:00 2001 From: Asymptotez <201220101@smail.nju.edu.cn> Date: Mon, 16 Dec 2024 20:17:33 +0800 Subject: [PATCH 041/108] [ENH] Add "Model Type" for sematic sepcification and modify checker --- learnware/config.py | 4 
++++ learnware/market/easy/checker.py | 37 +++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/learnware/config.py b/learnware/config.py index 24443146..ef87a028 100644 --- a/learnware/config.py +++ b/learnware/config.py @@ -97,6 +97,10 @@ def get_platform(): ], "Type": "Class", # Choose only one class }, + "Model Type" :{ + "Values": ["Base Model", "Fully Fine-tuned Model", "PEFT Model", "Others"], + "Type": "Class", + }, "Library": { "Values": ["Scikit-learn", "PyTorch", "TensorFlow", "Others"], "Type": "Class", diff --git a/learnware/market/easy/checker.py b/learnware/market/easy/checker.py index fea721f5..ae443cc3 100644 --- a/learnware/market/easy/checker.py +++ b/learnware/market/easy/checker.py @@ -44,6 +44,36 @@ def check_semantic_spec(semantic_spec): assert int(k) >= 0 and int(k) < dim, f"Dimension number in [0, {dim})" assert isinstance(v, str), "Description must be string" + assert semantic_spec["Task"]["Values"][0] in [ + "Classification", + "Regression", + "Feature Extraction", + "Others", + ] + + assert semantic_spec["Model Type"]["Values"][0] == "Others" + + if semantic_spec["Data"]["Values"][0] == "Image": + assert semantic_spec["Task"]["Values"][0] in [ + "Classification", + "Regression", + "Feature Extraction", + "Segmentation", + "Object Detection", + "Others", + ] + + assert semantic_spec["Model Type"]["Values"][0] == "Others" + + if semantic_spec["Data"]["Values"][0] == "Text": + assert semantic_spec["Task"]["Values"][0] in [ + "Classification", + "Regression", + "Feature Extraction", + "Text Generation", + "Others", + ] + if semantic_spec["Task"]["Values"][0] in ["Classification", "Regression"]: assert semantic_spec["Output"] is not None, "Lack of output semantics" dim = semantic_spec["Output"]["Dimension"] @@ -134,7 +164,12 @@ def __call__(self, learnware): inputs = np.random.randn(10, *input_shape) elif spec_type == "RKMETextSpecification" or spec_type == "TaskVectorSpecification": - inputs = EasyStatChecker._generate_random_text_list(10) + + if semantic_spec["Model Type"]["Values"][0] != "Others": + len = random.randint(10, 1000) + inputs = EasyStatChecker._generate_random_text_list(10, "en", len, len) + else: + inputs = EasyStatChecker._generate_random_text_list(10) elif spec_type == "RKMEImageSpecification": if not isinstance(input_shape, tuple) or not all(isinstance(item, int) for item in input_shape): From 55716330e8e2bf0022374312a539159d147cb028 Mon Sep 17 00:00:00 2001 From: liuht Date: Tue, 17 Dec 2024 22:34:47 +0800 Subject: [PATCH 042/108] [FIX] fix bugs --- learnware/market/easy/searcher.py | 56 ++++++++++++++++++---- learnware/market/heterogeneous/searcher.py | 44 +++-------------- learnware/market/heterogeneous/utils.py | 51 ++++++++++++++++++++ learnware/market/module.py | 17 ++++--- 4 files changed, 116 insertions(+), 52 deletions(-) create mode 100644 learnware/market/heterogeneous/utils.py diff --git a/learnware/market/easy/searcher.py b/learnware/market/easy/searcher.py index 3c5a808b..77f30727 100644 --- a/learnware/market/easy/searcher.py +++ b/learnware/market/easy/searcher.py @@ -227,13 +227,13 @@ def __call__( class EasyStatSearcher(AtomicSearcher): - STAT_TYPES = ["RKMETableSpecification", "RKMEImageSpecification", "RKMETextSpecification"] + SPEC_TYPES = ["RKMETableSpecification", "RKMEImageSpecification", "RKMETextSpecification"] def is_applicable_learnware(self, learnware: Learnware) -> bool: - return any(spec_type in learnware.specification.stat_spec for spec_type in self.STAT_TYPES) + return 
any(spec_type in learnware.specification.stat_spec for spec_type in self.SPEC_TYPES) def is_applicable_user(self, user_info: BaseUserInfo) -> bool: - for spec_type in self.STAT_TYPES: + for spec_type in self.SPEC_TYPES: if spec_type in user_info.stat_info: user_rkme = user_info.stat_info[spec_type] @@ -468,6 +468,43 @@ def _filter_by_rkme_spec_single( idx = idx + 1 return sorted_score_list[:idx], learnware_list[:idx] + def _filter_by_rkme_spec_metadata( + self, + learnware_list: List[Learnware], + user_rkme: Union[RKMETableSpecification, RKMEImageSpecification, RKMETextSpecification], + ) -> List[Learnware]: + """Filter learnwares whose rkme metadata different from user_rkme + + Parameters + ---------- + learnware_list : List[Learnware] + The list of learnwares whose mixture approximates the user's rkme + user_rkme : Union[RKMETableSpecification, RKMEImageSpecification, RKMETextSpecification] + User RKME statistical specification + + Returns + ------- + List[Learnware] + Learnwares whose rkme dimensions equal user_rkme in user_info + """ + filtered_learnware_list = [] + user_rkme_dim = str(list(user_rkme.get_z().shape)[1:]) + + for learnware in learnware_list: + if self.stat_spec_type not in learnware.specification.stat_spec: + continue + rkme = learnware.specification.get_stat_spec_by_name(self.stat_spec_type) + if self.stat_spec_type == "RKMETextSpecification" and not set(user_rkme.language).issubset( + set(rkme.language) + ): + continue + + rkme_dim = str(list(rkme.get_z().shape)[1:]) + if rkme_dim == user_rkme_dim: + filtered_learnware_list.append(learnware) + + return filtered_learnware_list + def _search_by_rkme_spec_mixture_greedy( self, learnware_list: List[Learnware], @@ -587,9 +624,11 @@ def __call__( max_search_num: int = 5, search_method: str = "greedy", ) -> SearchResults: - self.stat_spec_type = parse_specification_type(stat_specs=user_info.stat_info, spec_list=self.STAT_TYPES) + self.stat_spec_type = parse_specification_type(stat_specs=user_info.stat_info, spec_list=self.SPEC_TYPES) + print(self.stat_spec_type, self.SPEC_TYPES) user_rkme = user_info.stat_info[self.stat_spec_type] + learnware_list = self._filter_by_rkme_spec_metadata(learnware_list, user_rkme) logger.info(f"After filter by rkme dimension, learnware_list length is {len(learnware_list)}") sorted_dist_list, single_learnware_list = self._search_by_rkme_spec_single(learnware_list, user_rkme) @@ -698,8 +737,8 @@ def __call__( """ learnware_list = self.learnware_organizer.get_learnwares(check_status=check_status) - for semantic_searcher in self.semantic_searchers: - if semantic_searcher.is_applicable(user_info): + for semantic_searcher in self.semantic_searcher_list: + if semantic_searcher.is_applicable_user(user_info): filtered_learnware_list = [ learnware for learnware in learnware_list if semantic_searcher.is_applicable_learnware(learnware) ] @@ -710,11 +749,12 @@ def __call__( if len(learnware_list) == 0: return SearchResults() - for stat_searcher in self.stat_searchers: - if stat_searcher.is_applicable(user_info): + for stat_searcher in self.stat_searcher_list: + if stat_searcher.is_applicable_user(user_info): filtered_learnware_list = [ learnware for learnware in learnware_list if stat_searcher.is_applicable_learnware(learnware) ] + # print(f"Using searcher: {stat_searcher.__class__}, filtered learnware_list: {len(filtered_learnware_list)}") return stat_searcher(filtered_learnware_list, user_info, max_search_num, search_method) return semantic_search_result diff --git 
a/learnware/market/heterogeneous/searcher.py b/learnware/market/heterogeneous/searcher.py index 0ff94ec0..c991113c 100644 --- a/learnware/market/heterogeneous/searcher.py +++ b/learnware/market/heterogeneous/searcher.py @@ -1,7 +1,9 @@ -from typing import Optional +from typing import List +from .utils import is_hetero from ..base import BaseUserInfo, SearchResults from ..easy import EasyStatSearcher +from ...learnware import Learnware from ...logger import get_module_logger logger = get_module_logger("hetero_searcher") @@ -13,46 +15,12 @@ class HeteroStatSearcher(EasyStatSearcher): def is_applicable_user(self, user_info: BaseUserInfo, verbose: bool = True) -> bool: stat_specs = user_info.stat_info semantic_spec = user_info.semantic_spec - try: - table_stat_spec = stat_specs["RKMETableSpecification"] - table_input_shape = table_stat_spec.get_z().shape[1] - - semantic_data_type = semantic_spec["Data"]["Values"] - if len(semantic_data_type) > 0 and semantic_data_type != ["Table"]: - logger.warning("User doesn't provide correct data type, it must be Table.") - return False - - semantic_task_type = semantic_spec["Task"]["Values"] - if len(semantic_task_type) > 0 and semantic_task_type not in [["Classification"], ["Regression"]]: - logger.warning( - "User doesn't provide correct task type, it must be either Classification or Regression." - ) - return False - - semantic_input_description = semantic_spec["Input"] - semantic_description_dim = int(semantic_input_description["Dimension"]) - semantic_decription_feature_num = len(semantic_input_description["Description"]) - - if semantic_decription_feature_num <= 0: - if verbose: - logger.warning("At least one of Input.Description in semantic spec should be provides.") - return False - - if table_input_shape != semantic_description_dim: - if verbose: - logger.warning("User data feature dimensions mismatch with semantic specification.") - return False - - return True - except Exception as err: - if verbose: - logger.warning("Invalid heterogeneous search information provided.") - return False + return is_hetero(stat_specs=stat_specs, semantic_spec=semantic_spec, verbose=verbose) def __call__( self, + learnware_list: List[Learnware], user_info: BaseUserInfo, - check_status: Optional[int] = None, max_search_num: int = 5, search_method: str = "greedy", ) -> SearchResults: @@ -80,4 +48,4 @@ def __call__( user_hetero_spec = self.learnware_organizer.generate_hetero_map_spec(user_info) user_info.update_stat_info(user_hetero_spec.type, user_hetero_spec) - return super().__call__(user_info, check_status, max_search_num, search_method) + return super().__call__(learnware_list, user_info, max_search_num, search_method) diff --git a/learnware/market/heterogeneous/utils.py b/learnware/market/heterogeneous/utils.py new file mode 100644 index 00000000..860159e3 --- /dev/null +++ b/learnware/market/heterogeneous/utils.py @@ -0,0 +1,51 @@ +from ...logger import get_module_logger + +logger = get_module_logger("hetero_utils") + + +def is_hetero(stat_specs: dict, semantic_spec: dict, verbose=True) -> bool: + """Check if user_info satifies all the criteria required for enabling heterogeneous learnware search + + Parameters + ---------- + user_info : BaseUserInfo + user_info contains semantic_spec and stat_info + + Returns + ------- + bool + A flag indicating whether heterogeneous search is enabled for user_info + """ + try: + table_stat_spec = stat_specs["RKMETableSpecification"] + table_input_shape = table_stat_spec.get_z().shape[1] + + semantic_data_type = 
semantic_spec["Data"]["Values"] + if len(semantic_data_type) > 0 and semantic_data_type != ["Table"]: + logger.warning("User doesn't provide correct data type, it must be Table.") + return False + + semantic_task_type = semantic_spec["Task"]["Values"] + if len(semantic_task_type) > 0 and semantic_task_type not in [["Classification"], ["Regression"]]: + logger.warning("User doesn't provide correct task type, it must be either Classification or Regression.") + return False + + semantic_input_description = semantic_spec["Input"] + semantic_description_dim = int(semantic_input_description["Dimension"]) + semantic_decription_feature_num = len(semantic_input_description["Description"]) + + if semantic_decription_feature_num <= 0: + if verbose: + logger.warning("At least one of Input.Description in semantic spec should be provides.") + return False + + if table_input_shape != semantic_description_dim: + if verbose: + logger.warning("User data feature dimensions mismatch with semantic specification.") + return False + + return True + except Exception as err: + if verbose: + logger.warning("Invalid heterogeneous search information provided.") + return False diff --git a/learnware/market/module.py b/learnware/market/module.py index 8c87e5cd..2555b83a 100644 --- a/learnware/market/module.py +++ b/learnware/market/module.py @@ -22,8 +22,8 @@ def get_market_component( if name == "easy": easy_organizer = EasyOrganizer(market_id=market_id, rebuild=rebuild) - semantic_searcher_list = [EasyFuzzSemanticSearcher()] - stat_searcher_list = [EasyStatSearcher()] + semantic_searcher_list = [EasyFuzzSemanticSearcher(easy_organizer)] + stat_searcher_list = [EasyStatSearcher(easy_organizer)] easy_searcher = SeqCombinedSearcher( organizer=easy_organizer, semantic_searcher_list=semantic_searcher_list, @@ -44,8 +44,8 @@ def get_market_component( elif name == "hetero": hetero_organizer = HeteroMapTableOrganizer(market_id=market_id, rebuild=rebuild, **organizer_kwargs) - semantic_searcher_list = [EasyFuzzSemanticSearcher(organizer=hetero_organizer)] - stat_searcher_list = [HeteroStatSearcher(), EasyStatSearcher()] + semantic_searcher_list = [EasyFuzzSemanticSearcher(hetero_organizer)] + stat_searcher_list = [HeteroStatSearcher(hetero_organizer), EasyStatSearcher(hetero_organizer)] hetero_searcher = SeqCombinedSearcher( organizer=hetero_organizer, semantic_searcher_list=semantic_searcher_list, @@ -66,8 +66,13 @@ def get_market_component( elif name == "llm": llm_organizer = HeteroMapTableOrganizer(market_id=market_id, rebuild=rebuild, **organizer_kwargs) - semantic_searcher_list = [EasyFuzzSemanticSearcher()] - stat_searcher_list = [LLMStatSearcher(), HeteroStatSearcher(), EasyStatSearcher()] + semantic_searcher_list = [EasyFuzzSemanticSearcher(llm_organizer)] + stat_searcher_list = [ + LLMStatSearcher(llm_organizer), + HeteroStatSearcher(llm_organizer), + EasyStatSearcher(llm_organizer), + ] + llm_searcher = SeqCombinedSearcher( organizer=llm_organizer, semantic_searcher_list=semantic_searcher_list, From 1497553a151fe13be7270ef9e769827d907ce62f Mon Sep 17 00:00:00 2001 From: Asymptotez <201220101@smail.nju.edu.cn> Date: Wed, 18 Dec 2024 18:15:41 +0800 Subject: [PATCH 043/108] [MNT] Add MODEL_TYPE in SemanticSpecificationKey --- learnware/client/learnware_client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/learnware/client/learnware_client.py b/learnware/client/learnware_client.py index 2be5e550..0afc4a77 100644 --- a/learnware/client/learnware_client.py +++ b/learnware/client/learnware_client.py @@ -52,6 +52,7 @@ 
class SemanticSpecificationKey(Enum): DATA_TYPE = "Data" TASK_TYPE = "Task" LIBRARY_TYPE = "Library" + MODEL_TYPE = "Model Type" SENARIOES = "Scenario" LICENSE = "License" From 2b1572a2f53929ad8bb8f51209b3924b3a94f2b0 Mon Sep 17 00:00:00 2001 From: liuht Date: Thu, 19 Dec 2024 15:57:09 +0800 Subject: [PATCH 044/108] [MNT] add LLMEasyOrganizer, modify LLMGeneralCapabilitySpecification --- docs/components/market.rst | 4 +- learnware/market/__init__.py | 3 +- learnware/market/easy/checker.py | 33 ++++- .../heterogeneous/organizer/__init__.py | 6 +- learnware/market/heterogeneous/searcher.py | 14 +- learnware/market/heterogeneous/utils.py | 4 +- learnware/market/llm/__init__.py | 6 +- learnware/market/llm/organizer.py | 121 ++++++++++++++++++ learnware/market/module.py | 4 +- learnware/specification/system/llm_vector.py | 35 ++++- learnware/tests/__init__.py | 3 +- learnware/tests/benchmarks/config.py | 2 + 12 files changed, 207 insertions(+), 28 deletions(-) create mode 100644 learnware/market/llm/organizer.py diff --git a/docs/components/market.rst b/docs/components/market.rst index 896b5ec0..3c349b98 100644 --- a/docs/components/market.rst +++ b/docs/components/market.rst @@ -65,7 +65,7 @@ Easy market is a basic realization of the learnware market. It consists of ``Eas ``EasyOrganizer`` mainly has the following methods to store learnwares, which is an easy way to organize learnwares. - **reload_market**: Reload the learnware market when the server restarts and return a flag indicating whether the market is reloaded successfully. -- **add_learnware**: Add a learnware with ``learnware_id``, ``semantic_spec`` and model files in ``zip_path`` into the market. Return the ``learnware_id`` and ``learnwere_status``. The ``learnwere_status`` is set to ``check_status`` if it is provided. Otherwise, the ``checker`` will be called to generate the ``learnwere_status``. +- **add_learnware**: Add a learnware with ``learnware_id``, ``semantic_spec`` and model files in ``zip_path`` into the market. Return the ``learnware_id`` and ``learnware_status``. The ``learnware_status`` is set to ``check_status`` if it is provided. Otherwise, the ``checker`` will be called to generate the ``learnware_status``. - **delete_learnware**: Delete the learnware with ``id`` from the market and return a flag indicating whether the deletion is successful. - **update_learnware**: Update the learnware's ``zip_path``, ``semantic_spec``, ``check_status``. If None, the corresponding item is not updated. Return a flag indicating whether it passed the ``checker``. - **get_learnwares**: Similar to **get_learnware_ids**, but return list of learnwares instead of ids. @@ -148,7 +148,7 @@ As more learnwares are submitted, this heterogeneous engine will continuously up - **reload_market**: Reloads the heterogeneous engine if there is one. Otherwise, initialize an engine with default configurations. Returns a flag indicating whether the market is reloaded successfully. - **reset**: Resets the heterogeneous market with specific settings regarding the heterogeneous engine such as ``auto_update``, ``auto_update_limit`` and ``training_args`` configurations. -- **add_learnware**: Add a learnware into the market, meanwhile generating ``HeteroMapTableSpecification`` for the learnware using the heterogeneous engine. The engine's update process will be triggered if ``auto_update`` is set to True and the number of learnwares in the market with ``USABLE_LEARNWARE`` status exceeds ``auto_update_limit``. 
Return the ``learnware_id`` and ``learnwere_status``. +- **add_learnware**: Add a learnware into the market, meanwhile generating ``HeteroMapTableSpecification`` for the learnware using the heterogeneous engine. The engine's update process will be triggered if ``auto_update`` is set to True and the number of learnwares in the market with ``USABLE_LEARNWARE`` status exceeds ``auto_update_limit``. Return the ``learnware_id`` and ``learnware_status``. - **delete_learnware**: Removes the learnware with ``id`` from the market and also removes its new specification if there is one. Return a flag of whether the deletion is successful. - **update_learnware**: Update the learnware's ``zip_path``, ``semantic_spec``, ``check_status`` and its new specification if there is one. Return a flag indicating whether it passed the ``checker``. - **generate_hetero_map_spec**: Generate ``HeteroMapTableSpecification`` for users based on the user's statistical specification provided in ``user_info``. diff --git a/learnware/market/__init__.py b/learnware/market/__init__.py index 6cb5ee58..852151bb 100644 --- a/learnware/market/__init__.py +++ b/learnware/market/__init__.py @@ -13,7 +13,7 @@ from .evolve import EvolvedOrganizer from .evolve_anchor import EvolvedAnchoredOrganizer from .heterogeneous import HeteroMapTableOrganizer, HeteroStatSearcher -from .llm import LLMStatSearcher +from .llm import LLMEasyOrganizer, LLMStatSearcher from .module import instantiate_learnware_market __all__ = [ @@ -37,6 +37,7 @@ "EvolvedAnchoredOrganizer", "HeteroMapTableOrganizer", "HeteroStatSearcher", + "LLMEasyOrganizer", "LLMStatSearcher", "instantiate_learnware_market", ] diff --git a/learnware/market/easy/checker.py b/learnware/market/easy/checker.py index ae443cc3..f43bab16 100644 --- a/learnware/market/easy/checker.py +++ b/learnware/market/easy/checker.py @@ -9,6 +9,8 @@ from ..utils import parse_specification_type from ...config import C from ...logger import get_module_logger +from ...specification import LLMGeneralCapabilitySpecification +from ...tests.benchmarks import llm_general_capability_benchmark_configs logger = get_module_logger("easy_checker", "INFO") @@ -50,7 +52,7 @@ def check_semantic_spec(semantic_spec): "Feature Extraction", "Others", ] - + assert semantic_spec["Model Type"]["Values"][0] == "Others" if semantic_spec["Data"]["Values"][0] == "Image": @@ -62,7 +64,7 @@ def check_semantic_spec(semantic_spec): "Object Detection", "Others", ] - + assert semantic_spec["Model Type"]["Values"][0] == "Others" if semantic_spec["Data"]["Values"][0] == "Text": @@ -164,7 +166,6 @@ def __call__(self, learnware): inputs = np.random.randn(10, *input_shape) elif spec_type == "RKMETextSpecification" or spec_type == "TaskVectorSpecification": - if semantic_spec["Model Type"]["Values"][0] != "Others": len = random.randint(10, 1000) inputs = EasyStatChecker._generate_random_text_list(10, "en", len, len) @@ -185,14 +186,14 @@ def __call__(self, learnware): try: outputs = learnware.predict(inputs) except Exception: - message = f"The learnware {learnware.id} prediction is not avaliable!" + message = f"The learnware [{learnware.id}] prediction is not available!" logger.warning(message) message += "\r\n" + traceback.format_exc() return self.INVALID_LEARNWARE, message # Check length of input and output if len(inputs) != len(outputs): - message = f"The learnware {learnware.id} output length must be equal to input length!" + message = f"The learnware [{learnware.id}] output length must be equal to input length!" 
logger.warning(message) return self.INVALID_LEARNWARE, message @@ -205,7 +206,7 @@ def __call__(self, learnware): if isinstance(outputs, torch.Tensor): outputs = outputs.detach().cpu().numpy() if not isinstance(outputs, np.ndarray): - message = f"The learnware {learnware.id} output must be np.ndarray or torch.Tensor!" + message = f"The learnware [{learnware.id}] output must be np.ndarray or torch.Tensor!" logger.warning(message) return self.INVALID_LEARNWARE, message @@ -247,6 +248,26 @@ def __call__(self, learnware): logger.warning(message) return self.INVALID_LEARNWARE, message + # check llm base model learnware general capability + if ( + semantic_spec["Data"]["Values"] == ["Text"] + and semantic_spec["Task"]["Values"] == ["Text Generation"] + and semantic_spec["Model Type"]["Values"] == ["Base Model"] + ): + try: + general_capability_spec = LLMGeneralCapabilitySpecification() + general_capability_spec.generate_stat_spec_from_system( + learnware=learnware, benchmark_configs=llm_general_capability_benchmark_configs + ) + learnware.update_stat_spec(general_capability_spec.type, general_capability_spec) + except Exception: + message = ( + f"The learnware [{learnware.id}] llm base model general capability evaluation is not available!" + ) + logger.warning(message) + message += "\r\n" + traceback.format_exc() + return self.INVALID_LEARNWARE, message + except Exception as e: message = f"The learnware [{learnware.id}] is not valid! Due to {repr(e)}." logger.warning(message) diff --git a/learnware/market/heterogeneous/organizer/__init__.py b/learnware/market/heterogeneous/organizer/__init__.py index 3dcbc6d5..e04719a8 100644 --- a/learnware/market/heterogeneous/organizer/__init__.py +++ b/learnware/market/heterogeneous/organizer/__init__.py @@ -89,11 +89,11 @@ def add_learnware( - str indicating model_id - int indicating the final learnware check_status """ - learnware_id, learnwere_status = super(HeteroMapTableOrganizer, self).add_learnware( + learnware_id, learnware_status = super(HeteroMapTableOrganizer, self).add_learnware( zip_path, semantic_spec, check_status, learnware_id ) - if learnwere_status == BaseChecker.USABLE_LEARNWARE and len(self._get_hetero_learnware_ids(learnware_id)): + if learnware_status == BaseChecker.USABLE_LEARNWARE and len(self._get_hetero_learnware_ids(learnware_id)): self._update_learnware_hetero_spec(learnware_id) if self.auto_update: @@ -115,7 +115,7 @@ def add_learnware( self.count_down = self.auto_update_limit - return learnware_id, learnwere_status + return learnware_id, learnware_status def delete_learnware(self, id: str) -> bool: """Delete learnware from heterogeneous learnware market. 
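The checker.py hunk in this patch wires a general-capability evaluation into EasyStatChecker for learnwares whose semantic specification marks them as Text / Text Generation / Base Model. Below is a minimal sketch of that intended flow, outside the diff itself; the helper name attach_general_capability_spec is illustrative, and a loaded Learnware object is assumed.

from learnware.specification import LLMGeneralCapabilitySpecification
from learnware.tests.benchmarks import llm_general_capability_benchmark_configs


def attach_general_capability_spec(learnware):
    # Illustrative helper (not part of the patch): mirrors the check added to
    # EasyStatChecker.__call__ for LLM base-model learnwares.
    semantic_spec = learnware.get_specification().get_semantic_spec()
    is_llm_base_model = (
        semantic_spec["Data"]["Values"] == ["Text"]
        and semantic_spec["Task"]["Values"] == ["Text Generation"]
        and semantic_spec["Model Type"]["Values"] == ["Base Model"]
    )
    if is_llm_base_model:
        # Evaluate the base model on the registered general-capability benchmarks
        # and attach the resulting score vector as a system statistical specification.
        spec = LLMGeneralCapabilitySpecification()
        spec.generate_stat_spec_from_system(
            learnware=learnware,
            benchmark_configs=llm_general_capability_benchmark_configs,
        )
        learnware.update_stat_spec(spec.type, spec)
    return learnware
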
diff --git a/learnware/market/heterogeneous/searcher.py b/learnware/market/heterogeneous/searcher.py index c991113c..40710b6e 100644 --- a/learnware/market/heterogeneous/searcher.py +++ b/learnware/market/heterogeneous/searcher.py @@ -12,10 +12,20 @@ class HeteroStatSearcher(EasyStatSearcher): SPEC_TYPES = ["HeteroMapTableSpecification"] - def is_applicable_user(self, user_info: BaseUserInfo, verbose: bool = True) -> bool: + def is_applicable_learnware(self, learnware: Learnware) -> bool: + if not super(HeteroStatSearcher, self).is_applicable_learnware(learnware): + return False + + spec = learnware.get_specification() + return is_hetero(stat_specs=spec.get_stat_spec(), semantic_spec=spec.get_semantic_spec(), verbose=False) + + def is_applicable_user(self, user_info: BaseUserInfo) -> bool: + if not super(HeteroStatSearcher, self).is_applicable_user(user_info): + return False + stat_specs = user_info.stat_info semantic_spec = user_info.semantic_spec - return is_hetero(stat_specs=stat_specs, semantic_spec=semantic_spec, verbose=verbose) + return is_hetero(stat_specs=stat_specs, semantic_spec=semantic_spec, verbose=False) def __call__( self, diff --git a/learnware/market/heterogeneous/utils.py b/learnware/market/heterogeneous/utils.py index 860159e3..67b7fcbe 100644 --- a/learnware/market/heterogeneous/utils.py +++ b/learnware/market/heterogeneous/utils.py @@ -32,9 +32,9 @@ def is_hetero(stat_specs: dict, semantic_spec: dict, verbose=True) -> bool: semantic_input_description = semantic_spec["Input"] semantic_description_dim = int(semantic_input_description["Dimension"]) - semantic_decription_feature_num = len(semantic_input_description["Description"]) + semantic_description_feature_num = len(semantic_input_description["Description"]) - if semantic_decription_feature_num <= 0: + if semantic_description_feature_num <= 0: if verbose: logger.warning("At least one of Input.Description in semantic spec should be provides.") return False diff --git a/learnware/market/llm/__init__.py b/learnware/market/llm/__init__.py index 7b1624a0..ecec1109 100644 --- a/learnware/market/llm/__init__.py +++ b/learnware/market/llm/__init__.py @@ -4,9 +4,11 @@ logger = get_module_logger("market_llm") if not is_torch_available(verbose=False): + LLMEasyOrganizer = None LLMStatSearcher = None - logger.error("LLMStatSearcher is not available because 'torch' is not installed!") + logger.error("LLMStatSearcher and LLMEasyOrganizer are not available because 'torch' is not installed!") else: + from .organizer import LLMEasyOrganizer from .searcher import LLMStatSearcher -__all__ = ["LLMStatSearcher"] +__all__ = ["LLMEasyOrganizer", "LLMStatSearcher"] diff --git a/learnware/market/llm/organizer.py b/learnware/market/llm/organizer.py new file mode 100644 index 00000000..e70315e9 --- /dev/null +++ b/learnware/market/llm/organizer.py @@ -0,0 +1,121 @@ +import os +import tempfile +import zipfile +import traceback +from shutil import copyfile +from typing import List, Union + +from ..heterogeneous import HeteroMapTableOrganizer +from ..base import BaseChecker +from ...config import C +from ...utils import read_yaml_to_dict, save_dict_to_yaml +from ...logger import get_module_logger +from ...specification import LLMGeneralCapabilitySpecification +from ...tests.benchmarks import BenchmarkConfig + +logger = get_module_logger("llm_easy_organizer") + + +class LLMEasyOrganizer(HeteroMapTableOrganizer): + # update base model learnware when llm benchmark list updates + def update_learnware( + self, + id: str, + zip_path: str = None, + 
semantic_spec: dict = None, + check_status: int = None, + benchmark_configs: List[BenchmarkConfig] = None, + ): + final_status = super(LLMEasyOrganizer, self).update_learnware(id, zip_path, semantic_spec, check_status) + if final_status == BaseChecker.USABLE_LEARNWARE and len(self._get_hetero_learnware_ids(id)): + self._update_learnware_general_capability_spec(ids=id, benchmark_configs=benchmark_configs) + return final_status + + def _update_learnware_general_capability_spec( + self, ids: Union[str, List[str]], benchmark_configs: List[BenchmarkConfig] + ): + """Update learnware by ids, attempting to generate LLMGeneralCapabilitySpecification for them. + + Parameters + ---------- + ids : Union[str, List[str]] + Give a id or a list of ids + str: id of target learnware + List[str]: A list of ids of target learnwares + """ + if isinstance(ids, str): + ids = [ids] + + for idx in ids: + try: + general_capability_spec = LLMGeneralCapabilitySpecification() + general_capability_spec.generate_stat_spec_from_system( + learnware=self.learnware_list[idx], benchmark_configs=benchmark_configs + ) + general_capability_spec_config = { + "module_path": "learnware.specification", + "class_name": general_capability_spec.type, + "file_name": "general_capability_spec.json", + "kwargs": {}, + } + + zip_path = self.learnware_zip_list[id] + folder_dir = self.learnware_folder_list[id] + self.learnware_list[idx].update_stat_spec(general_capability_spec.type, general_capability_spec) + + with tempfile.TemporaryDirectory(prefix="learnware_") as tempdir: + # update yaml file + with zipfile.ZipFile(zip_path, "r") as z_file: + z_file.extract(C.learnware_folder_config["yaml_file"], tempdir) + + learnware_yaml_path = os.path.join(tempdir, C.learnware_folder_config["yaml_file"]) + yaml_config = read_yaml_to_dict(learnware_yaml_path) + if "stat_specifications" in yaml_config: + yaml_config["stat_specifications"].append(general_capability_spec_config) + save_dict_to_yaml(yaml_config, learnware_yaml_path) + + with zipfile.ZipFile(zip_path, "a") as z_file: + z_file.write(learnware_yaml_path, C.learnware_folder_config["yaml_file"]) + + # save general capability specification + stat_spec_path = os.path.join(tempdir, general_capability_spec_config["file_name"]) + general_capability_spec.save(stat_spec_path) + with zipfile.ZipFile(zip_path, "a") as z_file: + z_file.write(stat_spec_path, general_capability_spec_config["file_name"]) + + # update learnware folder + copyfile(learnware_yaml_path, os.path.join(folder_dir, C.learnware_folder_config["yaml_file"])) + copyfile(stat_spec_path, os.path.join(folder_dir, general_capability_spec_config["file_name"])) + + except Exception as err: + traceback.print_exc() + logger.warning(f"Learnware {idx} generate LLMGeneralCapabilitySpecification failed!") + + def _get_llm_base_model_learnware_ids(self, ids: Union[str, List[str]]) -> List[str]: + """Get learnware ids that corresponding learnware contains a llm base model. 
+ + Parameters + ---------- + ids : Union[str, List[str]] + Give a id or a list of ids + str: id of target learnware + List[str]: A list of ids of target learnwares + + Returns + ------- + List[str] + Learnware ids + """ + if isinstance(ids, str): + ids = [ids] + + ret = [] + for idx in ids: + semantic_spec = self.learnware_list[idx].get_specification().get_semantic_spec() + if ( + semantic_spec["Data"]["Values"] == ["Text"] + and semantic_spec["Task"]["Values"] == ["Text Generation"] + and semantic_spec["Model Type"]["Values"] == ["Base Model"] + ): + ret.append(idx) + return ret diff --git a/learnware/market/module.py b/learnware/market/module.py index 2555b83a..2dbb7a6e 100644 --- a/learnware/market/module.py +++ b/learnware/market/module.py @@ -9,7 +9,7 @@ EasyStatChecker, ) from .heterogeneous import HeteroMapTableOrganizer, HeteroStatSearcher -from .llm import LLMStatSearcher +from .llm import LLMEasyOrganizer, LLMStatSearcher def get_market_component( @@ -64,7 +64,7 @@ def get_market_component( } elif name == "llm": - llm_organizer = HeteroMapTableOrganizer(market_id=market_id, rebuild=rebuild, **organizer_kwargs) + llm_organizer = LLMEasyOrganizer(market_id=market_id, rebuild=rebuild, **organizer_kwargs) semantic_searcher_list = [EasyFuzzSemanticSearcher(llm_organizer)] stat_searcher_list = [ diff --git a/learnware/specification/system/llm_vector.py b/learnware/specification/system/llm_vector.py index 749a6653..efc7e7dd 100644 --- a/learnware/specification/system/llm_vector.py +++ b/learnware/specification/system/llm_vector.py @@ -1,10 +1,10 @@ from __future__ import annotations -import numpy as np +from typing import List from .base import SystemStatSpecification -from ...model import TorchModel -from ...tests.benchmarks import LearnwareBenchmarkManager +from ...tests.benchmarks import LearnwareBenchmarkManager, BenchmarkConfig from ...logger import get_module_logger +from ...learnware import Learnware logger = get_module_logger("llm_general_capability_spec") @@ -13,14 +13,35 @@ class LLMGeneralCapabilitySpecification(SystemStatSpecification): """Large Language Model General Capability Specification""" def __init__(self): - self.score_vector = None super(LLMGeneralCapabilitySpecification, self).__init__(type=self.__class__.__name__) - def generate_stat_spec_from_system(self, model: TorchModel) -> np.ndarray: + def generate_stat_spec_from_system(self, learnware: Learnware, benchmark_configs: List[BenchmarkConfig]) -> dict: # model: foundation model dataset_names = LearnwareBenchmarkManager().list_benchmarks() pass - def get_spec(self) -> np.ndarray: - return self.score_vector + def save(self, filepath: str): + """Save the computed specification to a specified path in JSON format. + + Parameters + ---------- + filepath : str + The specified saving path + """ + raise NotImplementedError("save is not implemented") + + def load(self, filepath: str) -> bool: + """Load a specification file in JSON format from the specified path. + + Parameters + ---------- + filepath : str + The specified loading path. + + Returns + ------- + bool + True if the specification is loaded successfully. 
+ """ + raise NotImplementedError("load is not implemented") diff --git a/learnware/tests/__init__.py b/learnware/tests/__init__.py index 898b2fec..e9d27022 100644 --- a/learnware/tests/__init__.py +++ b/learnware/tests/__init__.py @@ -1,3 +1,4 @@ from .utils import parametrize +from .benchmarks.config import llm_general_capability_benchmark_configs -__all__ = ["parametrize"] +__all__ = ["parametrize", "llm_general_capability_benchmark_configs"] diff --git a/learnware/tests/benchmarks/config.py b/learnware/tests/benchmarks/config.py index c81575bd..90bf31aa 100644 --- a/learnware/tests/benchmarks/config.py +++ b/learnware/tests/benchmarks/config.py @@ -19,3 +19,5 @@ class LLMBenchmarkConfig: benchmark_configs: Dict[str, Union[BenchmarkConfig, LLMBenchmarkConfig]] = {} + +llm_general_capability_benchmark_configs: Dict[str, LLMBenchmarkConfig] = {} From 6a6ee9acec78e5d0e7f6cd359f98cd639756b30a Mon Sep 17 00:00:00 2001 From: liuht Date: Thu, 19 Dec 2024 17:15:53 +0800 Subject: [PATCH 045/108] [MNT] modify general capability specification details --- learnware/market/easy/checker.py | 4 ++-- learnware/market/llm/organizer.py | 7 ++---- learnware/specification/system/__init__.py | 6 +++-- .../llm_general_capability_spec/config.py | 5 ++++ .../spec.py} | 23 +++++++++++-------- 5 files changed, 27 insertions(+), 18 deletions(-) create mode 100644 learnware/specification/system/llm_general_capability_spec/config.py rename learnware/specification/system/{llm_vector.py => llm_general_capability_spec/spec.py} (65%) diff --git a/learnware/market/easy/checker.py b/learnware/market/easy/checker.py index f43bab16..965b72ea 100644 --- a/learnware/market/easy/checker.py +++ b/learnware/market/easy/checker.py @@ -10,7 +10,7 @@ from ...config import C from ...logger import get_module_logger from ...specification import LLMGeneralCapabilitySpecification -from ...tests.benchmarks import llm_general_capability_benchmark_configs +from ...specification.system.llm_general_capability_spec.config import general_capability_benchmark_configs logger = get_module_logger("easy_checker", "INFO") @@ -257,7 +257,7 @@ def __call__(self, learnware): try: general_capability_spec = LLMGeneralCapabilitySpecification() general_capability_spec.generate_stat_spec_from_system( - learnware=learnware, benchmark_configs=llm_general_capability_benchmark_configs + learnware=learnware, benchmark_configs=general_capability_benchmark_configs[:2] ) learnware.update_stat_spec(general_capability_spec.type, general_capability_spec) except Exception: diff --git a/learnware/market/llm/organizer.py b/learnware/market/llm/organizer.py index e70315e9..79e63929 100644 --- a/learnware/market/llm/organizer.py +++ b/learnware/market/llm/organizer.py @@ -24,11 +24,10 @@ def update_learnware( zip_path: str = None, semantic_spec: dict = None, check_status: int = None, - benchmark_configs: List[BenchmarkConfig] = None, ): final_status = super(LLMEasyOrganizer, self).update_learnware(id, zip_path, semantic_spec, check_status) if final_status == BaseChecker.USABLE_LEARNWARE and len(self._get_hetero_learnware_ids(id)): - self._update_learnware_general_capability_spec(ids=id, benchmark_configs=benchmark_configs) + self._update_learnware_general_capability_spec(id) return final_status def _update_learnware_general_capability_spec( @@ -49,9 +48,7 @@ def _update_learnware_general_capability_spec( for idx in ids: try: general_capability_spec = LLMGeneralCapabilitySpecification() - general_capability_spec.generate_stat_spec_from_system( - 
learnware=self.learnware_list[idx], benchmark_configs=benchmark_configs - ) + general_capability_spec.generate_stat_spec_from_system(learnware=self.learnware_list[idx]) general_capability_spec_config = { "module_path": "learnware.specification", "class_name": general_capability_spec.type, diff --git a/learnware/specification/system/__init__.py b/learnware/specification/system/__init__.py index 5677074e..6472fa3c 100644 --- a/learnware/specification/system/__init__.py +++ b/learnware/specification/system/__init__.py @@ -7,9 +7,11 @@ if not is_torch_available(verbose=False): HeteroMapTableSpecification = None LLMGeneralCapabilitySpecification = None - logger.error("HeteroMapTableSpecification and LLMGeneralCapabilitySpecification are not available because 'torch' is not installed!") + logger.error( + "HeteroMapTableSpecification and LLMGeneralCapabilitySpecification are not available because 'torch' is not installed!" + ) else: from .hetero_table import HeteroMapTableSpecification - from .llm_vector import LLMGeneralCapabilitySpecification + from .llm_general_capability_spec.spec import LLMGeneralCapabilitySpecification __all__ = ["SystemStatSpecification", "HeteroMapTableSpecification", "LLMGeneralCapabilitySpecification"] diff --git a/learnware/specification/system/llm_general_capability_spec/config.py b/learnware/specification/system/llm_general_capability_spec/config.py new file mode 100644 index 00000000..e7a04e20 --- /dev/null +++ b/learnware/specification/system/llm_general_capability_spec/config.py @@ -0,0 +1,5 @@ +from typing import List + +from ....tests.benchmarks import BenchmarkConfig + +general_capability_benchmark_configs: List[BenchmarkConfig] = [] diff --git a/learnware/specification/system/llm_vector.py b/learnware/specification/system/llm_general_capability_spec/spec.py similarity index 65% rename from learnware/specification/system/llm_vector.py rename to learnware/specification/system/llm_general_capability_spec/spec.py index efc7e7dd..50931cf2 100644 --- a/learnware/specification/system/llm_vector.py +++ b/learnware/specification/system/llm_general_capability_spec/spec.py @@ -1,10 +1,11 @@ from __future__ import annotations -from typing import List +from typing import List, Optional -from .base import SystemStatSpecification -from ...tests.benchmarks import LearnwareBenchmarkManager, BenchmarkConfig -from ...logger import get_module_logger -from ...learnware import Learnware +from .config import general_capability_benchmark_configs +from ..base import SystemStatSpecification +from ....tests.benchmarks import BenchmarkConfig +from ....logger import get_module_logger +from ....learnware import Learnware logger = get_module_logger("llm_general_capability_spec") @@ -12,13 +13,17 @@ class LLMGeneralCapabilitySpecification(SystemStatSpecification): """Large Language Model General Capability Specification""" + benchmark_configs: List[BenchmarkConfig] = general_capability_benchmark_configs + def __init__(self): super(LLMGeneralCapabilitySpecification, self).__init__(type=self.__class__.__name__) - def generate_stat_spec_from_system(self, learnware: Learnware, benchmark_configs: List[BenchmarkConfig]) -> dict: - # model: foundation model - dataset_names = LearnwareBenchmarkManager().list_benchmarks() - + def generate_stat_spec_from_system( + self, + learnware: Learnware, + benchmark_configs: Optional[List[BenchmarkConfig]] = None, + update_existing: bool = False, + ) -> dict: pass def save(self, filepath: str): From ed9c94c3362663e6d5a242056d18f561f9d6f4a1 Mon Sep 17 00:00:00 
2001 From: zouxiaochuan Date: Fri, 20 Dec 2024 11:26:35 +0800 Subject: [PATCH 046/108] [ENH] automatically download required learnware --- learnware/client/learnware_client.py | 68 ++++++++++++++++++++++++++-- learnware/client/utils.py | 6 ++- setup.py | 2 +- 3 files changed, 68 insertions(+), 8 deletions(-) diff --git a/learnware/client/learnware_client.py b/learnware/client/learnware_client.py index 2be5e550..eaeac526 100644 --- a/learnware/client/learnware_client.py +++ b/learnware/client/learnware_client.py @@ -57,7 +57,7 @@ class SemanticSpecificationKey(Enum): class LearnwareClient: - def __init__(self, host=None): + def __init__(self, host=None, timeout=None): self.headers = None if host is None: @@ -68,8 +68,25 @@ def __init__(self, host=None): self.chunk_size = 1024 * 1024 self.tempdir_list = [] self.login_status = False + if timeout is None: + self.timeout = 60 + else: + self.timeout = timeout atexit.register(self.cleanup) + self.storage_path = os.environ.get("LEARNWARE_STORAGE_PATH") + if self.storage_path is None: + self.storage_path = os.path.join(os.path.expanduser("~"), ".learnware", "default", "learnware_pool") + pass + self.default_zip_path = os.path.join(self.storage_path, "zips") + self.default_unzip_path = os.path.join(self.storage_path, "unzipped_learnwares") + if not os.path.exists(self.default_zip_path): + os.makedirs(self.default_zip_path, exist_ok=True) + pass + if not os.path.exists(self.default_unzip_path): + os.makedirs(self.default_unzip_path, exist_ok=True) + pass + def is_connected(self): url = f"{self.host}/auth/login_by_token" response = requests.post(url) @@ -80,8 +97,7 @@ def is_connected(self): def login(self, email, token): url = f"{self.host}/auth/login_by_token" - response = requests.post(url, json={"email": email, "token": token}) - + response = requests.post(url, json={"email": email, "token": token}, timeout=self.timeout) result = response.json() if result["code"] != 0: raise Exception("login failed: " + json.dumps(result)) @@ -189,7 +205,11 @@ def get_semantic_specification(self, learnware_id: str): return result["data"]["learnware_info"]["semantic_specification"] - def download_learnware(self, learnware_id: str, save_path: str): + def download_learnware(self, learnware_id: str, save_path: str = None): + if save_path is None: + save_path = os.path.join(self.default_zip_path, learnware_id + ".zip") + pass + url = f"{self.host}/engine/download_learnware" response = requests.get( @@ -202,7 +222,7 @@ def download_learnware(self, learnware_id: str, save_path: str): ) if response.status_code != 200: - raise Exception("download failed: " + json.dumps(response.json())) + raise Exception("download failed: " + response.text) num_chunks = int(response.headers["Content-Length"]) // CHUNK_SIZE + 1 bar = tqdm(total=num_chunks, desc="Downloading", unit="MB") @@ -272,6 +292,7 @@ def search_learnware(self, user_info: BaseUserInfo, page_size=10, page_index=0): "page": page_index, }, headers=self.headers, + timeout=self.timeout ) result = response.json() if result["code"] != 0: @@ -309,6 +330,43 @@ def list_semantic_specification_values(self, key: SemanticSpecificationKey): semantic_conf = result["data"]["semantic_specification"] return semantic_conf[key.value]["Values"] + def get_pretrained_path(self, learnware_id: str): + # get pretrained path from learnware id + + # check learnware exists + if os.path.exists(os.path.join(self.default_unzip_path, learnware_id)): + pass + else: + # learnware not exist + if not os.path.exists(os.path.join(self.default_zip_path, 
learnware_id + ".zip")): + self.download_learnware(learnware_id) + pass + else: + # learnware exists + pass + self.unzip_learnware(learnware_id) + pass + + yaml_file = os.path.join(self.default_unzip_path, learnware_id, C.learnware_folder_config["yaml_file"]) + with open(yaml_file, "r") as fin: + learnware_info = yaml.safe_load(fin) + pass + pretrained_path = learnware_info['model'].get("weights_file_path") + if pretrained_path is None: + raise FileNotFoundError(f"Pretrained path not found in learnware {learnware_id}") + + return os.path.join(self.default_unzip_path, learnware_id, pretrained_path) + pass + + def unzip_learnware(self, learnware_id: str): + if not os.path.exists(os.path.join(self.default_zip_path, learnware_id + ".zip")): + raise FileNotFoundError(f"Learnware {learnware_id} not found") + else: + with zipfile.ZipFile(os.path.join(self.default_zip_path, learnware_id + ".zip"), "r") as z_file: + z_file.extractall(os.path.join(self.default_unzip_path, learnware_id)) + pass + pass + def load_learnware( self, learnware_path: Optional[Union[str, List[str]]] = None, diff --git a/learnware/client/utils.py b/learnware/client/utils.py index fc96c01d..5e74cbaa 100644 --- a/learnware/client/utils.py +++ b/learnware/client/utils.py @@ -8,7 +8,7 @@ logger = get_module_logger(module_name="client_utils") -def system_execute(args, timeout=None, env=None, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE): +def system_execute(args, timeout=None, env=None, stdout=None, stderr=subprocess.PIPE): env = os.environ.copy() if env is None else env args = args if isinstance(args, str) else " ".join(args) @@ -92,6 +92,8 @@ def install_environment(learnware_dirpath, conda_env, conda_prefix=None): raise Exception("Environment.yaml or requirements.txt not found in the learnware folder.") logger.info(f"install learnware package for conda env [{conda_env}]") + learnware_package = os.environ.get("LEARNWARE_PACKAGE_LOCATION", "learnware") + system_execute( args=[ "conda", @@ -104,6 +106,6 @@ def install_environment(learnware_dirpath, conda_env, conda_prefix=None): "-m", "pip", "install", - "learnware", + learnware_package, ] ) diff --git a/setup.py b/setup.py index 7a485838..f2da971c 100644 --- a/setup.py +++ b/setup.py @@ -48,7 +48,7 @@ def get_version(rel_path: str) -> str: "docker>=6.1.3", "rapidfuzz>=3.4.0", "langdetect>=1.0.9", - "huggingface-hub<0.18", + "huggingface-hub", "transformers>=4.34.1", "portalocker>=2.0.0", "qpsolvers[clarabel]>=4.0.1", From 44d701fecdf3122c9f70efc8428c6c8d5979e7e7 Mon Sep 17 00:00:00 2001 From: HaoyuShi Date: Fri, 20 Dec 2024 15:10:29 +0800 Subject: [PATCH 047/108] [ENH] Add new Specification for Large Language Model (LLM). 
--- learnware/market/easy/checker.py | 2 +- learnware/market/llm/searcher.py | 6 +- learnware/market/utils.py | 2 +- learnware/specification/__init__.py | 4 +- learnware/specification/module.py | 6 +- learnware/specification/regular/__init__.py | 4 +- learnware/specification/regular/base.py | 21 ++ .../specification/regular/text/__init__.py | 8 +- .../specification/regular/text/generative.py | 263 ++++++++++++++++++ .../specification/regular/text/task_vector.py | 49 ---- 10 files changed, 300 insertions(+), 65 deletions(-) create mode 100644 learnware/specification/regular/text/generative.py delete mode 100644 learnware/specification/regular/text/task_vector.py diff --git a/learnware/market/easy/checker.py b/learnware/market/easy/checker.py index ae443cc3..33b71194 100644 --- a/learnware/market/easy/checker.py +++ b/learnware/market/easy/checker.py @@ -163,7 +163,7 @@ def __call__(self, learnware): return self.INVALID_LEARNWARE, message inputs = np.random.randn(10, *input_shape) - elif spec_type == "RKMETextSpecification" or spec_type == "TaskVectorSpecification": + elif spec_type == "RKMETextSpecification" or spec_type == "GenerativeModelSpecification": if semantic_spec["Model Type"]["Values"][0] != "Others": len = random.randint(10, 1000) diff --git a/learnware/market/llm/searcher.py b/learnware/market/llm/searcher.py index ca4c7769..c89a0bd7 100644 --- a/learnware/market/llm/searcher.py +++ b/learnware/market/llm/searcher.py @@ -7,15 +7,15 @@ class LLMStatSearcher(AtomicSearcher): - SPEC_TYPES = ["TaskVectorSpecification"] + SPEC_TYPES = ["GenerativeModelSpecification"] def is_applicable_user(self, user_info: BaseUserInfo, verbose: bool = True) -> bool: stat_specs = user_info.stat_info semantic_spec = user_info.semantic_spec try: - if "TaskVectorSpecification" not in stat_specs: + if "GenerativeModelSpecification" not in stat_specs: if verbose: - logger.warning("TaskVectorSpecification is not provided in stat_info.") + logger.warning("GenerativeModelSpecification is not provided in stat_info.") return False semantic_data_type = semantic_spec["Data"]["Values"] diff --git a/learnware/market/utils.py b/learnware/market/utils.py index f50e6a7b..b62ca3da 100644 --- a/learnware/market/utils.py +++ b/learnware/market/utils.py @@ -3,7 +3,7 @@ def parse_specification_type( spec_list=[ "HeteroMapTableSpecification", "RKMETableSpecification", - "TaskVectorSpecification" + "GenerativeModelSpecification" "RKMETextSpecification", "RKMEImageSpecification", "LLMGeneralCapabilitySpecification", diff --git a/learnware/specification/__init__.py b/learnware/specification/__init__.py index f8a95b3d..8a94f2fe 100644 --- a/learnware/specification/__init__.py +++ b/learnware/specification/__init__.py @@ -5,7 +5,7 @@ RKMEStatSpecification, RKMETableSpecification, RKMETextSpecification, - TaskVectorSpecification, + GenerativeModelSpecification, rkme_solve_qp, ) from .system import HeteroMapTableSpecification, LLMGeneralCapabilitySpecification @@ -36,7 +36,7 @@ "RKMEStatSpecification", "RKMETableSpecification", "RKMETextSpecification", - "TaskVectorSpecification", + "GenerativeModelSpecification", "HeteroMapTableSpecification", "LLMGeneralCapabilitySpecification", "rkme_solve_qp", diff --git a/learnware/specification/module.py b/learnware/specification/module.py index 127044a1..f6a8bf22 100644 --- a/learnware/specification/module.py +++ b/learnware/specification/module.py @@ -4,7 +4,7 @@ import pandas as pd import torch -from .regular import RKMEImageSpecification, RKMETableSpecification, RKMETextSpecification, 
TaskVectorSpecification +from .regular import RKMEImageSpecification, RKMETableSpecification, RKMETextSpecification, GenerativeModelSpecification from .utils import convert_to_numpy from ..config import C @@ -179,13 +179,13 @@ def generate_task_vector_spec( X: List[str], verbose: bool = True, **kwargs -) -> TaskVectorSpecification: +) -> GenerativeModelSpecification: # Check input type if not isinstance(X, list) or not all(isinstance(item, str) for item in X): raise TypeError("Input data must be a list of strings.") # Generate task vector spec - task_vector_spec = TaskVectorSpecification() + task_vector_spec = GenerativeModelSpecification() task_vector_spec.generate_stat_spec_from_data(X, verbose, **kwargs) return task_vector_spec diff --git a/learnware/specification/regular/__init__.py b/learnware/specification/regular/__init__.py index c6c69f9b..5e21f434 100644 --- a/learnware/specification/regular/__init__.py +++ b/learnware/specification/regular/__init__.py @@ -1,7 +1,7 @@ from .base import RegularStatSpecification from .image import RKMEImageSpecification from .table import RKMEStatSpecification, RKMETableSpecification, rkme_solve_qp -from .text import RKMETextSpecification, TaskVectorSpecification +from .text import RKMETextSpecification, GenerativeModelSpecification __all__ = [ "RegularStatSpecification", @@ -10,5 +10,5 @@ "RKMETableSpecification", "rkme_solve_qp", "RKMETextSpecification", - "TaskVectorSpecification" + "GenerativeModelSpecification" ] diff --git a/learnware/specification/regular/base.py b/learnware/specification/regular/base.py index 1960f0d9..f1888b84 100644 --- a/learnware/specification/regular/base.py +++ b/learnware/specification/regular/base.py @@ -2,6 +2,9 @@ from ..base import BaseStatSpecification +from torch.nn.functional import cosine_similarity + + class RegularStatSpecification(BaseStatSpecification): def generate_stat_spec(self, **kwargs): @@ -13,3 +16,21 @@ def generate_stat_spec_from_data(self, **kwargs): - kwargs also can include hyperparameters of specific method for specifaction generation """ raise NotImplementedError("generate_stat_spec_from_data is not implemented") + + +class TaskVectorSpecification(RegularStatSpecification): + + def _generate_models(self): + """Initialize foundational model (e.g. RoBERTa) used for task vector generation. + """ + pass + + @property + def task_vector(self): + raise NotImplemented + + def similarity(self, other: TaskVectorSpecification) -> float: + """Compute cosine similarity between two task vectors. 
+ """ + v1, v2 = self.task_vector, other.task_vector + return cosine_similarity(v1, v2, dim=0) \ No newline at end of file diff --git a/learnware/specification/regular/text/__init__.py b/learnware/specification/regular/text/__init__.py index 157fac58..2dd221fc 100644 --- a/learnware/specification/regular/text/__init__.py +++ b/learnware/specification/regular/text/__init__.py @@ -5,10 +5,10 @@ if not is_torch_available(verbose=False): RKMETextSpecification = None - TaskVectorSpecification = None - logger.error("RKMETextSpecification and TaskVectorSpecification are not available because 'torch' is not installed!") + GenerativeModelSpecification = None + logger.error("RKMETextSpecification and GenerativeModelSpecification are not available because 'torch' is not installed!") else: from .rkme import RKMETextSpecification - from .task_vector import TaskVectorSpecification + from .generative import GenerativeModelSpecification -__all__ = ["RKMETextSpecification", "TaskVectorSpecification"] +__all__ = ["RKMETextSpecification", "GenerativeModelSpecification"] diff --git a/learnware/specification/regular/text/generative.py b/learnware/specification/regular/text/generative.py new file mode 100644 index 00000000..d996d2d5 --- /dev/null +++ b/learnware/specification/regular/text/generative.py @@ -0,0 +1,263 @@ +from __future__ import annotations + +import tempfile +from typing import Any, Dict, List, Optional, Union + +import trl +import torch + +from torch import nn + +from trl import SFTConfig +from peft import LoraConfig +from datasets import Dataset + +from transformers import ( + PreTrainedModel, + TrainingArguments, + Qwen2ForCausalLM, + Qwen2Tokenizer + ) + +from peft import get_peft_model + +from ..base import TaskVectorSpecification +from ....logger import get_module_logger +from ....utils import allocate_cuda_idx, choose_device + +logger = get_module_logger("GenerativeModelSpecification", "INFO") + + +class GenerativeModelSpecification(TaskVectorSpecification): + """Task Vector Specification for Large Language Model""" + + def __init__(self, + cuda_idx: int = None, + attn_implementation: str = "eager", + per_device_train_batch_size: int = 2, + gradient_accumulation_steps: int = 1, + max_seq_length: int = 2048, + **kwargs): + """Initializing Task Vector Specification's parameters. + + Parameters + ---------- + cuda_idx : int, optional + A flag indicating whether use CUDA during RKME computation. -1 indicates CUDA not used. None indicates automatically choose device + + attn_implementation : str, optional + The type of attention implementation to use. Default is 'eager'. + + per_device_train_batch_size : int, optional + The training batch size for each device. Default is 2. + + gradient_accumulation_steps : int, optional + The number of steps to accumulate gradients before an optimizer step. + Default is 1. + + max_seq_length : int, optional + The maximum sequence length for the model input. Default is 2048. + + **kwargs : dict + Additional keyword arguments. 
+ """ + super(GenerativeModelSpecification, self).__init__(type=self.__class__.__name__) + + self._cuda_idx = allocate_cuda_idx() if cuda_idx is None else cuda_idx + self._device = choose_device(cuda_idx=self._cuda_idx) + + self._task_vector = None + + self.attn_implementation = attn_implementation + self.per_device_train_batch_size = per_device_train_batch_size + self.gradient_accumulation_steps = gradient_accumulation_steps + self.max_seq_length = max_seq_length + + self.__extra_args = { + "weight_decay_l1": 1.5, + "weight_decay_l2": .0, + "max_steps": 400, + "lr": 1e-5, + "max_grad_norm": 1.0, + "warmup_ratio": 0.03, + } + + + @property + def task_vector(self): + if self._task_vector is None: + raise Exception("Call generate_stat_spec_from_data first!") + + return self._task_vector + + + def generate_stat_spec_from_data( + self, + X: List[str] = None, + dataset: Optional[Dataset] = None, + dataset_text_field="text", + verbose: bool = True, + **kwargs + ): + """Initializing Task Vector Specification's parameters. + + Parameters + ---------- + + dataset_text_field : str, optional + Name of the text field of the dataset. Default is "text". + + """ + if dataset is None: + assert X is not None, "X and dataset cannot both be None." + dataset = Dataset.from_dict({dataset_text_field: X}) + + with tempfile.TemporaryDirectory() as temp_dir: + tokenizer, model = self._init_tokenizer_model() + trainer_config = self._trainer_config(temp_dir, dataset_text_field) + trainer = self._init_trainer(model, tokenizer, dataset, trainer_config) + + param_0 = [p.detach().clone() for n, p in trainer.model.named_parameters() if p.requires_grad] + trainer.train() + param_1 = [p.detach().clone() for n, p in trainer.model.named_parameters() if p.requires_grad] + + self._task_vector = torch.concatenate([ + (p1 - p0).reshape(-1) for p0, p1 in zip(param_0, param_1) + ]) + + + def _init_tokenizer_model(self): + """ + Initialize foundational model (e.g. Qwen) used for task vector generation. + And, this method should not be overridden if the specification needs to be submitted to Beimingwu. 
+ """ + tokenizer = Qwen2Tokenizer.from_pretrained("Qwen/Qwen2.5-0.5B") + model = Qwen2ForCausalLM.from_pretrained( + "Qwen/Qwen2.5-0.5B", + attn_implementation=self.attn_implementation, + torch_dtype=torch.bfloat16, + ).to(self._device) + + peft_config = LoraConfig( + r=16, + lora_alpha=32, + lora_dropout=0.1, + bias="none", + task_type="CAUSAL_LM", + target_modules=["q_proj", "k_proj", "v_proj"] + ) + + model = get_peft_model(model, peft_config) + + # TODO: Load adpater weight from online + + for n, p in model.named_parameters(): + if "lora_A" in n: + p.requires_grad = False + + return tokenizer, model + + + def _init_trainer(self, model, tokenizer, train_dataset, args): + + # TODO: set_seed(3407) + trainer = CustomSFTTrainer( + model=model, + train_dataset=train_dataset, + tokenizer=tokenizer, + weight_decay_l1=self.__extra_args["weight_decay_l1"], + args=args, + ) + + return trainer + + + def _trainer_config(self, temp_dir, dataset_text_field): + training_params = SFTConfig( + output_dir=temp_dir, # 结果路径 + max_steps=self.__extra_args["max_steps"], + per_device_train_batch_size=self.per_device_train_batch_size, # 这是每个GPU的训练批次大小 + gradient_accumulation_steps=self.gradient_accumulation_steps, # 累积多个步骤的梯度,以有效地增加批次大小 + learning_rate=self.__extra_args["lr"], # 初始学习率 + weight_decay=self.__extra_args["weight_decay_l2"], # 权重衰减率 + optim="adamw_torch", # 优化器 + eval_strategy="no", + save_strategy="no", + # fp16=True, # 启用混合精度训练 + # bf16=True, # 启用BF16 + max_grad_norm=self.__extra_args["max_grad_norm"], # 裁剪梯度 + warmup_ratio=self.__extra_args["warmup_ratio"], # 训练开始时的预热样本比例 + group_by_length=True, # 将训练数据集中大致相同长度的样本分组到同一batch中,提升prefill效率 + lr_scheduler_type="cosine", # 学习率调度器衰减策略 + ddp_timeout=180000000, + dataset_text_field=dataset_text_field, + max_seq_length=self.max_seq_length, + dataloader_num_workers=16, + seed = 3407, + ) + + return training_params + + + def save(self, filepath: str): + torch.save({ + "type": self.type, + "task_vector": self.task_vector.detach().cpu() + }, filepath) + + + def load(self, filepath: str): + state = torch.load(filepath, weights_only=True) + if state["type"] != self.type: + logger.warning("{} may not be consistent with this class {}.".format( + state["type"], self.type + )) + self._task_vector = state["task_vector"].to(self._device) + + +class CustomSFTTrainer(trl.SFTTrainer): + + def __init__(self, weight_decay_l1=None, **kwargs): + super().__init__(**kwargs) + model: Union[PreTrainedModel, nn.Module] = kwargs["model"] + args: TrainingArguments = kwargs["args"] + + if hasattr(args, "weight_decay_l1") and (weight_decay_l1 is not None): + print("Warning! weight_decay_l1 is overwrited by key args.") + if weight_decay_l1 is not None: + self.weight_decay_l1 = weight_decay_l1 + elif hasattr(args, "weight_decay_l1"): + self.weight_decay_l1 = args.weight_decay_l1 + else: + assert False, "weight_decay_l1 shounld be given." 
+ + self.parameters_l1_regularized = None + + def train( + self, + resume_from_checkpoint: Optional[Union[str, bool]] = None, + trial: Union["optuna.Trial", Dict[str, Any]] = None, + ignore_keys_for_eval: Optional[List[str]] = None, + **kwargs, + ): + self.parameters_l1_regularized = [ + (p, torch.nn.Parameter(p.clone().detach())) for n, p in self.model.named_parameters() if p.requires_grad + ] + + return super().train(resume_from_checkpoint=resume_from_checkpoint, trial=trial, + ignore_keys_for_eval=ignore_keys_for_eval, **kwargs) + + def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None): + # implement custom logic here + default_loss, outputs = super().compute_loss(model, inputs, return_outputs=True, num_items_in_batch=num_items_in_batch) + + if self.weight_decay_l1 > 0: + l1_norm = sum((torch.linalg.norm(p - p0, 1) for p, p0 in self.parameters_l1_regularized)) + # We mask lora_A after init. + l1_norm = self.weight_decay_l1 / len(self.parameters_l1_regularized) * l1_norm + loss = default_loss + l1_norm + else: + loss = default_loss + + return (loss, outputs) if return_outputs else loss + \ No newline at end of file diff --git a/learnware/specification/regular/text/task_vector.py b/learnware/specification/regular/text/task_vector.py deleted file mode 100644 index c21afa11..00000000 --- a/learnware/specification/regular/text/task_vector.py +++ /dev/null @@ -1,49 +0,0 @@ -from __future__ import annotations - -import os -from typing import Any, Dict, List, Union - -from ..base import RegularStatSpecification -from ....config import C -from ....logger import get_module_logger -from ....utils import allocate_cuda_idx, choose_device - -logger = get_module_logger("RKMETextSpecification", "INFO") - - -class TaskVectorSpecification(RegularStatSpecification): - """Task Vector Specification for Large Language Model""" - - def __init__(self, cuda_idx: int = None, **kwargs): - """Initializing Task Vector Specification's parameters. - - Parameters - ---------- - cuda_idx : int - A flag indicating whether use CUDA during RKME computation. -1 indicates CUDA not used. None indicates automatically choose device - """ - self.task_vector = None - self._cuda_idx = allocate_cuda_idx() if cuda_idx is None else cuda_idx - self._device = choose_device(cuda_idx=self._cuda_idx) - - self.model_config = None - - super(TaskVectorSpecification, self).__init__(type=self.__class__.__name__) - - def _generate_models(self): - """Initialize foundational model (e.g. RoBERTa) used for task vector generation. - """ - pass - - def generate_stat_spec_from_data( - self, - X: List[str], - verbose: bool = True, - **kwargs - ): - pass - - def dist(self, VectorSpec2: TaskVectorSpecification) -> float: - """Compute cosine similarity between two LLM task vectors. - """ - pass \ No newline at end of file From 0458788cf2040b191556efdeff47303f92f054b0 Mon Sep 17 00:00:00 2001 From: HaoyuShi Date: Fri, 20 Dec 2024 15:58:35 +0800 Subject: [PATCH 048/108] Add Test for LLM Specification. 
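A note for running this test, not part of the committed diff: it pulls its corpus from an internal `src.datasets.llm` package located through the `LIB_PATH` environment variable, so it only works inside that environment. A self-contained save/load round-trip check under the same assumptions (tiny placeholder corpus, default Qwen2.5-0.5B backbone and default fine-tuning schedule) might look like:

import os
import tempfile

import torch

from learnware.specification.regular.text import GenerativeModelSpecification

texts = ["a placeholder training sentence", "another placeholder sentence"]

spec = GenerativeModelSpecification()
spec.generate_stat_spec_from_data(X=texts)

with tempfile.TemporaryDirectory(prefix="learnware_") as tempdir:
    path = os.path.join(tempdir, "spec.pth")
    spec.save(path)

    restored = GenerativeModelSpecification()
    restored.load(path)
    # The reloaded task vector should match the one just computed.
    torch.testing.assert_close(spec.task_vector, restored.task_vector)
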
--- .../test_text_generative.py | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 tests/test_specification/test_text_generative.py diff --git a/tests/test_specification/test_text_generative.py b/tests/test_specification/test_text_generative.py new file mode 100644 index 00000000..bd76052c --- /dev/null +++ b/tests/test_specification/test_text_generative.py @@ -0,0 +1,75 @@ +import json +import os +import tempfile +import unittest + +import numpy as np +import torch + +import sys + +from learnware.specification.regular.text import GenerativeModelSpecification + + +# Import from our project + +if os.path.expanduser(os.environ["LIB_PATH"]) not in sys.path: + sys.path.append(os.path.expanduser(os.environ["LIB_PATH"])) + + +from src.datasets.llm.utils import set_seed, prepare_train_data +from src.datasets.llm.benchmark import Benchmark + + +class TestGenerativeModelSpecification(unittest.TestCase): + @staticmethod + def _test_with_X(X): + spec = GenerativeModelSpecification(max_steps=5) + spec.generate_stat_spec_from_data(X=X, dataset_text_field="txt") + + task_vector = spec.task_vector + + with tempfile.TemporaryDirectory(prefix="learnware_") as tempdir: + spec_path = os.path.join(tempdir, "spec.pth") + spec.save(spec_path) + + data = torch.load(spec_path, weights_only=True) + assert data["type"] == "GenerativeModelSpecification" + + spec2 = GenerativeModelSpecification() + spec2.load(spec_path) + + torch.testing.assert_close(task_vector, spec2.task_vector) + + assert spec2.type == "GenerativeModelSpecification" + + @staticmethod + def _test_with_dataset(dataset): + spec = GenerativeModelSpecification(max_steps=5) + spec.generate_stat_spec_from_data(dataset=dataset) + + task_vector = spec.task_vector + + with tempfile.TemporaryDirectory(prefix="learnware_") as tempdir: + spec_path = os.path.join(tempdir, "spec.pth") + spec.save(spec_path) + + data = torch.load(spec_path, weights_only=True) + assert data["type"] == "GenerativeModelSpecification" + + spec2 = GenerativeModelSpecification() + spec2.load(spec_path) + + torch.testing.assert_close(task_vector, spec2.task_vector) + assert spec2.type == "GenerativeModelSpecification" + + def test_image_rkme(self): + benchmark = Benchmark("medical") + train_dataset = benchmark.get_user_dataset("pubmedqa") + + self._test_with_X(train_dataset["text"]) + self._test_with_dataset(train_dataset) + + +if __name__ == "__main__": + unittest.main() From c1f599433c1c03ca594d44fe114bb095784757a0 Mon Sep 17 00:00:00 2001 From: HaoyuShi Date: Mon, 23 Dec 2024 19:05:11 +0800 Subject: [PATCH 049/108] [MIT] Remove some redundant codes. --- learnware/specification/regular/base.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/learnware/specification/regular/base.py b/learnware/specification/regular/base.py index f1888b84..e7fb1f1b 100644 --- a/learnware/specification/regular/base.py +++ b/learnware/specification/regular/base.py @@ -5,7 +5,6 @@ from torch.nn.functional import cosine_similarity - class RegularStatSpecification(BaseStatSpecification): def generate_stat_spec(self, **kwargs): self.generate_stat_spec_from_data(**kwargs) @@ -20,11 +19,6 @@ def generate_stat_spec_from_data(self, **kwargs): class TaskVectorSpecification(RegularStatSpecification): - def _generate_models(self): - """Initialize foundational model (e.g. RoBERTa) used for task vector generation. 
- """ - pass - @property def task_vector(self): raise NotImplemented From c21ed432c223a7f7efc43139bf12ee9b32d838c9 Mon Sep 17 00:00:00 2001 From: zzc <1196529906@qq.com> Date: Tue, 24 Dec 2024 22:36:20 +0800 Subject: [PATCH 050/108] [ENH | FIX] complete llm benchmark, general capability specification and fix some bugs. --- .../llm_general_capability_spec/spec.py | 83 +++- learnware/tests/benchmarks/__init__.py | 76 +++- learnware/tests/benchmarks/config.py | 11 + .../tests/benchmarks/llm_process_funcs.py | 390 ++++++++++++++++++ 4 files changed, 549 insertions(+), 11 deletions(-) diff --git a/learnware/specification/system/llm_general_capability_spec/spec.py b/learnware/specification/system/llm_general_capability_spec/spec.py index 50931cf2..7a5fb67f 100644 --- a/learnware/specification/system/llm_general_capability_spec/spec.py +++ b/learnware/specification/system/llm_general_capability_spec/spec.py @@ -1,9 +1,14 @@ from __future__ import annotations from typing import List, Optional +import lm_eval +from lm_eval.models.huggingface import HFLM +import codecs +import json +import os from .config import general_capability_benchmark_configs from ..base import SystemStatSpecification -from ....tests.benchmarks import BenchmarkConfig +from ....tests.benchmarks import LLMBenchmarkConfig from ....logger import get_module_logger from ....learnware import Learnware @@ -13,18 +18,67 @@ class LLMGeneralCapabilitySpecification(SystemStatSpecification): """Large Language Model General Capability Specification""" - benchmark_configs: List[BenchmarkConfig] = general_capability_benchmark_configs + benchmark_configs: List[LLMBenchmarkConfig] = general_capability_benchmark_configs def __init__(self): + self.score_dict = None super(LLMGeneralCapabilitySpecification, self).__init__(type=self.__class__.__name__) + @staticmethod + def _evaluate(learnware: Learnware, benchmark_configs: List[LLMBenchmarkConfig]): + """Use [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness) framework to evaluate learnware according to benchmark_configs. + + Parameters + ---------- + learnware : Learnware + Learnware to generate General Capability Specification. + benchmark_configs : Optional[List[LLMBenchmarkConfig]] + List of LLMBenchmarkConfig, set to self.benchmark_configs if None. + """ + base_model = learnware.get_model() # to be modified + task_list = [config.name for config in benchmark_configs] + + lm_obj = HFLM(pretrained=base_model, batch_size=16) + task_manager = lm_eval.tasks.TaskManager() + results = lm_eval.simple_evaluate( + model=lm_obj, + tasks=task_list, + task_manager=task_manager, + ) + return results + def generate_stat_spec_from_system( self, learnware: Learnware, - benchmark_configs: Optional[List[BenchmarkConfig]] = None, + benchmark_configs: Optional[List[LLMBenchmarkConfig]] = None, update_existing: bool = False, - ) -> dict: - pass + ): + """Construct Large Language Model General Capability Specification for Learnware. + + Parameters + ---------- + learnware : Learnware + Learnware to generate General Capability Specification. + benchmark_configs : Optional[List[LLMBenchmarkConfig]] + List of LLMBenchmarkConfig, set to self.benchmark_configs if None. + update_existing : bool + A flag indicating whether to update existing General Capability Specification's scores dict, by default false. 
+ """ + if not benchmark_configs: + benchmark_configs = self.benchmark_configs + if update_existing: + results = self._evaluate(learnware, benchmark_configs) + self.score_dict = {} + for config in benchmark_configs: + self.score_dict[config] = results['results'][config.name][f'{config.eval_metric},none'] + else: + self.score_dict = learnware.get_specification().get_stat_spec_by_name("LLMGeneralCapabilitySpecification") + exist_config_list = list(self.score_dict.keys()) + remain_config_list = [config for config in self.benchmark_configs if config not in exist_config_list] + if remain_config_list: + results = self._evaluate(learnware, remain_config_list) + for config in remain_config_list: + self.score_dict[config] = results['results'][config.name][f'{config.eval_metric},none'] def save(self, filepath: str): """Save the computed specification to a specified path in JSON format. @@ -34,7 +88,10 @@ def save(self, filepath: str): filepath : str The specified saving path """ - raise NotImplementedError("save is not implemented") + save_path = filepath + spec_to_save = self.get_states() + with codecs.open(save_path, "w", encoding="utf-8") as fout: + json.dump(spec_to_save, fout, separators=(",", ":")) def load(self, filepath: str) -> bool: """Load a specification file in JSON format from the specified path. @@ -49,4 +106,16 @@ def load(self, filepath: str) -> bool: bool True if the specification is loaded successfully. """ - raise NotImplementedError("load is not implemented") + load_path = filepath + if os.path.exists(load_path): + with codecs.open(load_path, "r", encoding="utf-8") as fin: + obj_text = fin.read() + spec_load = json.loads(obj_text) + + for d in self.get_states(): + if d in spec_load.keys(): + if d == "type" and spec_load[d] != self.type: + raise TypeError( + f"The type of loaded Specification ({spec_load[d]}) is different from the expected type ({self.type})!" 
+ ) + setattr(self, d, spec_load[d]) diff --git a/learnware/tests/benchmarks/__init__.py b/learnware/tests/benchmarks/__init__.py index c59418bf..25718815 100644 --- a/learnware/tests/benchmarks/__init__.py +++ b/learnware/tests/benchmarks/__init__.py @@ -4,6 +4,7 @@ import zipfile from dataclasses import dataclass from typing import List, Optional, Tuple, Union, Callable +from datasets import load_dataset, Dataset import numpy as np @@ -74,13 +75,72 @@ def get_train_data( @dataclass class LLMBenchmark: name: str + # HF dataset options + dataset_path: Optional[str] = None + subset_name: Optional[str] = None + dataset_kwargs: Optional[dict] = None + train_split: Optional[str] = None + validation_split: Optional[str] = None + test_split: Optional[str] = None + # evaluation options + eval_metric: Optional[str] = None + # formatting / prompting options preprocess_function: Optional[Callable] = None - - def get_train_val_data(self) -> List[str]: - pass + response_template: Optional[str] = None + + def __post_init__(self) -> None: + self.prepare_dataset() + + def prepare_dataset(self) -> None: + self.dataset = load_dataset( + path=self.dataset_path if self.dataset_path else self.name, + name=self.subset_name, + **self.dataset_kwargs if self.dataset_kwargs is not None else {}, + ) + + def get_train_dataset(self) -> Dataset: + if self.train_split: + train_dataset = self.dataset[self.train_split] + if self.dataset_path == "meta-math/GSM8K_zh": + train_dataset = train_dataset.filter(lambda x: x['split']=='train') + if self.preprocess_function: + train_dataset = train_dataset.map(lambda x: {"text": self.preprocess_function(x)}, batched = True) + return train_dataset + + def get_val_dataset(self) -> Dataset: + if self.validation_split: + val_dataset = self.dataset[self.validation_split] + if self.preprocess_function: + val_dataset = val_dataset.map(lambda x: {"text": self.preprocess_function(x)}, batched = True) + return val_dataset + + def get_test_dataset(self) -> Dataset: + if self.test_split: + test_dataset = self.dataset[self.test_split] + if self.preprocess_function: + test_dataset = test_dataset.map(lambda x: {"text": self.preprocess_function(x)}, batched = True) + return test_dataset + + def get_train_data(self) -> List[str]: + if not self.preprocess_function: + raise Exception("Must specify a preprocess function to get train data!") + train_dataset = self.get_train_dataset() + train_data = train_dataset["text"] + return train_data + + def get_val_data(self) -> List[str]: + if not self.preprocess_function: + raise Exception("Must specify a preprocess function to get validation data!") + val_dataset = self.get_val_dataset() + val_data = val_dataset["text"] + return val_data def get_test_data(self) -> List[str]: - pass + if not self.preprocess_function: + raise Exception("Must specify a preprocess function to get test data!") + test_dataset = self.get_test_dataset() + test_data = test_dataset["text"] + return test_data class LearnwareBenchmarkManager: @@ -172,7 +232,15 @@ def get_benchmark(self, benchmark_config: Union[str, BenchmarkConfig, LLMBenchma if isinstance(benchmark_config, LLMBenchmarkConfig): return LLMBenchmark( name=benchmark_config.name, + dataset_path=benchmark_config.dataset_path, + subset_name=benchmark_config.subset_name, + dataset_kwargs=benchmark_config.dataset_kwargs, + train_split=benchmark_config.train_split, + validation_split=benchmark_config.validation_split, + test_split=benchmark_config.test_split, + eval_metric=benchmark_config.eval_metric, 
preprocess_function=benchmark_config.preprocess_function, + response_template=benchmark_config.response_template, ) elif isinstance(benchmark_config, BenchmarkConfig): diff --git a/learnware/tests/benchmarks/config.py b/learnware/tests/benchmarks/config.py index 90bf31aa..714a7f7f 100644 --- a/learnware/tests/benchmarks/config.py +++ b/learnware/tests/benchmarks/config.py @@ -15,7 +15,18 @@ class BenchmarkConfig: @dataclass class LLMBenchmarkConfig: name: str + # HF dataset options + dataset_path: Optional[str] = None + subset_name: Optional[str] = None + dataset_kwargs: Optional[dict] = None + train_split: Optional[str] = None + validation_split: Optional[str] = None + test_split: Optional[str] = None + # evaluation options + eval_metric: Optional[str] = None + # formatting / prompting options preprocess_function: Optional[Callable] = None + response_template: Optional[str] = None benchmark_configs: Dict[str, Union[BenchmarkConfig, LLMBenchmarkConfig]] = {} diff --git a/learnware/tests/benchmarks/llm_process_funcs.py b/learnware/tests/benchmarks/llm_process_funcs.py index e69de29b..40dd2f27 100644 --- a/learnware/tests/benchmarks/llm_process_funcs.py +++ b/learnware/tests/benchmarks/llm_process_funcs.py @@ -0,0 +1,390 @@ +import re +from typing import List + + +def preprocess_alpaca(docs) -> List[str]: + alpaca_prompt = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Input:\n{}\n\n### Response:\n{}" + instructions = docs["instruction"] + inputs = docs["input"] + outputs = docs["output"] + texts = [] + for instruction, input, output in zip(instructions, inputs, outputs): + text = alpaca_prompt.format(instruction, input, output) + texts.append(text) + return texts + + +def preprocess_alpaca_no_label(docs) -> List[str]: + alpaca_no_label_prompt = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Input:\n{}\n\n### Response:\n" + instructions = docs["instruction"] + inputs = docs["input"] + texts = [] + for instruction, input in zip(instructions, inputs): + text = alpaca_no_label_prompt.format(instruction, input) + texts.append(text) + return texts + + +def preprocess_alpaca_no_input(docs) -> List[str]: + alpaca_no_input_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Response:\n{}" + instructions = docs["instruction"] + outputs = docs["output"] + texts = [] + for instruction, output in zip(instructions, outputs): + text = alpaca_no_input_prompt.format(instruction, output) + texts.append(text) + return texts + + +def preprocess_alpaca_no_input_no_label(docs) -> List[str]: + alpaca_no_input_no_label_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Response:\n" + instructions = docs["instruction"] + texts = [] + for instruction in instructions: + text = alpaca_no_input_no_label_prompt.format(instruction) + texts.append(text) + return texts + + +def preprocess_qr(docs) -> List[str]: + alpaca_no_input_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. 
\n\n### Instruction:\n{}\n\n### Response:\n{}" + instructions = docs["query"] + outputs = docs["response"] + texts = [] + for instruction, output in zip(instructions, outputs): + text = alpaca_no_input_prompt.format(instruction, output) + texts.append(text) + return texts + + +def preprocess_qr_no_label(docs) -> List[str]: + alpaca_no_input_no_label_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Response:\n" + instructions = docs["query"] + texts = [] + for instruction in instructions: + text = alpaca_no_input_no_label_prompt.format(instruction) + texts.append(text) + return texts + + +def preprocess_qr_zh(docs) -> List[str]: + alpaca_no_input_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Response:\n{}" + instructions = docs["query_zh"] + outputs = docs["response_zh"] + texts = [] + for instruction, output in zip(instructions, outputs): + text = alpaca_no_input_prompt.format(instruction, output) + texts.append(text) + return texts + + +def preprocess_qr_zh_no_label(docs) -> List[str]: + alpaca_no_input_no_label_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Response:\n" + instructions = docs["query_zh"] + texts = [] + for instruction in instructions: + text = alpaca_no_input_no_label_prompt.format(instruction) + texts.append(text) + return texts + + +def preprocess_qa(docs) -> List[str]: + alpaca_no_input_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Response:\n{}" + instructions = docs["question"] + outputs = docs["answer"] + texts = [] + for instruction, output in zip(instructions, outputs): + text = alpaca_no_input_prompt.format(instruction, output) + texts.append(text) + return texts + + +def preprocess_qa_no_label(docs) -> List[str]: + alpaca_no_input_no_label_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Response:\n" + instructions = docs["question"] + texts = [] + for instruction in instructions: + text = alpaca_no_input_no_label_prompt.format(instruction) + texts.append(text) + return texts + + +def preprocess_qa_zh(docs) -> List[str]: + alpaca_no_input_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Response:\n{}" + instructions = docs["question_zh"] + outputs = docs["answer_zh"] + texts = [] + for instruction, output in zip(instructions, outputs): + text = alpaca_no_input_prompt.format(instruction, output) + texts.append(text) + return texts + + +def preprocess_qa_zh_no_label(docs) -> List[str]: + alpaca_no_input_no_label_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Response:\n" + instructions = docs["question_zh"] + texts = [] + for instruction in instructions: + text = alpaca_no_input_no_label_prompt.format(instruction) + texts.append(text) + return texts + + +def preprocess_finance(docs) -> List[str]: + alpaca_no_input_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. 
\n\n### Instruction:\n{}\n\n### Response:\n{}" + instructions = docs["query"] + outputs = docs["answer"] + texts = [] + for instruction, output in zip(instructions, outputs): + instruction.rstrip(' Answer:') + text = alpaca_no_input_prompt.format(instruction, output) + texts.append(text) + return texts + + +def preprocess_math_train(docs) -> List[str]: + alpaca_no_input_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Response:\n{}" + instructions = docs["question"] + outputs = docs["answer_detail"] + texts = [] + for instruction, output in zip(instructions, outputs): + text = alpaca_no_input_prompt.format(instruction, output) + texts.append(text) + return texts + + + +def preprocess_medmcqa_no_label(docs) -> List[str]: + opas = docs["opa"] + opbs = docs["opb"] + opcs = docs["opc"] + opds = docs["opd"] + questions = docs["question"] + texts = [] + for opa, opb, opc, opd, question in zip(opas, opbs, opcs, opds, questions): + option_choices = { + "A": opa, + "B": opb, + "C": opc, + "D": opd, + } + prompt = "Question: " + question + "\nChoices:\n" + for choice, option in option_choices.items(): + prompt += f"{choice.upper()}. {option}\n" + prompt += f"Answer:" + texts.append(prompt) + return texts + + +def preprocess_medmcqa(docs) -> List[str]: + opas = docs["opa"] + opbs = docs["opb"] + opcs = docs["opc"] + opds = docs["opd"] + questions = docs["question"] + option_ids = docs["cop"] + texts = [] + for opa, opb, opc, opd, question, option_id in zip(opas, opbs, opcs, opds, questions, option_ids): + option_choices = { + "A": opa, + "B": opb, + "C": opc, + "D": opd, + } + prompt = "Question: " + question + "\nChoices:\n" + for choice, option in option_choices.items(): + prompt += f"{choice.upper()}. {option}\n" + prompt += f"Answer: {list(option_choices.keys())[option_id]}" + texts.append(prompt) + return texts + + +def preprocess_medqa_no_label(docs) -> List[str]: + ending0s = docs["ending0"] + ending1s = docs["ending1"] + ending2s = docs["ending2"] + ending3s = docs["ending3"] + sent1s = docs["sent1"] + texts = [] + for sent1, ending0, ending1, ending2, ending3 in zip(sent1s, ending0s, ending1s, ending2s, ending3s): + option_choices = { + "A": ending0, + "B": ending1, + "C": ending2, + "D": ending3, + } + answers = "".join((f"{k}. {v}\n") for k, v in option_choices.items()) + texts.append(f"Question: {sent1}\n{answers}Answer:") + return texts + + +def preprocess_medqa(docs) -> List[str]: + ending0s = docs["ending0"] + ending1s = docs["ending1"] + ending2s = docs["ending2"] + ending3s = docs["ending3"] + sent1s = docs["sent1"] + labels = docs["label"] + texts = [] + for sent1, ending0, ending1, ending2, ending3, label in zip(sent1s, ending0s, ending1s, ending2s, ending3s, labels): + option_choices = { + "A": ending0, + "B": ending1, + "C": ending2, + "D": ending3, + } + answers = "".join((f"{k}. {v}\n") for k, v in option_choices.items()) + texts.append(f"Question: {sent1}\n{answers}Answer: {list(option_choices.keys())[label]}") + return texts + + +def preprocess_mmlu_no_label(docs) -> List[str]: + questions = docs["question"] + choices = docs["choices"] + texts = [] + for question, options in zip(questions, choices): + texts.append( + "{}\nA. {}\nB. {}\nC. {}\nD. 
{}\nAnswer:".format( + question.strip(), + options[0], + options[1], + options[2], + options[3] + ) + ) + return texts + + +def preprocess_mmlu(docs) -> List[str]: + questions = docs["question"] + choices = docs["choices"] + answers = docs["answer"] + texts = [] + for question, options, answer in zip(questions, choices, answers): + texts.append( + "{}\nA. {}\nB. {}\nC. {}\nD. {}\nAnswer: {}".format( + question.strip(), + options[0], + options[1], + options[2], + options[3], + ["A", "B", "C", "D"][answer] + ) + ) + return texts + + +def preprocess_pubmedqa_no_label(docs) -> List[str]: + contexts_list = docs["CONTEXTS"] + questions = docs["QUESTION"] + texts = [] + for contexts, question in zip(contexts_list, questions): + ctxs = "\n".join(contexts) + texts.append("Abstract: {}\nQuestion: {}\nAnswer:".format(ctxs, question)) + return texts + + +def preprocess_pubmedqa(docs) -> List[str]: + contexts_list = docs["CONTEXTS"] + questions = docs["QUESTION"] + answers = docs["final_decision"] + texts = [] + for contexts, question, answer in zip(contexts_list, questions, answers): + ctxs = "\n".join(contexts) + texts.append("Abstract: {}\nQuestion: {}\nAnswer: {}".format(ctxs, question, answer)) + return texts + + +def preprocess_agieval_no_label(docs) -> List[str]: + return docs["query"] + + +def preprocess_cmmlu_no_label(docs) -> List[str]: + questions = docs["Question"] + as_ = docs["A"] + bs = docs["B"] + cs = docs["C"] + ds = docs["D"] + texts = [] + for question, a, b, c, d in zip(questions, as_, bs, cs, ds): + texts.append("{}\nA. {}\nB. {}\nC. {}\nD. {}\n答案:".format( + question.strip(), a, b, c, d + )) + return texts + + +def preprocess_cmmlu(docs) -> List[str]: + questions = docs["Question"] + as_ = docs["A"] + bs = docs["B"] + cs = docs["C"] + ds = docs["D"] + answers = docs["Answer"] + texts = [] + for question, a, b, c, d, answer in zip(questions, as_, bs, cs, ds, answers): + texts.append("{}\nA. {}\nB. {}\nC. {}\nD. {}\n答案:{}".format( + question.strip(), a, b, c, d, answer + )) + return texts + + +def preprocess_mathqa_no_label(docs) -> List[str]: + problems = docs["Problem"] + texts = [f"Question: {problem}\nAnswer:" for problem in problems] + return texts + + +def preprocess_mathqa(docs) -> List[str]: + problems = docs["Problem"] + corrects = docs["correct"] + options = docs["options"] + texts = [] + for problem, correct, option in zip(problems, corrects, options): + choices = [ + c[4:].rstrip(" ,") + for c in re.findall(r"[abcd] \) .*?, |e \) .*?$", option) + ] + + # answer = ['a', 'b', 'c', 'd', 'e'].index(correct) + texts.append("Question: {}\na. {}\nb. {}\nc. {}\nd. {}\ne. 
{}\nAnswer: {}".format(problem, choices[0], choices[1], choices[2], choices[3], choices[4], correct)) + return texts + + +def preprocess_mgsm_no_label(docs) -> List[str]: + questions = docs["question"] + texts = [f"问题: "+question+"\n逐步解答:" for question in questions] + return texts + + +def preprocess_mgsm(docs) -> List[str]: + questions = docs["question"] + answers = docs["answer"] + texts = [question + "\n" + answer for question, answer in zip(questions, answers)] + return texts + + +def preprocess_gsm8k_no_label(docs) -> List[str]: + questions = docs["question"] + texts = [f"Question: {question}\nAnswer:" for question in questions] + return texts + + +def preprocess_gsm8k(docs) -> List[str]: + instructions = docs["question"] + outputs = docs["answer"] + texts = [] + for instruction, output in zip(instructions, outputs): + text = f"Question: {instruction}\nAnswer: {output}" + texts.append(text) + return texts + + +def preprocess_math_no_label(docs) -> List[str]: + problems = docs["problem"] + texts = ["Problem:" + "\n" + problem + "\n\n" + "Solution:" for problem in problems] + return texts + + +def preprocess_finance_no_label(docs) -> List[str]: + return docs["query"] + From ac3b71b1457b6b824dc5dd583afa50c7f882893d Mon Sep 17 00:00:00 2001 From: HaoyuShi Date: Wed, 1 Jan 2025 13:44:27 +0800 Subject: [PATCH 051/108] [ENH] Add _search_by_taskvector_spec_single in LLMStatSearcher --- learnware/market/llm/searcher.py | 49 ++++++++++++++++++- .../specification/regular/text/generative.py | 2 +- .../test_text_generative.py | 33 +++++++++++-- 3 files changed, 78 insertions(+), 6 deletions(-) diff --git a/learnware/market/llm/searcher.py b/learnware/market/llm/searcher.py index c89a0bd7..6421701d 100644 --- a/learnware/market/llm/searcher.py +++ b/learnware/market/llm/searcher.py @@ -1,4 +1,9 @@ -from typing import Optional +from typing import List, Optional, Tuple, Union + +import numpy as np + +from learnware.learnware.base import Learnware +from learnware.specification.base import Specification from ..base import BaseUserInfo, SearchResults, AtomicSearcher from ...logger import get_module_logger @@ -58,3 +63,45 @@ def __call__( the second is the sorted list of Learnware (single) by the rkme dist """ pass + + def _search_by_taskvector_spec_single( + self, + learnware_list: List[Learnware], + user_spec: Union[Specification], + stat_spec_type: str = "GenerativeModelSpecification" + ) -> Tuple[List[float], List[Learnware]]: + """Calculate the distances between learnwares in the given learnware_list and user_spec + + Parameters + ---------- + learnware_list : List[Learnware] + The list of learnwares whose mixture approximates the user's rkme + user_rkme : Union[RKMETableSpecification, RKMEImageSpecification, RKMETextSpecification] + user Task Vector statistical specification + stat_spec_type : str + GenerativeModelSpecification by default. 
+ + Returns + ------- + Tuple[List[float], List[Learnware]] + the first is the list of cosine similarity + the second is the list of Learnware + both lists are sorted by cosine similarity + """ + spec_list = [learnware.specification.get_stat_spec_by_name(stat_spec_type) for learnware in learnware_list] + filtered_idx_list, similarity_list = [], [] + for idx, s in enumerate(spec_list): + similarity = float(s.similarity(user_spec)) + if np.isfinite(similarity): + similarity_list.append(similarity) + filtered_idx_list.append(idx) + else: + logger.warning( + f"The distance between user_spec and learnware_spec (id: {learnware_list[idx].id}) is not finite, where distance is {mmd_dist}" + ) + + sorted_idx_list = reversed(sorted(range(len(similarity_list)), key=lambda k: similarity_list[k])) + sorted_dist_list = [similarity_list[idx] for idx in sorted_idx_list] + sorted_learnware_list = [learnware_list[filtered_idx_list[idx]] for idx in sorted_idx_list] + + return sorted_dist_list, sorted_learnware_list \ No newline at end of file diff --git a/learnware/specification/regular/text/generative.py b/learnware/specification/regular/text/generative.py index d996d2d5..d16000b1 100644 --- a/learnware/specification/regular/text/generative.py +++ b/learnware/specification/regular/text/generative.py @@ -93,9 +93,9 @@ def task_vector(self): def generate_stat_spec_from_data( self, - X: List[str] = None, dataset: Optional[Dataset] = None, dataset_text_field="text", + X: List[str] = None, verbose: bool = True, **kwargs ): diff --git a/tests/test_specification/test_text_generative.py b/tests/test_specification/test_text_generative.py index bd76052c..0646257f 100644 --- a/tests/test_specification/test_text_generative.py +++ b/tests/test_specification/test_text_generative.py @@ -8,6 +8,9 @@ import sys +from learnware.learnware.base import Learnware +from learnware.market.llm import LLMStatSearcher +from learnware.specification.base import Specification from learnware.specification.regular.text import GenerativeModelSpecification @@ -63,12 +66,34 @@ def _test_with_dataset(dataset): torch.testing.assert_close(task_vector, spec2.task_vector) assert spec2.type == "GenerativeModelSpecification" - def test_image_rkme(self): + # def test_generating_spec(self): + # benchmark = Benchmark("medical") + # train_dataset = benchmark.get_user_dataset("pubmedqa") + + # self._test_with_X(train_dataset["text"]) + # self._test_with_dataset(train_dataset) + + def test_searching_spec(self): benchmark = Benchmark("medical") - train_dataset = benchmark.get_user_dataset("pubmedqa") - self._test_with_X(train_dataset["text"]) - self._test_with_dataset(train_dataset) + specs, learnwares = [], [] + for i, dataset_name in enumerate(["pubmedqa", "medmcqa"]): + train_dataset = benchmark.get_user_dataset("pubmedqa") + + spec = GenerativeModelSpecification(max_steps=5) + spec.generate_stat_spec_from_data(dataset=train_dataset) + + specs.append(spec) + learnwares.append(Learnware(str(i), None, Specification( + stat_spec={spec.type: spec} + ), "")) + + searcher = LLMStatSearcher(None) + searcher._search_by_taskvector_spec_single( + learnwares, + specs[-1], + specs[-1].type + ) if __name__ == "__main__": From 82d54611c5b7169f63fcc53beb42386c26eb5b1d Mon Sep 17 00:00:00 2001 From: HaoyuShi Date: Wed, 1 Jan 2025 16:26:27 +0800 Subject: [PATCH 052/108] [MNT] Releasing commented out test code --- tests/test_specification/test_text_generative.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git 
a/tests/test_specification/test_text_generative.py b/tests/test_specification/test_text_generative.py index 0646257f..381d13e5 100644 --- a/tests/test_specification/test_text_generative.py +++ b/tests/test_specification/test_text_generative.py @@ -66,12 +66,12 @@ def _test_with_dataset(dataset): torch.testing.assert_close(task_vector, spec2.task_vector) assert spec2.type == "GenerativeModelSpecification" - # def test_generating_spec(self): - # benchmark = Benchmark("medical") - # train_dataset = benchmark.get_user_dataset("pubmedqa") + def test_generating_spec(self): + benchmark = Benchmark("medical") + train_dataset = benchmark.get_user_dataset("pubmedqa") - # self._test_with_X(train_dataset["text"]) - # self._test_with_dataset(train_dataset) + self._test_with_X(train_dataset["text"]) + self._test_with_dataset(train_dataset) def test_searching_spec(self): benchmark = Benchmark("medical") From 95cad2f921525e62dc64449bfc4e3d6c0453fdfa Mon Sep 17 00:00:00 2001 From: Asymptotez <201220101@smail.nju.edu.cn> Date: Thu, 2 Jan 2025 16:41:01 +0800 Subject: [PATCH 053/108] [MNT] add check for learnware.yaml's new fields --- learnware/config.py | 1 + learnware/learnware/__init__.py | 18 ++++++++++++++++++ learnware/specification/module.py | 2 ++ 3 files changed, 21 insertions(+) diff --git a/learnware/config.py b/learnware/config.py index ef87a028..01df99c5 100644 --- a/learnware/config.py +++ b/learnware/config.py @@ -167,6 +167,7 @@ def get_platform(): "learnware_folder_config": { "yaml_file": "learnware.yaml", "module_file": "__init__.py", + "weights_file_path" : "weights", }, "database_url": f"sqlite:///{DATABASE_PATH}", "max_reduced_set_size": 1310720, diff --git a/learnware/learnware/__init__.py b/learnware/learnware/__init__.py index 60996a75..ae18d065 100644 --- a/learnware/learnware/__init__.py +++ b/learnware/learnware/__init__.py @@ -35,6 +35,8 @@ def get_learnware_from_dirpath( learnware_config = { "model": { "class_name": "Model", + "weights_file_path": "weights", + "required_learnware_ids": [], "kwargs": {}, }, "stat_specifications": [ @@ -65,6 +67,22 @@ def get_learnware_from_dirpath( if "module_path" not in learnware_config["model"]: learnware_config["model"]["module_path"] = C.learnware_folder_config["module_file"] + if ( + semantic_spec["Data"]["Values"] == ["Text"] + and semantic_spec["Task"]["Values"] == ["Text Generation"] + ): + if "weights_file_path" not in learnware_config["model"]: + learnware_config["model"]["weights_file_path"] = C.learnware_folder_config["weights_file_path"] + + learnware_weights_path = os.path.join(learnware_dirpath, learnware_config["model"]["weights_file_path"]) + assert os.path.exists( + learnware_weights_path + ), f"Weights are not found for the Text Generation Model learnware_{id}, please check the learnware.yaml or zipfile." + + if semantic_spec["Model Type"]["Values"] == ["PEFT Model"]: + assert "required_learnware_ids" in learnware_config["model"], f"'required_learnware_ids' is not found for the PEFT Model learnware_{id}, please check the learnware.yaml." + assert len(learnware_config["model"]["required_learnware_ids"]) != 0, f"'required_learnware_ids' can't be empty for the PEFT Model learnware_{id}, please check the learnware.yaml." 
+ learnware_spec = Specification() for _stat_spec in learnware_config["stat_specifications"]: stat_spec = _stat_spec.copy() diff --git a/learnware/specification/module.py b/learnware/specification/module.py index 127044a1..8d8be23f 100644 --- a/learnware/specification/module.py +++ b/learnware/specification/module.py @@ -226,6 +226,7 @@ def generate_semantic_spec( description: Optional[str] = None, data_type: Optional[str] = None, task_type: Optional[str] = None, + model_type: Optional[str] = None, library_type: Optional[str] = None, scenarios: Optional[Union[str, List[str]]] = None, license: Optional[Union[str, List[str]]] = None, @@ -235,6 +236,7 @@ def generate_semantic_spec( semantic_specification = dict() semantic_specification["Data"] = {"Type": "Class", "Values": [data_type] if data_type is not None else []} semantic_specification["Task"] = {"Type": "Class", "Values": [task_type] if task_type is not None else []} + semantic_specification["Model Type"] = {"Type": "Class", "Values": [model_type] if model_type is not None else ["Others"]} semantic_specification["Library"] = { "Type": "Class", "Values": [library_type] if library_type is not None else [], From e3916ba6c5bfb241f82ee8af8cd4586fe2ec0af9 Mon Sep 17 00:00:00 2001 From: zzc <1196529906@qq.com> Date: Fri, 3 Jan 2025 02:16:26 +0800 Subject: [PATCH 054/108] [MNT] modify details in general_capability_spec --- .../llm_general_capability_spec/__init__.py | 14 ++++++ .../llm_general_capability_spec/config.py | 49 ++++++++++++++++++- .../llm_general_capability_spec/spec.py | 30 +++++++----- 3 files changed, 80 insertions(+), 13 deletions(-) create mode 100644 learnware/specification/system/llm_general_capability_spec/__init__.py diff --git a/learnware/specification/system/llm_general_capability_spec/__init__.py b/learnware/specification/system/llm_general_capability_spec/__init__.py new file mode 100644 index 00000000..a067f62b --- /dev/null +++ b/learnware/specification/system/llm_general_capability_spec/__init__.py @@ -0,0 +1,14 @@ +from ....logger import get_module_logger +from ....utils import is_torch_available + +logger = get_module_logger("system_general_capability_spec") + +if not is_torch_available(verbose=False): + LLMGeneralCapabilitySpecification = None + logger.error( + "LLMGeneralCapabilitySpecification are not available because 'torch' is not installed!" 
+ ) +else: + from .spec import LLMGeneralCapabilitySpecification + +__all__ = ["LLMGeneralCapabilitySpecification"] \ No newline at end of file diff --git a/learnware/specification/system/llm_general_capability_spec/config.py b/learnware/specification/system/llm_general_capability_spec/config.py index e7a04e20..80f4e084 100644 --- a/learnware/specification/system/llm_general_capability_spec/config.py +++ b/learnware/specification/system/llm_general_capability_spec/config.py @@ -1,5 +1,50 @@ from typing import List -from ....tests.benchmarks import BenchmarkConfig +from ....tests.benchmarks import LLMBenchmarkConfig -general_capability_benchmark_configs: List[BenchmarkConfig] = [] +general_capability_benchmark_configs: List[LLMBenchmarkConfig] = [ + LLMBenchmarkConfig( + name="mmlu", + dataset_path="hails/mmlu_no_train", + validation_split="validation", + test_split="test", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="leaderboard_bbh", + dataset_path="SaylorTwift/bbh", + test_split="test", + eval_metric="acc_norm", + ), + LLMBenchmarkConfig( + name="leaderboard_gpqa", + dataset_path="Idavidrein/gpqa", + test_split="train", + eval_metric="acc_norm", + ), + LLMBenchmarkConfig( + name="leaderboard_ifeval", + dataset_path="wis-k/instruction-following-eval", + test_split="train", + eval_metric="inst_level_strict_acc", + ), + LLMBenchmarkConfig( + name="leaderboard_math_hard", + dataset_path="lighteval/MATH-Hard", + train_split="train", + test_split="test", + eval_metric="exact_match", + ), + LLMBenchmarkConfig( + name="leaderboard_mmlu_pro", + dataset_path="TIGER-Lab/MMLU-Pro", + validation_split="validation", + test_split="test", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="leaderboard_musr", + dataset_path="TAUR-Lab/MuSR", + eval_metric="acc_norm", + ), +] diff --git a/learnware/specification/system/llm_general_capability_spec/spec.py b/learnware/specification/system/llm_general_capability_spec/spec.py index 7a5fb67f..e1fa1916 100644 --- a/learnware/specification/system/llm_general_capability_spec/spec.py +++ b/learnware/specification/system/llm_general_capability_spec/spec.py @@ -10,7 +10,7 @@ from ..base import SystemStatSpecification from ....tests.benchmarks import LLMBenchmarkConfig from ....logger import get_module_logger -from ....learnware import Learnware +# from learnware.learnware import Learnware # TODO logger = get_module_logger("llm_general_capability_spec") @@ -25,7 +25,7 @@ def __init__(self): super(LLMGeneralCapabilitySpecification, self).__init__(type=self.__class__.__name__) @staticmethod - def _evaluate(learnware: Learnware, benchmark_configs: List[LLMBenchmarkConfig]): + def _evaluate(learnware, benchmark_configs: List[LLMBenchmarkConfig]): """Use [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness) framework to evaluate learnware according to benchmark_configs. Parameters @@ -35,7 +35,8 @@ def _evaluate(learnware: Learnware, benchmark_configs: List[LLMBenchmarkConfig]) benchmark_configs : Optional[List[LLMBenchmarkConfig]] List of LLMBenchmarkConfig, set to self.benchmark_configs if None. 
""" - base_model = learnware.get_model() # to be modified + learnware.instantiate_model() + base_model = learnware.get_model().get_model() task_list = [config.name for config in benchmark_configs] lm_obj = HFLM(pretrained=base_model, batch_size=16) @@ -49,7 +50,7 @@ def _evaluate(learnware: Learnware, benchmark_configs: List[LLMBenchmarkConfig]) def generate_stat_spec_from_system( self, - learnware: Learnware, + learnware, benchmark_configs: Optional[List[LLMBenchmarkConfig]] = None, update_existing: bool = False, ): @@ -64,21 +65,28 @@ def generate_stat_spec_from_system( update_existing : bool A flag indicating whether to update existing General Capability Specification's scores dict, by default false. """ - if not benchmark_configs: + if benchmark_configs: + for config in benchmark_configs: + if config.eval_metric == None: + raise Exception("Must specify a evaluation metric in a LLMBenchmarkConfig object to evaluate learnware on it.") + else: benchmark_configs = self.benchmark_configs + self.score_dict = {} if update_existing: results = self._evaluate(learnware, benchmark_configs) - self.score_dict = {} for config in benchmark_configs: - self.score_dict[config] = results['results'][config.name][f'{config.eval_metric},none'] + self.score_dict[config.name] = results['results'][config.name][f'{config.eval_metric},none'] else: - self.score_dict = learnware.get_specification().get_stat_spec_by_name("LLMGeneralCapabilitySpecification") - exist_config_list = list(self.score_dict.keys()) - remain_config_list = [config for config in self.benchmark_configs if config not in exist_config_list] + exist_config_list = [] + general_spec = learnware.get_specification().get_stat_spec_by_name("LLMGeneralCapabilitySpecification") + if general_spec: + exist_config_list = list(general_spec.score_dict.keys()) + self.score_dict = general_spec.score_dict.copy() + remain_config_list = [config.name for config in benchmark_configs if config.name not in exist_config_list] if remain_config_list: results = self._evaluate(learnware, remain_config_list) for config in remain_config_list: - self.score_dict[config] = results['results'][config.name][f'{config.eval_metric},none'] + self.score_dict[config.name] = results['results'][config.name][f'{config.eval_metric},none'] def save(self, filepath: str): """Save the computed specification to a specified path in JSON format. 
From e7402d9c342ce32b6bf5cdb4d616975ee508b2d2 Mon Sep 17 00:00:00 2001 From: zzc <1196529906@qq.com> Date: Tue, 7 Jan 2025 22:52:46 +0800 Subject: [PATCH 055/108] [MNT] modify class annotations --- .../specification/system/llm_general_capability_spec/spec.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/learnware/specification/system/llm_general_capability_spec/spec.py b/learnware/specification/system/llm_general_capability_spec/spec.py index e1fa1916..1f71f8d6 100644 --- a/learnware/specification/system/llm_general_capability_spec/spec.py +++ b/learnware/specification/system/llm_general_capability_spec/spec.py @@ -10,7 +10,6 @@ from ..base import SystemStatSpecification from ....tests.benchmarks import LLMBenchmarkConfig from ....logger import get_module_logger -# from learnware.learnware import Learnware # TODO logger = get_module_logger("llm_general_capability_spec") @@ -25,7 +24,7 @@ def __init__(self): super(LLMGeneralCapabilitySpecification, self).__init__(type=self.__class__.__name__) @staticmethod - def _evaluate(learnware, benchmark_configs: List[LLMBenchmarkConfig]): + def _evaluate(learnware: Learnware, benchmark_configs: List[LLMBenchmarkConfig]): """Use [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness) framework to evaluate learnware according to benchmark_configs. Parameters @@ -50,7 +49,7 @@ def _evaluate(learnware, benchmark_configs: List[LLMBenchmarkConfig]): def generate_stat_spec_from_system( self, - learnware, + learnware: Learnware, benchmark_configs: Optional[List[LLMBenchmarkConfig]] = None, update_existing: bool = False, ): From 0e6b96c3522b1712eabd6dba9644b0ca4eaca557 Mon Sep 17 00:00:00 2001 From: zzc <1196529906@qq.com> Date: Thu, 9 Jan 2025 16:53:26 +0800 Subject: [PATCH 056/108] [MNT] modify test_text_generative.py --- .../test_text_generative.py | 18 +----- .../text_generative_utils.py | 57 +++++++++++++++++++ 2 files changed, 60 insertions(+), 15 deletions(-) create mode 100644 tests/test_specification/text_generative_utils.py diff --git a/tests/test_specification/test_text_generative.py b/tests/test_specification/test_text_generative.py index 381d13e5..70e03d7f 100644 --- a/tests/test_specification/test_text_generative.py +++ b/tests/test_specification/test_text_generative.py @@ -13,16 +13,7 @@ from learnware.specification.base import Specification from learnware.specification.regular.text import GenerativeModelSpecification - -# Import from our project - -if os.path.expanduser(os.environ["LIB_PATH"]) not in sys.path: - sys.path.append(os.path.expanduser(os.environ["LIB_PATH"])) - - -from src.datasets.llm.utils import set_seed, prepare_train_data -from src.datasets.llm.benchmark import Benchmark - +from text_generative_utils import DATASET, prepare_data class TestGenerativeModelSpecification(unittest.TestCase): @staticmethod @@ -67,18 +58,15 @@ def _test_with_dataset(dataset): assert spec2.type == "GenerativeModelSpecification" def test_generating_spec(self): - benchmark = Benchmark("medical") - train_dataset = benchmark.get_user_dataset("pubmedqa") + train_dataset = prepare_data(DATASET["pubmedqa"]) self._test_with_X(train_dataset["text"]) self._test_with_dataset(train_dataset) def test_searching_spec(self): - benchmark = Benchmark("medical") - specs, learnwares = [], [] for i, dataset_name in enumerate(["pubmedqa", "medmcqa"]): - train_dataset = benchmark.get_user_dataset("pubmedqa") + train_dataset = prepare_data(DATASET[dataset_name]) spec = GenerativeModelSpecification(max_steps=5) 
spec.generate_stat_spec_from_data(dataset=train_dataset) diff --git a/tests/test_specification/text_generative_utils.py b/tests/test_specification/text_generative_utils.py new file mode 100644 index 00000000..e17fad63 --- /dev/null +++ b/tests/test_specification/text_generative_utils.py @@ -0,0 +1,57 @@ +from typing import List +from datasets import load_dataset + +DATASET = { + "medmcqa": "openlifescienceai/medmcqa", + "pubmedqa": "bigbio/pubmed_qa,pubmed_qa_labeled_fold0_source", +} + +def preprocess_medmcqa(doc) -> str: + """ + Question: + Choices: + A. + B. + C. + D. + Answer: + """ + choices = [doc["opa"], doc["opb"], doc["opc"], doc["opd"]] + option_choices = { + "A": choices[0], + "B": choices[1], + "C": choices[2], + "D": choices[3], + } + + prompt = "Question: " + doc["question"] + "\nChoices:\n" + for choice, option in option_choices.items(): + prompt += f"{choice.upper()}. {option}\n" + prompt += "Answer:" + return prompt + +def preprocess_pubmedqa(doc) -> str: + ctxs = "\n".join(doc["CONTEXTS"]) + return "Abstract: {}\nQuestion: {}\nAnswer:".format( + ctxs, + doc["QUESTION"], + ) + +PROCESS_FUNC = { + # medical user + "openlifescienceai/medmcqa": preprocess_medmcqa, + "bigbio/pubmed_qa": preprocess_pubmedqa, +} + +def prepare_data(dataset_name_str): + temp_list = dataset_name_str.split(",") + subset_name = None + if len(temp_list) != 1: + subset_name = temp_list[1] + dataset_name = temp_list[0] + if subset_name: + test_dataset = load_dataset(dataset_name, subset_name, split="test") + else: + test_dataset = load_dataset(dataset_name, split="test") + test_dataset = test_dataset.map(lambda x: {"text": PROCESS_FUNC[dataset_name](x)}) + return test_dataset From 4636e996ca2a7735fdaed0c8761541a4529205ed Mon Sep 17 00:00:00 2001 From: HaoyuShi Date: Thu, 9 Jan 2025 21:46:52 +0800 Subject: [PATCH 057/108] [FIX] Work around trl package bug with multi-GPU parallelism --- learnware/specification/regular/text/generative.py | 4 +++- tests/test_specification/test_text_generative.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/learnware/specification/regular/text/generative.py b/learnware/specification/regular/text/generative.py index d16000b1..bd658ef3 100644 --- a/learnware/specification/regular/text/generative.py +++ b/learnware/specification/regular/text/generative.py @@ -149,7 +149,7 @@ def _init_tokenizer_model(self): model = get_peft_model(model, peft_config) - # TODO: Load adpater weight from online + # TODO: Load adpater weight from Beimingwu for n, p in model.named_parameters(): if "lora_A" in n: @@ -168,6 +168,8 @@ def _init_trainer(self, model, tokenizer, train_dataset, args): weight_decay_l1=self.__extra_args["weight_decay_l1"], args=args, ) + # Work around trl package bug with multi-GPU parallelism + trainer.args._n_gpu = 1 return trainer diff --git a/tests/test_specification/test_text_generative.py b/tests/test_specification/test_text_generative.py index 70e03d7f..8313a92d 100644 --- a/tests/test_specification/test_text_generative.py +++ b/tests/test_specification/test_text_generative.py @@ -33,7 +33,7 @@ def _test_with_X(X): spec2 = GenerativeModelSpecification() spec2.load(spec_path) - torch.testing.assert_close(task_vector, spec2.task_vector) + torch.testing.assert_close(task_vector.cpu(), spec2.task_vector.cpu()) assert spec2.type == "GenerativeModelSpecification" @@ -54,7 +54,7 @@ def _test_with_dataset(dataset): spec2 = GenerativeModelSpecification() spec2.load(spec_path) - torch.testing.assert_close(task_vector, spec2.task_vector) + 
torch.testing.assert_close(task_vector.cpu(), spec2.task_vector.cpu()) assert spec2.type == "GenerativeModelSpecification" def test_generating_spec(self): From 63a7079ef65fb1d69fd9c4277edf60d78f2d1429 Mon Sep 17 00:00:00 2001 From: HaoyuShi Date: Fri, 10 Jan 2025 12:09:11 +0800 Subject: [PATCH 058/108] [MNT] Adding Dependencies --- setup.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/setup.py b/setup.py index f2da971c..23642197 100644 --- a/setup.py +++ b/setup.py @@ -53,6 +53,10 @@ def get_version(rel_path: str) -> str: "portalocker>=2.0.0", "qpsolvers[clarabel]>=4.0.1", "geatpy>=2.7.0;python_version<'3.11'", + "trl>=0.13.0", + "datasets>=3.2.0", + "peft>=0.14.0", + "lm_eval>=0.4.7" ] DEV_REQUIRED = [ From 339258539bd3535a35e4cae0fa23fd21d0f390ec Mon Sep 17 00:00:00 2001 From: Asymptotez <201220101@smail.nju.edu.cn> Date: Fri, 10 Jan 2025 18:16:32 +0800 Subject: [PATCH 059/108] [FIX] add `trust_remote_code` parameter to fix dataset loading --- tests/test_specification/text_generative_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_specification/text_generative_utils.py b/tests/test_specification/text_generative_utils.py index e17fad63..78b2347b 100644 --- a/tests/test_specification/text_generative_utils.py +++ b/tests/test_specification/text_generative_utils.py @@ -50,8 +50,8 @@ def prepare_data(dataset_name_str): subset_name = temp_list[1] dataset_name = temp_list[0] if subset_name: - test_dataset = load_dataset(dataset_name, subset_name, split="test") + test_dataset = load_dataset(dataset_name, subset_name, split="test", trust_remote_code=True) else: - test_dataset = load_dataset(dataset_name, split="test") + test_dataset = load_dataset(dataset_name, split="test", trust_remote_code=True) test_dataset = test_dataset.map(lambda x: {"text": PROCESS_FUNC[dataset_name](x)}) return test_dataset From 4ba0f98e0a100d2e1b1af068a140067e919d21fa Mon Sep 17 00:00:00 2001 From: Asymptotez <201220101@smail.nju.edu.cn> Date: Fri, 10 Jan 2025 20:44:03 +0800 Subject: [PATCH 060/108] [FIX] fix bug in func `parse_specification_type` --- learnware/market/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learnware/market/utils.py b/learnware/market/utils.py index b62ca3da..c13e2049 100644 --- a/learnware/market/utils.py +++ b/learnware/market/utils.py @@ -3,7 +3,7 @@ def parse_specification_type( spec_list=[ "HeteroMapTableSpecification", "RKMETableSpecification", - "GenerativeModelSpecification" + "GenerativeModelSpecification", "RKMETextSpecification", "RKMEImageSpecification", "LLMGeneralCapabilitySpecification", From 30153079c36ad6895e7206c45ff0175311e97f52 Mon Sep 17 00:00:00 2001 From: Asymptotez <201220101@smail.nju.edu.cn> Date: Fri, 10 Jan 2025 20:59:28 +0800 Subject: [PATCH 061/108] [FIX] fix variable name conflicts bug --- learnware/market/easy/checker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/learnware/market/easy/checker.py b/learnware/market/easy/checker.py index ee9f5731..c7b19f52 100644 --- a/learnware/market/easy/checker.py +++ b/learnware/market/easy/checker.py @@ -168,8 +168,8 @@ def __call__(self, learnware): elif spec_type == "RKMETextSpecification" or spec_type == "GenerativeModelSpecification": if semantic_spec["Model Type"]["Values"][0] != "Others": - len = random.randint(10, 1000) - inputs = EasyStatChecker._generate_random_text_list(10, "en", len, len) + len_ = random.randint(10, 1000) + inputs = EasyStatChecker._generate_random_text_list(10, "en", len_, len_) else: inputs = 
EasyStatChecker._generate_random_text_list(10) From 9eec90230ad73738c5dc340175d4b4e94deb7be8 Mon Sep 17 00:00:00 2001 From: zouxiaochuan Date: Sat, 11 Jan 2025 17:50:12 +0800 Subject: [PATCH 062/108] [FIX] add text generation task type --- learnware/config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/learnware/config.py b/learnware/config.py index a94e3b6f..53f246ef 100644 --- a/learnware/config.py +++ b/learnware/config.py @@ -92,6 +92,7 @@ def get_platform(): "Feature Extraction", "Segmentation", "Object Detection", + "Text Generation" "Others", ], "Type": "Class", # Choose only one class From 8a86dba10ce3bd3e75407b57a03405ac29630e7e Mon Sep 17 00:00:00 2001 From: zouxiaochuan Date: Sat, 11 Jan 2025 18:22:40 +0800 Subject: [PATCH 063/108] [FIX] bug fix: add comma --- learnware/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learnware/config.py b/learnware/config.py index 53f246ef..24443146 100644 --- a/learnware/config.py +++ b/learnware/config.py @@ -92,7 +92,7 @@ def get_platform(): "Feature Extraction", "Segmentation", "Object Detection", - "Text Generation" + "Text Generation", "Others", ], "Type": "Class", # Choose only one class From f788a667402fca8bc1948117606532cdcf90cfdd Mon Sep 17 00:00:00 2001 From: Asymptotez <201220101@smail.nju.edu.cn> Date: Sat, 11 Jan 2025 21:18:23 +0800 Subject: [PATCH 064/108] [FIX] update package versions in setup.py for python3.8 compatibility --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 23642197..4df4444d 100644 --- a/setup.py +++ b/setup.py @@ -53,9 +53,9 @@ def get_version(rel_path: str) -> str: "portalocker>=2.0.0", "qpsolvers[clarabel]>=4.0.1", "geatpy>=2.7.0;python_version<'3.11'", - "trl>=0.13.0", - "datasets>=3.2.0", - "peft>=0.14.0", + "trl>=0.11.4", + "datasets>=3.1.0", + "peft>=0.13.2", "lm_eval>=0.4.7" ] From e43c2033dc7c433121396ef59efcd27c289321dd Mon Sep 17 00:00:00 2001 From: Asymptotez <201220101@smail.nju.edu.cn> Date: Wed, 15 Jan 2025 20:58:05 +0800 Subject: [PATCH 065/108] [FIX] simplify applicability checks in `EasyExactSemanticSearcher` and `EasyFuzzSemanticSearcher`; correct index usage in `LLMEasyOrganizer` --- learnware/market/easy/searcher.py | 12 ++++-------- learnware/market/llm/organizer.py | 4 ++-- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/learnware/market/easy/searcher.py b/learnware/market/easy/searcher.py index 77f30727..179ed6ed 100644 --- a/learnware/market/easy/searcher.py +++ b/learnware/market/easy/searcher.py @@ -24,12 +24,10 @@ class EasyExactSemanticSearcher(AtomicSearcher): def is_applicable_learnware(self, learnware: Learnware) -> bool: - learnware_semantic_spec = learnware.specification.get_semantic_spec - return learnware_semantic_spec + return True def is_applicable_user(self, user_info: BaseUserInfo) -> bool: - user_semantic_spec = user_info.get_semantic_spec() - return user_semantic_spec + return True def _learnware_id_search(self, learnware_id: str, learnware_list: List[Learnware]) -> List[Learnware]: match_learnwares = [] @@ -95,12 +93,10 @@ def __call__(self, learnware_list: List[Learnware], user_info: BaseUserInfo) -> class EasyFuzzSemanticSearcher(AtomicSearcher): def is_applicable_learnware(self, learnware: Learnware) -> bool: - learnware_semantic_spec = learnware.specification.get_semantic_spec - return learnware_semantic_spec + return True def is_applicable_user(self, user_info: BaseUserInfo) -> bool: - user_semantic_spec = user_info.get_semantic_spec() - return 
user_semantic_spec + return True def _learnware_id_search(self, learnware_id: str, learnware_list: List[Learnware]) -> List[Learnware]: match_learnwares = [] diff --git a/learnware/market/llm/organizer.py b/learnware/market/llm/organizer.py index 79e63929..85e55f77 100644 --- a/learnware/market/llm/organizer.py +++ b/learnware/market/llm/organizer.py @@ -56,8 +56,8 @@ def _update_learnware_general_capability_spec( "kwargs": {}, } - zip_path = self.learnware_zip_list[id] - folder_dir = self.learnware_folder_list[id] + zip_path = self.learnware_zip_list[idx] + folder_dir = self.learnware_folder_list[idx] self.learnware_list[idx].update_stat_spec(general_capability_spec.type, general_capability_spec) with tempfile.TemporaryDirectory(prefix="learnware_") as tempdir: From a5153189e3f7c6025edcb6675e5a7d9d57c538c1 Mon Sep 17 00:00:00 2001 From: Asymptotez <201220101@smail.nju.edu.cn> Date: Wed, 15 Jan 2025 22:49:50 +0800 Subject: [PATCH 066/108] [FIX] initial implementation of `__call__` func of `LLMStatSearcher` --- learnware/market/easy/searcher.py | 3 +- learnware/market/llm/searcher.py | 50 ++++++++++++++++++------------- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/learnware/market/easy/searcher.py b/learnware/market/easy/searcher.py index 179ed6ed..6a0d579f 100644 --- a/learnware/market/easy/searcher.py +++ b/learnware/market/easy/searcher.py @@ -621,7 +621,7 @@ def __call__( search_method: str = "greedy", ) -> SearchResults: self.stat_spec_type = parse_specification_type(stat_specs=user_info.stat_info, spec_list=self.SPEC_TYPES) - print(self.stat_spec_type, self.SPEC_TYPES) + user_rkme = user_info.stat_info[self.stat_spec_type] learnware_list = self._filter_by_rkme_spec_metadata(learnware_list, user_rkme) @@ -750,7 +750,6 @@ def __call__( filtered_learnware_list = [ learnware for learnware in learnware_list if stat_searcher.is_applicable_learnware(learnware) ] - # print(f"Using searcher: {stat_searcher.__class__}, filtered learnware_list: {len(filtered_learnware_list)}") return stat_searcher(filtered_learnware_list, user_info, max_search_num, search_method) return semantic_search_result diff --git a/learnware/market/llm/searcher.py b/learnware/market/llm/searcher.py index 6421701d..32446d5a 100644 --- a/learnware/market/llm/searcher.py +++ b/learnware/market/llm/searcher.py @@ -4,14 +4,15 @@ from learnware.learnware.base import Learnware from learnware.specification.base import Specification - -from ..base import BaseUserInfo, SearchResults, AtomicSearcher +from ..utils import parse_specification_type +from ..base import BaseUserInfo, MultipleSearchItem, SearchResults, AtomicSearcher, SingleSearchItem +from ..easy import EasyStatSearcher from ...logger import get_module_logger logger = get_module_logger("llm_searcher") -class LLMStatSearcher(AtomicSearcher): +class LLMStatSearcher(EasyStatSearcher): SPEC_TYPES = ["GenerativeModelSpecification"] def is_applicable_user(self, user_info: BaseUserInfo, verbose: bool = True) -> bool: @@ -41,28 +42,37 @@ def is_applicable_user(self, user_info: BaseUserInfo, verbose: bool = True) -> b def __call__( self, + learnware_list: List[Learnware], user_info: BaseUserInfo, - check_status: Optional[int] = None, max_search_num: int = 5, search_method: str = "greedy", ) -> SearchResults: - """Employ LLM learnware search based on user_info from learnwares with check_status. 
+ self.stat_spec_type = parse_specification_type(stat_specs=user_info.stat_info, spec_list=self.SPEC_TYPES) - Parameters - ---------- - user_info : BaseUserInfo - user_info contains semantic_spec and stat_info - check_status : int, optional - - None: search from all learnwares - - Others: search from learnwares with check_status + user_spec = user_info.stat_info[self.stat_spec_type] - Returns - ------- - Tuple[List[float], List[Learnware]] - the first is the sorted list of rkme dist - the second is the sorted list of Learnware (single) by the rkme dist - """ - pass + sorted_dist_list, single_learnware_list = self._search_by_taskvector_spec_single(learnware_list, user_spec) + if len(single_learnware_list) == 0: + return SearchResults() + + sorted_score_list = self._convert_dist_to_score(sorted_dist_list) + + logger.info( + f"After search by user spec, learnware_list length is {len(learnware_list)}" + ) + + if len(single_learnware_list) == 1 and sorted_score_list[0] < 0.6: + sorted_score_list[0] = 0.6 + + search_results = SearchResults() + search_results.update_single_results( + [ + SingleSearchItem(learnware=_learnware, score=_score) + for _score, _learnware in zip(sorted_score_list, single_learnware_list) + ] + ) + + return search_results def _search_by_taskvector_spec_single( self, @@ -76,7 +86,7 @@ def _search_by_taskvector_spec_single( ---------- learnware_list : List[Learnware] The list of learnwares whose mixture approximates the user's rkme - user_rkme : Union[RKMETableSpecification, RKMEImageSpecification, RKMETextSpecification] + user_spec : GenerativeModelSpecification user Task Vector statistical specification stat_spec_type : str GenerativeModelSpecification by default. From a730f3e354856895bded227b42422afd21366ae1 Mon Sep 17 00:00:00 2001 From: Asymptotez <201220101@smail.nju.edu.cn> Date: Thu, 16 Jan 2025 16:02:18 +0800 Subject: [PATCH 067/108] [FIX] Fix the generation logic of `remain_config_list` to ensure that `LLMBenchmarkConfig` objects are returned instead of names --- .../specification/system/llm_general_capability_spec/spec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learnware/specification/system/llm_general_capability_spec/spec.py b/learnware/specification/system/llm_general_capability_spec/spec.py index 1f71f8d6..fe82aee6 100644 --- a/learnware/specification/system/llm_general_capability_spec/spec.py +++ b/learnware/specification/system/llm_general_capability_spec/spec.py @@ -81,7 +81,7 @@ def generate_stat_spec_from_system( if general_spec: exist_config_list = list(general_spec.score_dict.keys()) self.score_dict = general_spec.score_dict.copy() - remain_config_list = [config.name for config in benchmark_configs if config.name not in exist_config_list] + remain_config_list = [config for config in benchmark_configs if config.name not in exist_config_list] if remain_config_list: results = self._evaluate(learnware, remain_config_list) for config in remain_config_list: From 0c91b29135f8745498bedb49482dd8aee8f7a36e Mon Sep 17 00:00:00 2001 From: zouxiaochuan Date: Thu, 16 Jan 2025 16:12:52 +0800 Subject: [PATCH 068/108] [FIX] add model type to default semantic specification --- learnware/config.py | 9 +++++++++ learnware/market/easy/checker.py | 7 ++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/learnware/config.py b/learnware/config.py index 24443146..f67e053d 100644 --- a/learnware/config.py +++ b/learnware/config.py @@ -97,6 +97,15 @@ def get_platform(): ], "Type": "Class", # Choose only one class }, + "Model": { + 
"Values": [ + "Base Model", + "Fine-tuned Model", + "Adapter Model", + "Others", + ], + "Type": "Optional", + }, "Library": { "Values": ["Scikit-learn", "PyTorch", "TensorFlow", "Others"], "Type": "Class", diff --git a/learnware/market/easy/checker.py b/learnware/market/easy/checker.py index 3b22c9bb..508737e8 100644 --- a/learnware/market/easy/checker.py +++ b/learnware/market/easy/checker.py @@ -18,11 +18,16 @@ class EasySemanticChecker(BaseChecker): def check_semantic_spec(semantic_spec): try: for key in C["semantic_specs"]: + if C["semantic_specs"][key]["Type"] == "Optional": + if key not in semantic_spec: + continue + pass + value = semantic_spec[key]["Values"] valid_type = C["semantic_specs"][key]["Type"] assert semantic_spec[key]["Type"] == valid_type, f"{key} type mismatch" - if valid_type == "Class": + if valid_type == "Class" or valid_type == "Optional": valid_list = C["semantic_specs"][key]["Values"] assert len(value) == 1, f"{key} must be unique" assert value[0] in valid_list, f"{key} must be in {valid_list}" From cd7cf1667512d7b1c7280ab62f0331ea7a51f8e6 Mon Sep 17 00:00:00 2001 From: HaoyuShi Date: Thu, 16 Jan 2025 21:48:07 +0800 Subject: [PATCH 069/108] [ENH] Download models from beimingwu --- .../specification/regular/text/generative.py | 71 +++++++++++++------ .../test_text_generative.py | 16 ++++- 2 files changed, 61 insertions(+), 26 deletions(-) diff --git a/learnware/specification/regular/text/generative.py b/learnware/specification/regular/text/generative.py index bd658ef3..f4d7f198 100644 --- a/learnware/specification/regular/text/generative.py +++ b/learnware/specification/regular/text/generative.py @@ -1,15 +1,18 @@ from __future__ import annotations +import os +import random import tempfile from typing import Any, Dict, List, Optional, Union +import numpy as np import trl import torch from torch import nn from trl import SFTConfig -from peft import LoraConfig +from peft import LoraConfig, PeftModel from datasets import Dataset from transformers import ( @@ -97,6 +100,7 @@ def generate_stat_spec_from_data( dataset_text_field="text", X: List[str] = None, verbose: bool = True, + beimingwu = True, **kwargs ): """Initializing Task Vector Specification's parameters. @@ -113,7 +117,7 @@ def generate_stat_spec_from_data( dataset = Dataset.from_dict({dataset_text_field: X}) with tempfile.TemporaryDirectory() as temp_dir: - tokenizer, model = self._init_tokenizer_model() + tokenizer, model = self._init_tokenizer_model(beimingwu) trainer_config = self._trainer_config(temp_dir, dataset_text_field) trainer = self._init_trainer(model, tokenizer, dataset, trainer_config) @@ -126,35 +130,46 @@ def generate_stat_spec_from_data( ]) - def _init_tokenizer_model(self): + def _init_tokenizer_model(self, beimingwu): """ Initialize foundational model (e.g. Qwen) used for task vector generation. And, this method should not be overridden if the specification needs to be submitted to Beimingwu. 
""" - tokenizer = Qwen2Tokenizer.from_pretrained("Qwen/Qwen2.5-0.5B") + if beimingwu: + base_model_path = os.path.expanduser("~/Meta/saved-learnwares/saved-PTM") + else: + base_model_path = "Qwen/Qwen2.5-0.5B" + + set_seed(3407) + tokenizer = Qwen2Tokenizer.from_pretrained(base_model_path) model = Qwen2ForCausalLM.from_pretrained( - "Qwen/Qwen2.5-0.5B", + base_model_path, attn_implementation=self.attn_implementation, torch_dtype=torch.bfloat16, ).to(self._device) - peft_config = LoraConfig( - r=16, - lora_alpha=32, - lora_dropout=0.1, - bias="none", - task_type="CAUSAL_LM", - target_modules=["q_proj", "k_proj", "v_proj"] - ) - - model = get_peft_model(model, peft_config) - - # TODO: Load adpater weight from Beimingwu - - for n, p in model.named_parameters(): - if "lora_A" in n: - p.requires_grad = False - + if beimingwu: + adapter_path = os.path.expanduser("~/Meta/saved-learnwares/saved-adapter") + model = PeftModel.from_pretrained(model, adapter_path) + + for n, p in model.named_parameters(): + if "lora_B" in n: + p.requires_grad = True + else: + peft_config = LoraConfig( + r=16, + lora_alpha=32, + lora_dropout=0.1, + bias="none", + task_type="CAUSAL_LM", + target_modules=["q_proj", "k_proj", "v_proj"] + ) + model = get_peft_model(model, peft_config) + + for n, p in model.named_parameters(): + if "lora_A" in n: + p.requires_grad = False + return tokenizer, model @@ -262,4 +277,14 @@ def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=N loss = default_loss return (loss, outputs) if return_outputs else loss - \ No newline at end of file + + +def set_seed(seed): + random.seed(seed) + os.environ["PYTHONHASHSEED"] = str(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + torch.backends.cudnn.benchmark = False + torch.backends.cudnn.deterministic = True \ No newline at end of file diff --git a/tests/test_specification/test_text_generative.py b/tests/test_specification/test_text_generative.py index 8313a92d..c8bdad1c 100644 --- a/tests/test_specification/test_text_generative.py +++ b/tests/test_specification/test_text_generative.py @@ -18,7 +18,7 @@ class TestGenerativeModelSpecification(unittest.TestCase): @staticmethod def _test_with_X(X): - spec = GenerativeModelSpecification(max_steps=5) + spec = GenerativeModelSpecification() spec.generate_stat_spec_from_data(X=X, dataset_text_field="txt") task_vector = spec.task_vector @@ -39,7 +39,7 @@ def _test_with_X(X): @staticmethod def _test_with_dataset(dataset): - spec = GenerativeModelSpecification(max_steps=5) + spec = GenerativeModelSpecification() spec.generate_stat_spec_from_data(dataset=dataset) task_vector = spec.task_vector @@ -56,19 +56,29 @@ def _test_with_dataset(dataset): torch.testing.assert_close(task_vector.cpu(), spec2.task_vector.cpu()) assert spec2.type == "GenerativeModelSpecification" + + def test_loading_from_bwm(self): + spec = GenerativeModelSpecification() + _, model1 = spec._init_tokenizer_model(True) + _, model2 = spec._init_tokenizer_model(False) + + params1, params2 = dict(model1.named_parameters()), dict(model2.named_parameters()) + for k in model1.state_dict(): + torch.testing.assert_close(params1[k].cpu(), params2[k].cpu()) def test_generating_spec(self): train_dataset = prepare_data(DATASET["pubmedqa"]) self._test_with_X(train_dataset["text"]) self._test_with_dataset(train_dataset) + self._test_with_dataset(train_dataset, beimingwu=False) def test_searching_spec(self): specs, learnwares = [], [] for i, dataset_name in 
enumerate(["pubmedqa", "medmcqa"]): train_dataset = prepare_data(DATASET[dataset_name]) - spec = GenerativeModelSpecification(max_steps=5) + spec = GenerativeModelSpecification() spec.generate_stat_spec_from_data(dataset=train_dataset) specs.append(spec) From ace3702768ba3d2ab340ab8d6dec93156a7a7594 Mon Sep 17 00:00:00 2001 From: HaoyuShi Date: Thu, 16 Jan 2025 21:54:46 +0800 Subject: [PATCH 070/108] [FIX] fix typo. --- learnware/market/llm/searcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learnware/market/llm/searcher.py b/learnware/market/llm/searcher.py index 32446d5a..14faab0e 100644 --- a/learnware/market/llm/searcher.py +++ b/learnware/market/llm/searcher.py @@ -107,7 +107,7 @@ def _search_by_taskvector_spec_single( filtered_idx_list.append(idx) else: logger.warning( - f"The distance between user_spec and learnware_spec (id: {learnware_list[idx].id}) is not finite, where distance is {mmd_dist}" + f"The distance between user_spec and learnware_spec (id: {learnware_list[idx].id}) is not finite, where similarity is {similarity}" ) sorted_idx_list = reversed(sorted(range(len(similarity_list)), key=lambda k: similarity_list[k])) From 5b6445a8fa03e7652000c93cfc1047ff059ebadb Mon Sep 17 00:00:00 2001 From: zzc <1196529906@qq.com> Date: Fri, 17 Jan 2025 13:00:49 +0800 Subject: [PATCH 071/108] [MNT] add general specification test --- tests/test_specification/test_general_spec.py | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 tests/test_specification/test_general_spec.py diff --git a/tests/test_specification/test_general_spec.py b/tests/test_specification/test_general_spec.py new file mode 100644 index 00000000..f7253c21 --- /dev/null +++ b/tests/test_specification/test_general_spec.py @@ -0,0 +1,43 @@ +import json +import os +import tempfile +import unittest + +from learnware.tests.benchmarks.config import LLMBenchmarkConfig +from learnware.specification import LLMGeneralCapabilitySpecification +from learnware.client import LearnwareClient + +os.environ["CUDA_VISIBLE_DEVICES"] = "0" + +class TestGeneralCapabilitySpec(unittest.TestCase): + @staticmethod + def _test_general_spec(learnware, benchmark_configs): + spec = LLMGeneralCapabilitySpecification() + spec.generate_stat_spec_from_system(learnware=learnware, benchmark_configs=benchmark_configs) + + with tempfile.TemporaryDirectory(prefix="learnware_") as tempdir: + spec_path = os.path.join(tempdir, "general_spec.json") + spec.save(spec_path) + + with open(spec_path, "r") as f: + data = json.load(f) + assert data["type"] == "LLMGeneralCapabilitySpecification" + + spec2 = LLMGeneralCapabilitySpecification() + spec2.load(spec_path) + assert spec2.type == "LLMGeneralCapabilitySpecification" + + def test_general_spec(self): + client = LearnwareClient() + learnware = client.load_learnware(learnware_id="00002681") + benchmark_configs = [ + LLMBenchmarkConfig( + name="mmlu_anatomy", + eval_metric="acc", + ) + ] + self._test_general_spec(learnware=learnware, benchmark_configs=benchmark_configs) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file From 89d5338ce707046701165dbf2e5c17d0e49eebd2 Mon Sep 17 00:00:00 2001 From: HaoyuShi Date: Fri, 17 Jan 2025 13:01:21 +0800 Subject: [PATCH 072/108] [MNT] Modify interfaces and add corresponding tests. 
--- learnware/specification/__init__.py | 6 +++--- learnware/specification/module.py | 13 ++++++++---- .../test_text_generative.py | 20 +++++++++++++++++++ 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/learnware/specification/__init__.py b/learnware/specification/__init__.py index 8a94f2fe..f2573955 100644 --- a/learnware/specification/__init__.py +++ b/learnware/specification/__init__.py @@ -16,14 +16,14 @@ generate_rkme_table_spec = None generate_rkme_image_spec = None generate_rkme_text_spec = None - generate_task_vector_spec = None + generate_generative_model_spec = None generate_semantic_spec = None else: from .module import ( generate_rkme_image_spec, generate_rkme_table_spec, generate_rkme_text_spec, - generate_task_vector_spec, + generate_generative_model_spec, generate_semantic_spec, generate_stat_spec, ) @@ -43,7 +43,7 @@ "generate_rkme_image_spec", "generate_rkme_table_spec", "generate_rkme_text_spec", - "generate_task_vector_spec", + "generate_generative_model_spec", "generate_semantic_spec", "generate_stat_spec", ] diff --git a/learnware/specification/module.py b/learnware/specification/module.py index b5c87a5d..9f1f6992 100644 --- a/learnware/specification/module.py +++ b/learnware/specification/module.py @@ -4,6 +4,8 @@ import pandas as pd import torch +from datasets import Dataset + from .regular import RKMEImageSpecification, RKMETableSpecification, RKMETextSpecification, GenerativeModelSpecification from .utils import convert_to_numpy from ..config import C @@ -175,8 +177,10 @@ def generate_rkme_text_spec( return rkme_text_spec -def generate_task_vector_spec( - X: List[str], +def generate_generative_model_spec( + dataset: Optional[Dataset] = None, + dataset_text_field="text", + X: List[str] = None, verbose: bool = True, **kwargs ) -> GenerativeModelSpecification: @@ -184,9 +188,10 @@ def generate_task_vector_spec( if not isinstance(X, list) or not all(isinstance(item, str) for item in X): raise TypeError("Input data must be a list of strings.") - # Generate task vector spec + # Generate generative model spec task_vector_spec = GenerativeModelSpecification() - task_vector_spec.generate_stat_spec_from_data(X, verbose, **kwargs) + task_vector_spec.generate_stat_spec_from_data(dataset=dataset, dataset_text_field=dataset_text_field, X=X, verbose=verbose, **kwargs) + return task_vector_spec diff --git a/tests/test_specification/test_text_generative.py b/tests/test_specification/test_text_generative.py index 8313a92d..4e8405d2 100644 --- a/tests/test_specification/test_text_generative.py +++ b/tests/test_specification/test_text_generative.py @@ -11,6 +11,7 @@ from learnware.learnware.base import Learnware from learnware.market.llm import LLMStatSearcher from learnware.specification.base import Specification +from learnware.specification.module import generate_generative_model_spec from learnware.specification.regular.text import GenerativeModelSpecification from text_generative_utils import DATASET, prepare_data @@ -56,6 +57,25 @@ def _test_with_dataset(dataset): torch.testing.assert_close(task_vector.cpu(), spec2.task_vector.cpu()) assert spec2.type == "GenerativeModelSpecification" + + @staticmethod + def _test_with_generating_directly(X): + spec = generate_generative_model_spec(X=X, dataset_text_field="name") + + task_vector = spec.task_vector + + with tempfile.TemporaryDirectory(prefix="learnware_") as tempdir: + spec_path = os.path.join(tempdir, "spec.pth") + spec.save(spec_path) + + data = torch.load(spec_path, weights_only=True) + assert data["type"] == 
"GenerativeModelSpecification" + + spec2 = GenerativeModelSpecification() + spec2.load(spec_path) + + torch.testing.assert_close(task_vector.cpu(), spec2.task_vector.cpu()) + assert spec2.type == "GenerativeModelSpecification" def test_generating_spec(self): train_dataset = prepare_data(DATASET["pubmedqa"]) From a42471a7aa620bb8141f30eae2f0f62cb1170c6f Mon Sep 17 00:00:00 2001 From: Asymptotez <201220101@smail.nju.edu.cn> Date: Sat, 18 Jan 2025 13:53:45 +0800 Subject: [PATCH 073/108] [FIX] update `sentence_transformers` version to 3.2.1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4df4444d..a885663d 100644 --- a/setup.py +++ b/setup.py @@ -81,7 +81,7 @@ def get_version(rel_path: str) -> str: "torchvision==0.15.2", "torch-optimizer>=0.3.0", "lightgbm>=3.3.0", - "sentence_transformers==2.2.2", + "sentence_transformers==3.2.1", "fast_pytorch_kmeans==0.2.0.1", ] From 07a180a9301f910dcccda82550a26c35eb16c7cf Mon Sep 17 00:00:00 2001 From: Asymptotez <201220101@smail.nju.edu.cn> Date: Sat, 18 Jan 2025 13:57:44 +0800 Subject: [PATCH 074/108] [MNT | FIX] adjust logic for checking base model --- learnware/learnware/__init__.py | 9 +----- learnware/market/easy/checker.py | 55 ++++++++++++++++---------------- 2 files changed, 29 insertions(+), 35 deletions(-) diff --git a/learnware/learnware/__init__.py b/learnware/learnware/__init__.py index ae18d065..0776d716 100644 --- a/learnware/learnware/__init__.py +++ b/learnware/learnware/__init__.py @@ -39,14 +39,7 @@ def get_learnware_from_dirpath( "required_learnware_ids": [], "kwargs": {}, }, - "stat_specifications": [ - { - "module_path": "learnware.specification", - "class_name": "RKMETableSpecification", - "file_name": "stat_spec.json", - "kwargs": {}, - }, - ], + "stat_specifications": [], } try: diff --git a/learnware/market/easy/checker.py b/learnware/market/easy/checker.py index c7b19f52..4ad5419c 100644 --- a/learnware/market/easy/checker.py +++ b/learnware/market/easy/checker.py @@ -138,6 +138,26 @@ def __call__(self, learnware): logger.warning(message) return self.INVALID_LEARNWARE, message + # check llm base model learnware general capability + if ( + semantic_spec["Data"]["Values"] == ["Text"] + and semantic_spec["Task"]["Values"] == ["Text Generation"] + and semantic_spec["Model Type"]["Values"] == ["Base Model"] + ): + try: + general_capability_spec = LLMGeneralCapabilitySpecification() + general_capability_spec.generate_stat_spec_from_system( + learnware=learnware, benchmark_configs=general_capability_benchmark_configs[:1] + ) + learnware.update_stat_spec(general_capability_spec.type, general_capability_spec) + except Exception: + message = ( + f"The learnware [{learnware.id}] llm base model general capability evaluation is not available!" 
+ ) + logger.warning(message) + message += "\r\n" + traceback.format_exc() + return self.INVALID_LEARNWARE, message + # Check statistical specification spec_type = parse_specification_type(learnware.get_specification().stat_spec) if spec_type is None: @@ -146,12 +166,13 @@ def __call__(self, learnware): return self.INVALID_LEARNWARE, message # Check if statistical specification is computable in dist() - stat_spec = learnware.get_specification().get_stat_spec_by_name(spec_type) - distance = float(stat_spec.dist(stat_spec)) - if not np.isfinite(distance): - message = f"The distance between statistical specifications is not finite, where distance={distance}" - logger.warning(message) - return self.INVALID_LEARNWARE, message + if spec_type != "LLMGeneralCapabilitySpecification": + stat_spec = learnware.get_specification().get_stat_spec_by_name(spec_type) + distance = float(stat_spec.dist(stat_spec)) + if not np.isfinite(distance): + message = f"The distance between statistical specifications is not finite, where distance={distance}" + logger.warning(message) + return self.INVALID_LEARNWARE, message if spec_type == "RKMETableSpecification": if not isinstance(input_shape, tuple) or not all(isinstance(item, int) for item in input_shape): @@ -165,7 +186,7 @@ def __call__(self, learnware): return self.INVALID_LEARNWARE, message inputs = np.random.randn(10, *input_shape) - elif spec_type == "RKMETextSpecification" or spec_type == "GenerativeModelSpecification": + elif spec_type in ["RKMETextSpecification", "GenerativeModelSpecification", "LLMGeneralCapabilitySpecification"]: if semantic_spec["Model Type"]["Values"][0] != "Others": len_ = random.randint(10, 1000) @@ -249,26 +270,6 @@ def __call__(self, learnware): logger.warning(message) return self.INVALID_LEARNWARE, message - # check llm base model learnware general capability - if ( - semantic_spec["Data"]["Values"] == ["Text"] - and semantic_spec["Task"]["Values"] == ["Text Generation"] - and semantic_spec["Model Type"]["Values"] == ["Base Model"] - ): - try: - general_capability_spec = LLMGeneralCapabilitySpecification() - general_capability_spec.generate_stat_spec_from_system( - learnware=learnware, benchmark_configs=general_capability_benchmark_configs[:2] - ) - learnware.update_stat_spec(general_capability_spec.type, general_capability_spec) - except Exception: - message = ( - f"The learnware [{learnware.id}] llm base model general capability evaluation is not available!" - ) - logger.warning(message) - message += "\r\n" + traceback.format_exc() - return self.INVALID_LEARNWARE, message - except Exception as e: message = f"The learnware [{learnware.id}] is not valid! Due to {repr(e)}." logger.warning(message) From f3735b01bafd77f9e195e282175a446f236d0d60 Mon Sep 17 00:00:00 2001 From: Asymptotez <201220101@smail.nju.edu.cn> Date: Sat, 18 Jan 2025 14:05:38 +0800 Subject: [PATCH 075/108] [ENH] add `get_model` method to retrieve the `nn.Module` object in `BaseModel` --- learnware/model/base.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/learnware/model/base.py b/learnware/model/base.py index 74e3860c..98d990a1 100644 --- a/learnware/model/base.py +++ b/learnware/model/base.py @@ -45,3 +45,11 @@ def finetune(self, X: np.ndarray, y: np.ndarray): labels for finetuning """ pass + + def get_model(self): + """Get the nn.Module object + + Returns: + nn.Module: The model object, such as a PreTrainedModel from the transformers library. 
+ """ + pass From c3ea59fd493d0580b6cbb3565dfddfc1ca2ace05 Mon Sep 17 00:00:00 2001 From: zzc <1196529906@qq.com> Date: Mon, 20 Jan 2025 00:37:25 +0800 Subject: [PATCH 076/108] [MNT] add general_spec test of checker and organizer and modify some implementation --- learnware/market/easy/checker.py | 4 +- learnware/market/llm/organizer.py | 2 +- .../llm_general_capability_spec/config.py | 132 ++++++++++++++++-- .../llm_general_capability_spec/spec.py | 71 ++++++---- learnware/tests/benchmarks/__init__.py | 2 + learnware/tests/benchmarks/config.py | 1 + tests/test_specification/test_general_spec.py | 43 ++++-- 7 files changed, 209 insertions(+), 46 deletions(-) diff --git a/learnware/market/easy/checker.py b/learnware/market/easy/checker.py index c7b19f52..f9e365f9 100644 --- a/learnware/market/easy/checker.py +++ b/learnware/market/easy/checker.py @@ -10,7 +10,7 @@ from ...config import C from ...logger import get_module_logger from ...specification import LLMGeneralCapabilitySpecification -from ...specification.system.llm_general_capability_spec.config import general_capability_benchmark_configs +from ...specification.system.llm_general_capability_spec.config import test_benchmark_configs logger = get_module_logger("easy_checker", "INFO") @@ -258,7 +258,7 @@ def __call__(self, learnware): try: general_capability_spec = LLMGeneralCapabilitySpecification() general_capability_spec.generate_stat_spec_from_system( - learnware=learnware, benchmark_configs=general_capability_benchmark_configs[:2] + learnware=learnware, benchmark_configs=test_benchmark_configs ) learnware.update_stat_spec(general_capability_spec.type, general_capability_spec) except Exception: diff --git a/learnware/market/llm/organizer.py b/learnware/market/llm/organizer.py index 85e55f77..89a96fef 100644 --- a/learnware/market/llm/organizer.py +++ b/learnware/market/llm/organizer.py @@ -31,7 +31,7 @@ def update_learnware( return final_status def _update_learnware_general_capability_spec( - self, ids: Union[str, List[str]], benchmark_configs: List[BenchmarkConfig] + self, ids: Union[str, List[str]] ): """Update learnware by ids, attempting to generate LLMGeneralCapabilitySpecification for them. 
diff --git a/learnware/specification/system/llm_general_capability_spec/config.py b/learnware/specification/system/llm_general_capability_spec/config.py index 80f4e084..da0c9fe7 100644 --- a/learnware/specification/system/llm_general_capability_spec/config.py +++ b/learnware/specification/system/llm_general_capability_spec/config.py @@ -1,50 +1,166 @@ from typing import List +import numpy as np from ....tests.benchmarks import LLMBenchmarkConfig -general_capability_benchmark_configs: List[LLMBenchmarkConfig] = [ +# Score normalization functions, copied from the interactive notebook in https://huggingface.co/docs/leaderboards/open_llm_leaderboard/normalization + +def normalize_within_range(value, lower_bound=0, higher_bound=1): + return (np.clip(value - lower_bound, 0, None)) / (higher_bound - lower_bound) * 100 + +def compute_bbh_score(data): + bbh_subtasks = { + "sports_understanding": 2, + "tracking_shuffled_objects_three_objects": 3, + "navigate": 2, + "snarks": 2, + "date_understanding": 6, + "reasoning_about_colored_objects": 18, + "object_counting": 19, + "logical_deduction_seven_objects": 7, + "geometric_shapes": 11, + "web_of_lies": 2, + "movie_recommendation": 6, + "logical_deduction_five_objects": 5, + "salient_translation_error_detection": 6, + "disambiguation_qa": 3, + "temporal_sequences": 4, + "hyperbaton": 2, + "logical_deduction_three_objects": 3, + "causal_judgement": 2, + "formal_fallacies": 2, + "tracking_shuffled_objects_seven_objects": 7, + "ruin_names": 6, + "penguins_in_a_table": 5, + "boolean_expressions": 2, + "tracking_shuffled_objects_five_objects": 5 + } + # Normalize BBH subtasks scores + bbh_scores = [] + for subtask, num_choices in bbh_subtasks.items(): + subtask_key = f'leaderboard_bbh_{subtask}' + if subtask_key in data['results']: + bbh_raw_score = data['results'][subtask_key]['acc_norm,none'] + lower_bound = 1 / num_choices + normalized_score = normalize_within_range(bbh_raw_score, lower_bound, 1.0) + bbh_scores.append(normalized_score) + + # Average BBH score + bbh_score = sum(bbh_scores) / len(bbh_scores) + return round(bbh_score, 2) + +def compute_gpqa_score(data): + gpqa_subtasks = [ + "leaderboard_gpqa_diamond", + "leaderboard_gpqa_extended", + "leaderboard_gpqa_main" + ] + # Normalize GPQA scores + gpqa_raw_scores = [] + for subtask in gpqa_subtasks: + gpqa_raw_scores.append(data['results'][subtask]['acc_norm,none']) + gpqa_raw_score = sum(gpqa_raw_scores) / len(gpqa_raw_scores) + gpqa_score = normalize_within_range(gpqa_raw_score, 0.25, 1.0) + return round(gpqa_score, 2) + +def compute_ifeval_score(data): + # Compute IFEval + ifeval_inst_score = data['results']['leaderboard_ifeval']['inst_level_strict_acc,none'] * 100 + ifeval_prompt_score = data['results']['leaderboard_ifeval']['prompt_level_strict_acc,none'] * 100 + + # Average IFEval scores + ifeval_score = (ifeval_inst_score + ifeval_prompt_score) / 2 + return round(ifeval_score, 2) + +def compute_math_score(data): + math_subtasks = [ + "leaderboard_math_algebra_hard", + "leaderboard_math_counting_and_prob_hard", + "leaderboard_math_geometry_hard", + "leaderboard_math_intermediate_algebra_hard", + "leaderboard_math_num_theory_hard", + "leaderboard_math_prealgebra_hard", + "leaderboard_math_precalculus_hard" + ] + # Calculate the MATH score + math_raw_scores = [] + for subtask in math_subtasks: + math_raw_scores.append(data['results'][subtask]['exact_match,none']) + math_raw_score = sum(math_raw_scores) / len(math_raw_scores) + math_score = normalize_within_range(math_raw_score, 0, 1.0) + return 
round(math_score, 2) + +def compute_mmlu_pro_score(data): + # Normalize MMLU PRO scores + mmlu_pro_raw_score = data['results']['leaderboard_mmlu_pro']['acc,none'] + mmlu_pro_score = normalize_within_range(mmlu_pro_raw_score, 0.1, 1.0) + return round(mmlu_pro_score, 2) + +def compute_musr_score(data): + musr_subtasks = { + 'murder_mysteries': 2, + 'object_placements': 5, + 'team_allocation': 3 + } + # Normalize MUSR scores + musr_scores = [] + + for subtask, num_choices in musr_subtasks.items(): + musr_raw_score = data['results'][f'leaderboard_musr_{subtask}']['acc_norm,none'] + lower_bound = 1 / num_choices + normalized_score = normalize_within_range(musr_raw_score, lower_bound, 1.0) + musr_scores.append(normalized_score) + + musr_score = sum(musr_scores) / len(musr_scores) + return round(musr_score, 2) + + +test_benchmark_configs: List[LLMBenchmarkConfig] = [ LLMBenchmarkConfig( - name="mmlu", + name="mmlu_anatomy", dataset_path="hails/mmlu_no_train", validation_split="validation", test_split="test", eval_metric="acc", ), +] + +general_capability_benchmark_configs: List[LLMBenchmarkConfig] = [ LLMBenchmarkConfig( name="leaderboard_bbh", dataset_path="SaylorTwift/bbh", test_split="test", - eval_metric="acc_norm", + score_function=compute_bbh_score, ), LLMBenchmarkConfig( name="leaderboard_gpqa", dataset_path="Idavidrein/gpqa", test_split="train", - eval_metric="acc_norm", + score_function=compute_gpqa_score, ), LLMBenchmarkConfig( name="leaderboard_ifeval", dataset_path="wis-k/instruction-following-eval", test_split="train", - eval_metric="inst_level_strict_acc", + score_function=compute_ifeval_score, ), LLMBenchmarkConfig( name="leaderboard_math_hard", dataset_path="lighteval/MATH-Hard", train_split="train", test_split="test", - eval_metric="exact_match", + score_function=compute_math_score, ), LLMBenchmarkConfig( name="leaderboard_mmlu_pro", dataset_path="TIGER-Lab/MMLU-Pro", validation_split="validation", test_split="test", - eval_metric="acc", + score_function=compute_mmlu_pro_score, ), LLMBenchmarkConfig( name="leaderboard_musr", dataset_path="TAUR-Lab/MuSR", - eval_metric="acc_norm", + score_function=compute_musr_score, ), ] diff --git a/learnware/specification/system/llm_general_capability_spec/spec.py b/learnware/specification/system/llm_general_capability_spec/spec.py index fe82aee6..4d598e2c 100644 --- a/learnware/specification/system/llm_general_capability_spec/spec.py +++ b/learnware/specification/system/llm_general_capability_spec/spec.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import List, Optional +from typing import List, Dict, Optional import lm_eval from lm_eval.models.huggingface import HFLM import codecs @@ -24,28 +24,43 @@ def __init__(self): super(LLMGeneralCapabilitySpecification, self).__init__(type=self.__class__.__name__) @staticmethod - def _evaluate(learnware: Learnware, benchmark_configs: List[LLMBenchmarkConfig]): - """Use [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness) framework to evaluate learnware according to benchmark_configs. + def _get_scores(learnware: Learnware, benchmark_configs: List[LLMBenchmarkConfig]) -> Dict: + """Use [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness) framework to evaluate learnware according to benchmark_configs and compute score dict. Parameters ---------- learnware : Learnware Learnware to generate General Capability Specification. 
benchmark_configs : Optional[List[LLMBenchmarkConfig]] - List of LLMBenchmarkConfig, set to self.benchmark_configs if None. + List of LLMBenchmarkConfig. + + Returns + ------- + Dict[LLMBenchmarkConfig, float] + Scores of all benchmark_configs. """ learnware.instantiate_model() base_model = learnware.get_model().get_model() - task_list = [config.name for config in benchmark_configs] - - lm_obj = HFLM(pretrained=base_model, batch_size=16) + lm_obj = HFLM(pretrained=base_model, batch_size="auto") task_manager = lm_eval.tasks.TaskManager() - results = lm_eval.simple_evaluate( - model=lm_obj, - tasks=task_list, - task_manager=task_manager, - ) - return results + + score_dict = {} + for config in benchmark_configs: + results = lm_eval.simple_evaluate( + model=lm_obj, + tasks=[config.name], + task_manager=task_manager, + ) + + if config.score_function: + score = config.score_function(results) + else: + score = results['results'][config.name][f'{config.eval_metric},none'] * 100 + score = round(score, 2) + logger.info(f"Name: {config.name}, Score: {score}") + score_dict[config.name] = score + + return score_dict def generate_stat_spec_from_system( self, @@ -66,26 +81,30 @@ def generate_stat_spec_from_system( """ if benchmark_configs: for config in benchmark_configs: - if config.eval_metric == None: - raise Exception("Must specify a evaluation metric in a LLMBenchmarkConfig object to evaluate learnware on it.") + if config.eval_metric == None and config.score_function == None: + raise Exception("Must specify an evaluation metric or a score computing function in a LLMBenchmarkConfig object to get the evaluation score.") else: + logger.info("No passed benchmark_configs. Set benchmark_configs by default.") benchmark_configs = self.benchmark_configs - self.score_dict = {} if update_existing: - results = self._evaluate(learnware, benchmark_configs) - for config in benchmark_configs: - self.score_dict[config.name] = results['results'][config.name][f'{config.eval_metric},none'] + logger.info("Update existing LLMGeneralCapabilitySpecification.") + self.score_dict = self._get_scores(learnware, benchmark_configs) else: - exist_config_list = [] + existing_config_names = [] + self.score_dict = {} general_spec = learnware.get_specification().get_stat_spec_by_name("LLMGeneralCapabilitySpecification") if general_spec: - exist_config_list = list(general_spec.score_dict.keys()) + existing_config_names = list(general_spec.score_dict.keys()) self.score_dict = general_spec.score_dict.copy() - remain_config_list = [config for config in benchmark_configs if config.name not in exist_config_list] - if remain_config_list: - results = self._evaluate(learnware, remain_config_list) - for config in remain_config_list: - self.score_dict[config.name] = results['results'][config.name][f'{config.eval_metric},none'] + logger.info("LLMGeneralCapabilitySpecification exists in learnware. Try to update...") + for k, v in general_spec.score_dict.items(): + logger.info(f"Existing scores: Name: {k}, Score: {v}") + new_configs = [config for config in benchmark_configs if config.name not in existing_config_names] + if new_configs: + new_score_dict = self._get_scores(learnware, new_configs) + self.score_dict.update(new_score_dict) + else: + logger.info("All LLMBenchmarkConfig have been evaluated before. No update.") def save(self, filepath: str): """Save the computed specification to a specified path in JSON format. 
diff --git a/learnware/tests/benchmarks/__init__.py b/learnware/tests/benchmarks/__init__.py index 25718815..0426d417 100644 --- a/learnware/tests/benchmarks/__init__.py +++ b/learnware/tests/benchmarks/__init__.py @@ -84,6 +84,7 @@ class LLMBenchmark: test_split: Optional[str] = None # evaluation options eval_metric: Optional[str] = None + score_function: Optional[Callable] = None # formatting / prompting options preprocess_function: Optional[Callable] = None response_template: Optional[str] = None @@ -239,6 +240,7 @@ def get_benchmark(self, benchmark_config: Union[str, BenchmarkConfig, LLMBenchma validation_split=benchmark_config.validation_split, test_split=benchmark_config.test_split, eval_metric=benchmark_config.eval_metric, + score_function=benchmark_config.score_function, preprocess_function=benchmark_config.preprocess_function, response_template=benchmark_config.response_template, ) diff --git a/learnware/tests/benchmarks/config.py b/learnware/tests/benchmarks/config.py index 714a7f7f..e5d45540 100644 --- a/learnware/tests/benchmarks/config.py +++ b/learnware/tests/benchmarks/config.py @@ -24,6 +24,7 @@ class LLMBenchmarkConfig: test_split: Optional[str] = None # evaluation options eval_metric: Optional[str] = None + score_function: Optional[Callable] = None # formatting / prompting options preprocess_function: Optional[Callable] = None response_template: Optional[str] = None diff --git a/tests/test_specification/test_general_spec.py b/tests/test_specification/test_general_spec.py index f7253c21..73338feb 100644 --- a/tests/test_specification/test_general_spec.py +++ b/tests/test_specification/test_general_spec.py @@ -3,11 +3,14 @@ import tempfile import unittest -from learnware.tests.benchmarks.config import LLMBenchmarkConfig +from learnware.specification.system.llm_general_capability_spec.config import test_benchmark_configs from learnware.specification import LLMGeneralCapabilitySpecification from learnware.client import LearnwareClient +from learnware.market import instantiate_learnware_market +from learnware.specification import generate_semantic_spec +from learnware.market import LearnwareMarket -os.environ["CUDA_VISIBLE_DEVICES"] = "0" +os.environ["CUDA_VISIBLE_DEVICES"] = "1" class TestGeneralCapabilitySpec(unittest.TestCase): @staticmethod @@ -30,13 +33,35 @@ def _test_general_spec(learnware, benchmark_configs): def test_general_spec(self): client = LearnwareClient() learnware = client.load_learnware(learnware_id="00002681") - benchmark_configs = [ - LLMBenchmarkConfig( - name="mmlu_anatomy", - eval_metric="acc", - ) - ] - self._test_general_spec(learnware=learnware, benchmark_configs=benchmark_configs) + self._test_general_spec(learnware, test_benchmark_configs) + + @staticmethod + def _prepare_learnware_market() -> LearnwareMarket: + """initialize learnware market""" + llm_market = instantiate_learnware_market(market_id="llm_test", name="llm", rebuild=True) + semantic_spec = generate_semantic_spec( + name="Qwen/Qwen2.5-0.5B", + description="Qwen/Qwen2.5-0.5B", + data_type="Text", + model_type="Base Model", + task_type="Text Generation", + library_type="PyTorch", + scenarios=["Others"], + license="Others", + input_description=None, + output_description=None, + ) + client = LearnwareClient() + with tempfile.TemporaryDirectory(prefix="learnware_") as tempdir: + zip_path = os.path.join(tempdir, "learnware.zip") + client.download_learnware(learnware_id="00002681", save_path=zip_path) + llm_market.add_learnware(zip_path, semantic_spec) + return llm_market + + def 
test_in_checker_organizer(self): + llm_market = self._prepare_learnware_market() + learnware_ids = llm_market.get_learnware_ids() + llm_market.learnware_organizer._update_learnware_general_capability_spec(learnware_ids) if __name__ == "__main__": From ab9550dbc20585ab27fb1497b1f543bdc0fc3f18 Mon Sep 17 00:00:00 2001 From: zzc <1196529906@qq.com> Date: Mon, 20 Jan 2025 11:41:26 +0800 Subject: [PATCH 077/108] [FIX] fix CUDA OOM bug --- .../specification/system/llm_general_capability_spec/spec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learnware/specification/system/llm_general_capability_spec/spec.py b/learnware/specification/system/llm_general_capability_spec/spec.py index 4d598e2c..2f9dd539 100644 --- a/learnware/specification/system/llm_general_capability_spec/spec.py +++ b/learnware/specification/system/llm_general_capability_spec/spec.py @@ -41,11 +41,11 @@ def _get_scores(learnware: Learnware, benchmark_configs: List[LLMBenchmarkConfig """ learnware.instantiate_model() base_model = learnware.get_model().get_model() - lm_obj = HFLM(pretrained=base_model, batch_size="auto") task_manager = lm_eval.tasks.TaskManager() score_dict = {} for config in benchmark_configs: + lm_obj = HFLM(pretrained=base_model, batch_size="auto") results = lm_eval.simple_evaluate( model=lm_obj, tasks=[config.name], From 8f0240502d222854a5defdaba74c10cd231e40db Mon Sep 17 00:00:00 2001 From: zouxiaochuan Date: Mon, 20 Jan 2025 16:05:09 +0800 Subject: [PATCH 078/108] [FIX] get backend url from env in client --- learnware/client/learnware_client.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/learnware/client/learnware_client.py b/learnware/client/learnware_client.py index eaeac526..a02d7082 100644 --- a/learnware/client/learnware_client.py +++ b/learnware/client/learnware_client.py @@ -61,9 +61,17 @@ def __init__(self, host=None, timeout=None): self.headers = None if host is None: - self.host = C.backend_host + host = os.environ.get("LEARNWARE_BACKEND_HOST") + if host is None: + self.host = C.backend_host + pass + else: + self.host = host + pass + pass else: self.host = host + pass self.chunk_size = 1024 * 1024 self.tempdir_list = [] From 32f51b3420df604247b86a7126bcfba146e5b253 Mon Sep 17 00:00:00 2001 From: HaoyuShi Date: Tue, 21 Jan 2025 09:56:31 +0800 Subject: [PATCH 079/108] [FIX] Modify type checking in generate_generative_model_spec. --- learnware/specification/module.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learnware/specification/module.py b/learnware/specification/module.py index 9f1f6992..3b2f1cdd 100644 --- a/learnware/specification/module.py +++ b/learnware/specification/module.py @@ -185,7 +185,7 @@ def generate_generative_model_spec( **kwargs ) -> GenerativeModelSpecification: # Check input type - if not isinstance(X, list) or not all(isinstance(item, str) for item in X): + if X is not None and not isinstance(X, list) or not all(isinstance(item, str) for item in X): raise TypeError("Input data must be a list of strings.") # Generate generative model spec From c38f4610f4bf4b1a61960bad82fe0b5b1f64572c Mon Sep 17 00:00:00 2001 From: HaoyuShi Date: Tue, 21 Jan 2025 10:33:26 +0800 Subject: [PATCH 080/108] [FIX] Modify type checking in generate_generative_model_spec. 
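
Without the added parentheses the check from the previous commit parses as
`(X is not None and not isinstance(X, list)) or not all(...)`, so the `all(...)` operand is
still evaluated when `X` is None and iterating over None raises a TypeError before the
dataset-only path is reached. A small illustration of the precedence difference (a
standalone snippet for clarity, not part of the diff):

    X = None
    old_check = lambda X: X is not None and not isinstance(X, list) or not all(isinstance(i, str) for i in X)
    new_check = lambda X: X is not None and (not isinstance(X, list) or not all(isinstance(i, str) for i in X))
    # old_check(None) raises TypeError ("'NoneType' object is not iterable")
    # new_check(None) short-circuits and returns False, so no exception is raised
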
--- learnware/specification/module.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learnware/specification/module.py b/learnware/specification/module.py index 3b2f1cdd..d58d3263 100644 --- a/learnware/specification/module.py +++ b/learnware/specification/module.py @@ -185,7 +185,7 @@ def generate_generative_model_spec( **kwargs ) -> GenerativeModelSpecification: # Check input type - if X is not None and not isinstance(X, list) or not all(isinstance(item, str) for item in X): + if X is not None and (not isinstance(X, list) or not all(isinstance(item, str) for item in X)): raise TypeError("Input data must be a list of strings.") # Generate generative model spec From 07a26e73620b98297343154264f748d18fac0f3c Mon Sep 17 00:00:00 2001 From: HaoyuShi Date: Wed, 22 Jan 2025 16:21:27 +0800 Subject: [PATCH 081/108] [MNT] add dist to TaskVectorSpecification --- learnware/specification/regular/base.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/learnware/specification/regular/base.py b/learnware/specification/regular/base.py index e7fb1f1b..89544556 100644 --- a/learnware/specification/regular/base.py +++ b/learnware/specification/regular/base.py @@ -27,4 +27,10 @@ def similarity(self, other: TaskVectorSpecification) -> float: """Compute cosine similarity between two task vectors. """ v1, v2 = self.task_vector, other.task_vector - return cosine_similarity(v1, v2, dim=0) \ No newline at end of file + return cosine_similarity(v1, v2, dim=0) + + def dist(self, other: BaseStatSpecification): + v1, v2 = self.task_vector, other.task_vector + + similarity = cosine_similarity(v1, v2, dim=0) # [-1, 1] + return (-similarity + 1) / 2 \ No newline at end of file From 1fa823d73b1f9c4e2383d0e105bcfcd9fc53b022 Mon Sep 17 00:00:00 2001 From: HaoyuShi Date: Thu, 23 Jan 2025 13:33:46 +0800 Subject: [PATCH 082/108] [ENH] Add _convert_similarity_to_score --- learnware/market/llm/searcher.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/learnware/market/llm/searcher.py b/learnware/market/llm/searcher.py index 14faab0e..b3c7c2a5 100644 --- a/learnware/market/llm/searcher.py +++ b/learnware/market/llm/searcher.py @@ -1,6 +1,7 @@ from typing import List, Optional, Tuple, Union import numpy as np +import torch from learnware.learnware.base import Learnware from learnware.specification.base import Specification @@ -9,6 +10,8 @@ from ..easy import EasyStatSearcher from ...logger import get_module_logger +from torch.nn.functional import softmax + logger = get_module_logger("llm_searcher") @@ -51,11 +54,14 @@ def __call__( user_spec = user_info.stat_info[self.stat_spec_type] - sorted_dist_list, single_learnware_list = self._search_by_taskvector_spec_single(learnware_list, user_spec) + sorted_metric_list, single_learnware_list = self._search_by_taskvector_spec_single(learnware_list, user_spec) if len(single_learnware_list) == 0: return SearchResults() - sorted_score_list = self._convert_dist_to_score(sorted_dist_list) + if self.stat_spec_type == "GenerativeModelSpecification": + sorted_score_list = self._convert_similarity_to_score(sorted_metric_list) + else: + sorted_score_list = self._convert_dist_to_score(sorted_metric_list) logger.info( f"After search by user spec, learnware_list length is {len(learnware_list)}" @@ -110,8 +116,17 @@ def _search_by_taskvector_spec_single( f"The distance between user_spec and learnware_spec (id: {learnware_list[idx].id}) is not finite, where similarity is {similarity}" ) - sorted_idx_list = 
reversed(sorted(range(len(similarity_list)), key=lambda k: similarity_list[k])) + sorted_idx_list = list(reversed(sorted(range(len(similarity_list)), key=lambda k: similarity_list[k]))) sorted_dist_list = [similarity_list[idx] for idx in sorted_idx_list] sorted_learnware_list = [learnware_list[filtered_idx_list[idx]] for idx in sorted_idx_list] - return sorted_dist_list, sorted_learnware_list \ No newline at end of file + return sorted_dist_list, sorted_learnware_list + + def _convert_similarity_to_score(self, sorted_similarity_list, temperature=0.05): + sorted_similarity = torch.asarray(sorted_similarity_list) + sorted_similarity = torch.stack([ + sorted_similarity, torch.zeros_like(sorted_similarity) + ]) + + scores = softmax(sorted_similarity / temperature, dim=0)[0].tolist() + return scores * 100 \ No newline at end of file From 3516d70bae75a01c445f6d690f9d6b929af77114 Mon Sep 17 00:00:00 2001 From: zzc <1196529906@qq.com> Date: Sat, 15 Feb 2025 21:55:24 +0800 Subject: [PATCH 083/108] [MNT] add exception detection in general_spec generation --- .../llm_general_capability_spec/spec.py | 36 +++++++++++-------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/learnware/specification/system/llm_general_capability_spec/spec.py b/learnware/specification/system/llm_general_capability_spec/spec.py index 2f9dd539..26e6b6d5 100644 --- a/learnware/specification/system/llm_general_capability_spec/spec.py +++ b/learnware/specification/system/llm_general_capability_spec/spec.py @@ -1,4 +1,5 @@ from __future__ import annotations +import traceback from typing import List, Dict, Optional import lm_eval from lm_eval.models.huggingface import HFLM @@ -13,6 +14,7 @@ logger = get_module_logger("llm_general_capability_spec") +os.environ["TOKENIZERS_PARALLELISM"] = "false" class LLMGeneralCapabilitySpecification(SystemStatSpecification): """Large Language Model General Capability Specification""" @@ -45,21 +47,27 @@ def _get_scores(learnware: Learnware, benchmark_configs: List[LLMBenchmarkConfig score_dict = {} for config in benchmark_configs: - lm_obj = HFLM(pretrained=base_model, batch_size="auto") - results = lm_eval.simple_evaluate( - model=lm_obj, - tasks=[config.name], - task_manager=task_manager, - ) + try: + lm_obj = HFLM(pretrained=base_model, batch_size="auto") + results = lm_eval.simple_evaluate( + model=lm_obj, + tasks=[config.name], + task_manager=task_manager, + ) + + if config.score_function: + score = config.score_function(results) + else: + score = results['results'][config.name][f'{config.eval_metric},none'] * 100 + score = round(score, 2) + logger.info(f"Name: {config.name}, Score: {score}") + score_dict[config.name] = score + + except Exception as e: + traceback.print_exc() + message = f"Evaluation of {config.name} failed! Due to {repr(e)}." 
+ logger.warning(message) - if config.score_function: - score = config.score_function(results) - else: - score = results['results'][config.name][f'{config.eval_metric},none'] * 100 - score = round(score, 2) - logger.info(f"Name: {config.name}, Score: {score}") - score_dict[config.name] = score - return score_dict def generate_stat_spec_from_system( From 3824e24e9ede6180a836fed1849eef8c20401abd Mon Sep 17 00:00:00 2001 From: zouxiaochuan Date: Fri, 14 Mar 2025 15:33:04 +0800 Subject: [PATCH 084/108] final test of llm_market --- learnware/client/learnware_client.py | 2 +- learnware/config.py | 2 +- learnware/learnware/__init__.py | 2 +- learnware/market/easy/checker.py | 8 ++++---- learnware/market/llm/organizer.py | 17 ++++------------- learnware/specification/module.py | 2 +- .../system/llm_general_capability_spec/spec.py | 5 +++++ 7 files changed, 17 insertions(+), 21 deletions(-) diff --git a/learnware/client/learnware_client.py b/learnware/client/learnware_client.py index a6e52109..89aa34c0 100644 --- a/learnware/client/learnware_client.py +++ b/learnware/client/learnware_client.py @@ -52,7 +52,7 @@ class SemanticSpecificationKey(Enum): DATA_TYPE = "Data" TASK_TYPE = "Task" LIBRARY_TYPE = "Library" - MODEL_TYPE = "Model Type" + MODEL_TYPE = "Model" SENARIOES = "Scenario" LICENSE = "License" diff --git a/learnware/config.py b/learnware/config.py index 200f6fff..7df5c38b 100644 --- a/learnware/config.py +++ b/learnware/config.py @@ -101,7 +101,7 @@ def get_platform(): "Values": [ "Base Model", "Fine-tuned Model", - "Adapter Model", + "PEFT Model", "Others", ], "Type": "Optional", diff --git a/learnware/learnware/__init__.py b/learnware/learnware/__init__.py index 0776d716..31bd8f81 100644 --- a/learnware/learnware/__init__.py +++ b/learnware/learnware/__init__.py @@ -72,7 +72,7 @@ def get_learnware_from_dirpath( learnware_weights_path ), f"Weights are not found for the Text Generation Model learnware_{id}, please check the learnware.yaml or zipfile." - if semantic_spec["Model Type"]["Values"] == ["PEFT Model"]: + if semantic_spec["Model"]["Values"] == ["PEFT Model"]: assert "required_learnware_ids" in learnware_config["model"], f"'required_learnware_ids' is not found for the PEFT Model learnware_{id}, please check the learnware.yaml." assert len(learnware_config["model"]["required_learnware_ids"]) != 0, f"'required_learnware_ids' can't be empty for the PEFT Model learnware_{id}, please check the learnware.yaml." 
diff --git a/learnware/market/easy/checker.py b/learnware/market/easy/checker.py index 2031f42c..8e630140 100644 --- a/learnware/market/easy/checker.py +++ b/learnware/market/easy/checker.py @@ -58,7 +58,7 @@ def check_semantic_spec(semantic_spec): "Others", ] - assert semantic_spec["Model Type"]["Values"][0] == "Others" + assert semantic_spec["Model"]["Values"][0] == "Others" if semantic_spec["Data"]["Values"][0] == "Image": assert semantic_spec["Task"]["Values"][0] in [ @@ -70,7 +70,7 @@ def check_semantic_spec(semantic_spec): "Others", ] - assert semantic_spec["Model Type"]["Values"][0] == "Others" + assert semantic_spec["Model"]["Values"][0] == "Others" if semantic_spec["Data"]["Values"][0] == "Text": assert semantic_spec["Task"]["Values"][0] in [ @@ -147,7 +147,7 @@ def __call__(self, learnware): if ( semantic_spec["Data"]["Values"] == ["Text"] and semantic_spec["Task"]["Values"] == ["Text Generation"] - and semantic_spec["Model Type"]["Values"] == ["Base Model"] + and semantic_spec["Model"]["Values"] == ["Base Model"] ): try: general_capability_spec = LLMGeneralCapabilitySpecification() @@ -193,7 +193,7 @@ def __call__(self, learnware): elif spec_type in ["RKMETextSpecification", "GenerativeModelSpecification", "LLMGeneralCapabilitySpecification"]: - if semantic_spec["Model Type"]["Values"][0] != "Others": + if semantic_spec["Model"]["Values"][0] != "Others": len_ = random.randint(10, 1000) inputs = EasyStatChecker._generate_random_text_list(10, "en", len_, len_) else: diff --git a/learnware/market/llm/organizer.py b/learnware/market/llm/organizer.py index 89a96fef..ddcbe4f0 100644 --- a/learnware/market/llm/organizer.py +++ b/learnware/market/llm/organizer.py @@ -17,18 +17,6 @@ class LLMEasyOrganizer(HeteroMapTableOrganizer): - # update base model learnware when llm benchmark list updates - def update_learnware( - self, - id: str, - zip_path: str = None, - semantic_spec: dict = None, - check_status: int = None, - ): - final_status = super(LLMEasyOrganizer, self).update_learnware(id, zip_path, semantic_spec, check_status) - if final_status == BaseChecker.USABLE_LEARNWARE and len(self._get_hetero_learnware_ids(id)): - self._update_learnware_general_capability_spec(id) - return final_status def _update_learnware_general_capability_spec( self, ids: Union[str, List[str]] @@ -69,6 +57,9 @@ def _update_learnware_general_capability_spec( yaml_config = read_yaml_to_dict(learnware_yaml_path) if "stat_specifications" in yaml_config: yaml_config["stat_specifications"].append(general_capability_spec_config) + else: + yaml_config["stat_specifications"] = [general_capability_spec_config] + pass save_dict_to_yaml(yaml_config, learnware_yaml_path) with zipfile.ZipFile(zip_path, "a") as z_file: @@ -112,7 +103,7 @@ def _get_llm_base_model_learnware_ids(self, ids: Union[str, List[str]]) -> List[ if ( semantic_spec["Data"]["Values"] == ["Text"] and semantic_spec["Task"]["Values"] == ["Text Generation"] - and semantic_spec["Model Type"]["Values"] == ["Base Model"] + and semantic_spec["Model"]["Values"] == ["Base Model"] ): ret.append(idx) return ret diff --git a/learnware/specification/module.py b/learnware/specification/module.py index d58d3263..c1b28cd8 100644 --- a/learnware/specification/module.py +++ b/learnware/specification/module.py @@ -241,7 +241,7 @@ def generate_semantic_spec( semantic_specification = dict() semantic_specification["Data"] = {"Type": "Class", "Values": [data_type] if data_type is not None else []} semantic_specification["Task"] = {"Type": "Class", "Values": [task_type] if 
task_type is not None else []} - semantic_specification["Model Type"] = {"Type": "Class", "Values": [model_type] if model_type is not None else ["Others"]} + semantic_specification["Model"] = {"Type": "Optional", "Values": [model_type] if model_type is not None else ["Others"]} semantic_specification["Library"] = { "Type": "Class", "Values": [library_type] if library_type is not None else [], diff --git a/learnware/specification/system/llm_general_capability_spec/spec.py b/learnware/specification/system/llm_general_capability_spec/spec.py index 26e6b6d5..9b43da72 100644 --- a/learnware/specification/system/llm_general_capability_spec/spec.py +++ b/learnware/specification/system/llm_general_capability_spec/spec.py @@ -114,6 +114,11 @@ def generate_stat_spec_from_system( else: logger.info("All LLMBenchmarkConfig have been evaluated before. No update.") + + def __str__(self): + spec_to_save = self.get_states() + return json.dumps(spec_to_save, separators=(",", ":")) + def save(self, filepath: str): """Save the computed specification to a specified path in JSON format. From 6da4aa3011afa123de616dc1023cff716f56c1da Mon Sep 17 00:00:00 2001 From: zzc <1196529906@qq.com> Date: Mon, 17 Mar 2025 15:34:18 +0800 Subject: [PATCH 085/108] [ENH] Complete workflow of skipping evaluation --- examples/dataset_llm_workflow/Readme.md | 16 + .../benchmark/__init__.py | 1 + .../dataset_llm_workflow/benchmark/base.py | 61 ++ .../dataset_llm_workflow/benchmark/config.py | 117 ++++ .../dataset_llm_workflow/benchmark/utils.py | 528 ++++++++++++++++++ examples/dataset_llm_workflow/build_market.py | 199 +++++++ examples/dataset_llm_workflow/eval_config.py | 155 +++++ examples/dataset_llm_workflow/workflow.py | 360 ++++++++++++ 8 files changed, 1437 insertions(+) create mode 100644 examples/dataset_llm_workflow/Readme.md create mode 100644 examples/dataset_llm_workflow/benchmark/__init__.py create mode 100644 examples/dataset_llm_workflow/benchmark/base.py create mode 100644 examples/dataset_llm_workflow/benchmark/config.py create mode 100644 examples/dataset_llm_workflow/benchmark/utils.py create mode 100644 examples/dataset_llm_workflow/build_market.py create mode 100644 examples/dataset_llm_workflow/eval_config.py create mode 100644 examples/dataset_llm_workflow/workflow.py diff --git a/examples/dataset_llm_workflow/Readme.md b/examples/dataset_llm_workflow/Readme.md new file mode 100644 index 00000000..f368f275 --- /dev/null +++ b/examples/dataset_llm_workflow/Readme.md @@ -0,0 +1,16 @@ +## 本地运行 +## Build Market +``` +# 更改用户名 +CUDA_VISIBLE_DEVICES=1 PYTHONPATH=/home/zhaozc/Learnware-Private python build_market.py +``` +## workflow +``` +CUDA_VISIBLE_DEVICES=1 PYTHONPATH=/home/zhaozc/Learnware-Private python workflow.py llm_example medical +CUDA_VISIBLE_DEVICES=1 PYTHONPATH=/home/zhaozc/Learnware-Private python workflow.py llm_example math +CUDA_VISIBLE_DEVICES=1 PYTHONPATH=/home/zhaozc/Learnware-Private python workflow.py llm_example finance +``` +## workflow not skip eval +``` +CUDA_VISIBLE_DEVICES=1 PYTHONPATH=/home/zhaozc/Learnware-Private python workflow.py llm_example medical --rebuild False --skip_eval False +``` \ No newline at end of file diff --git a/examples/dataset_llm_workflow/benchmark/__init__.py b/examples/dataset_llm_workflow/benchmark/__init__.py new file mode 100644 index 00000000..eb171900 --- /dev/null +++ b/examples/dataset_llm_workflow/benchmark/__init__.py @@ -0,0 +1 @@ +from .base import Benchmark \ No newline at end of file diff --git a/examples/dataset_llm_workflow/benchmark/base.py 
b/examples/dataset_llm_workflow/benchmark/base.py new file mode 100644 index 00000000..be783629 --- /dev/null +++ b/examples/dataset_llm_workflow/benchmark/base.py @@ -0,0 +1,61 @@ +from .config import LEARNWARE_MATH, LEARNWARE_MED, USER_MED, USER_MATH, LEARNWARE_FIN, USER_FIN, LEARNWARE_MED_IDS, LEARNWARE_MATH_IDS, LEARNWARE_FIN_IDS +from .utils import prepare_train_data, prepare_test_data +from datasets import Dataset +from typing import List, Tuple + +class Benchmark: + def __init__(self, name: str): + self.name = name + self.set_datasets(name) + + def get_benchmark_name(self): + return self.name + + def set_datasets(self, name: str): + if name == "medical": + self.learnware_dict = LEARNWARE_MED + self.learnware_ids = LEARNWARE_MED_IDS + self.user_dict = USER_MED + elif name == "math": + self.learnware_dict = LEARNWARE_MATH + self.learnware_ids = LEARNWARE_MATH_IDS + self.user_dict = USER_MATH + elif name == "finance": + self.learnware_dict = LEARNWARE_FIN + self.learnware_ids = LEARNWARE_FIN_IDS + self.user_dict = USER_FIN + else: + raise NotImplementedError("other benchmarks are not implemented") + + def get_learnware_ids(self) -> List[str]: + return self.learnware_ids + + def get_learnware_data(self, dataset_name) -> List[str]: + train_dataset, val_dataset = prepare_train_data(self.learnware_dict[dataset_name]) + train_data, val_data = train_dataset["text"], val_dataset["text"] + return train_data, val_data + + def get_learnware_dataset(self, dataset_name) -> Tuple[Dataset, Dataset]: + train_dataset, val_dataset = prepare_train_data(self.learnware_dict[dataset_name]) + return train_dataset, val_dataset + + def get_user_data(self, dataset_name) -> List[str]: + test_dataset = prepare_test_data(self.user_dict[dataset_name]) + test_data = test_dataset["text"] + return test_data + + def get_user_dataset(self, dataset_name) -> Dataset: + test_dataset = prepare_test_data(self.user_dict[dataset_name]) + return test_dataset + + # def get_learnwares(self): + # return self.learnware_dict + + # def get_users(self): + # return self.user_dict + + def get_learnware_names(self) -> List[str]: + return list(self.learnware_dict.keys()) + + def get_user_names(self) -> List[str]: + return list(self.user_dict.keys()) diff --git a/examples/dataset_llm_workflow/benchmark/config.py b/examples/dataset_llm_workflow/benchmark/config.py new file mode 100644 index 00000000..00208735 --- /dev/null +++ b/examples/dataset_llm_workflow/benchmark/config.py @@ -0,0 +1,117 @@ +LEARNWARE_MATH = { + "MWP-Instruct": "Macropodus/MWP-Instruct", + # "goat": "tiedong/goat", + "school_math_0.25M": "BelleGroup/school_math_0.25M", + "MathInstruct": "TIGER-Lab/MathInstruct", + "MetaMathQA": "meta-math/MetaMathQA", + "orca-math-word-problems-200k": "microsoft/orca-math-word-problems-200k", + "Arithmo-Data": "akjindal53244/Arithmo-Data", + # "MATH-K-100-train": "Dahoas/MATH-K-100-train", + "MATH_train": "ScalableMath/MATH_train-cleaned_processed", + "MetaMath-GSM240K": "fxmeng/MetaMath-GSM240K", + "GSM8K_zh": "meta-math/GSM8K_zh", +} + +LEARNWARE_MED = { + "AlpaCare": "lavita/AlpaCare-MedInstruct-52k", + "ChatDoctor": "lavita/ChatDoctor-HealthCareMagic-100k", + "medalpaca_cleaned": "medalpaca/medical_meadow_wikidoc,medalpaca/medical_meadow_medical_flashcards,medalpaca/medical_meadow_wikidoc_patient_information,medalpaca/medical_meadow_pubmed_causal,medalpaca/medical_meadow_mediqa,medalpaca/medical_meadow_health_advice", + # "medical_flashcards": "medalpaca/medical_meadow_medical_flashcards", + "medqa_train": 
"medalpaca/medical_meadow_medqa", + "pubmed_causal": "medalpaca/medical_meadow_pubmed_causal", + "medmcqa_train": "chenhaodev/medmcqa_instruct", + "medqa_train&pubmed_causal": "medalpaca/medical_meadow_medqa,medalpaca/medical_meadow_pubmed_causal", + "AlpaCare&ChatDoctor": "LinhDuong/chatdoctor-5k,lavita/ChatDoctor-HealthCareMagic-100k,lavita/AlpaCare-MedInstruct-52k", + "medalpaca_cleaned&AlpaCare&ChatDoctor": "medalpaca/medical_meadow_wikidoc,medalpaca/medical_meadow_medical_flashcards,medalpaca/medical_meadow_wikidoc_patient_information,medalpaca/medical_meadow_pubmed_causal,medalpaca/medical_meadow_mediqa,medalpaca/medical_meadow_health_advice,LinhDuong/chatdoctor-5k,lavita/ChatDoctor-HealthCareMagic-100k,lavita/AlpaCare-MedInstruct-52k", + "medqa_train&medmcqa_train": "medalpaca/medical_meadow_medqa,chenhaodev/medmcqa_instruct", +} + +LEARNWARE_FIN = { + "australian": "ChanceFocus/flare-australian", + "cra_lendingclub": "ChanceFocus/cra-lendingclub", + "fiqasa": "ChanceFocus/flare-fiqasa", + "fpb": "ChanceFocus/en-fpb", + "german": "ChanceFocus/flare-german", + "headlines": "ChanceFocus/flare-headlines", + "ner": "ChanceFocus/flare-ner", + "sm_acl": "ChanceFocus/flare-sm-acl", + "sm_bigdata": "TheFinAI/en-forecasting-bigdata", + "sm_cikm": "ChanceFocus/flare-sm-cikm", + # "convfinqa": "ChanceFocus/flare-convfinqa", + # "finqa": "ChanceFocus/flare-finqa", + # "cra_ccf": "ChanceFocus/cra-ccf", + # "cra_ccfraud": "ChanceFocus/cra-ccfraud", + # "cra_polish": "ChanceFocus/cra-polish", + # "cra_taiwan": "TheFinAI/cra-taiwan", +} + +USER_MED = { + "medmcqa": "openlifescienceai/medmcqa", + "medqa_4options": "GBaker/MedQA-USMLE-4-options-hf", + "anatomy": "hails/mmlu_no_train,anatomy", + "college_biology": "hails/mmlu_no_train,college_biology", + "clinical_knowledge": "hails/mmlu_no_train,clinical_knowledge", + "college_medicine": "hails/mmlu_no_train,college_medicine", + "medical_genetics": "hails/mmlu_no_train,medical_genetics", + "professional_medicine": "hails/mmlu_no_train,professional_medicine", + "pubmedqa": "bigbio/pubmed_qa,pubmed_qa_labeled_fold0_source", +} + +USER_MATH = { + "agieval_aqua_rat": "hails/agieval-aqua-rat", + "agieval_gaokao_mathcloze": "hails/agieval-gaokao-mathcloze", + "agieval_gaokao_mathqa": "hails/agieval-gaokao-mathqa", + "agieval_math": "hails/agieval-math", + "agieval_sat_math": "hails/agieval-sat-math", + "cmmlu_college_mathematics": "haonan-li/cmmlu,college_mathematics", + "cmmlu_elementary_mathematics": "haonan-li/cmmlu,elementary_mathematics", + "cmmlu_high_school_mathematics": "haonan-li/cmmlu,high_school_mathematics", + "gsm8k": "gsm8k,main", + "mathqa": "allenai/math_qa", + "mgsm_native_cot_zh": "juletxara/mgsm,zh", + # "minerva_math_algebra": "EleutherAI/hendrycks_math,algebra", + # "minerva_math_counting_and_prob": "EleutherAI/hendrycks_math,counting_and_probability", + # "minerva_math_geometry": "EleutherAI/hendrycks_math,geometry", + # "minerva_math_intermediate_algebra": "EleutherAI/hendrycks_math,intermediate_algebra", + # "minerva_math_num_theory": "EleutherAI/hendrycks_math,number_theory", + # "minerva_math_prealgebra": "EleutherAI/hendrycks_math,prealgebra", + # "minerva_math_precalculus": "EleutherAI/hendrycks_math,precalculus", + "minerva_math": "lighteval/MATH,all", + "abstract_algebra": "hails/mmlu_no_train,abstract_algebra", + "college_mathematics": "hails/mmlu_no_train,college_mathematics", + "elementary_mathematics": "hails/mmlu_no_train,elementary_mathematics", + "high_school_mathematics": "hails/mmlu_no_train,high_school_mathematics", 
+} + +USER_FIN = { + "australian": "ChanceFocus/flare-australian", + "cra_lendingclub": "ChanceFocus/cra-lendingclub", + "fiqasa": "ChanceFocus/flare-fiqasa", + "fpb": "ChanceFocus/en-fpb", + "german": "ChanceFocus/flare-german", + "headlines": "ChanceFocus/flare-headlines", + "ner": "ChanceFocus/flare-ner", + "sm_acl": "ChanceFocus/flare-sm-acl", + "sm_bigdata": "TheFinAI/en-forecasting-bigdata", + "sm_cikm": "ChanceFocus/flare-sm-cikm", + "causal20_sc": "ChanceFocus/flare-causal20-sc", + "finarg_ecc_arc": "ChanceFocus/flare-finarg-ecc-arc", + "finarg_ecc_auc": "ChanceFocus/flare-finarg-ecc-auc", + "fomc": "ChanceFocus/flare-fomc", + "ma": "ChanceFocus/flare-ma", + "mlesg": "ChanceFocus/flare-mlesg", + "multifin_en": "ChanceFocus/flare-multifin-en", +} + +LEARNWARE_MED_IDS = [ + "00010000" +] + +LEARNWARE_MATH_IDS = [ + "00010001" +] + + +LEARNWARE_FIN_IDS = [ + "00010002" +] \ No newline at end of file diff --git a/examples/dataset_llm_workflow/benchmark/utils.py b/examples/dataset_llm_workflow/benchmark/utils.py new file mode 100644 index 00000000..669c8d86 --- /dev/null +++ b/examples/dataset_llm_workflow/benchmark/utils.py @@ -0,0 +1,528 @@ +import re +import random +from datasets import load_dataset, concatenate_datasets +from typing import List + +from .config import LEARNWARE_FIN, LEARNWARE_MATH, LEARNWARE_MED, USER_FIN + + +def preprocess_alpaca(docs): + alpaca_prompt = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Input:\n{}\n\n### Response:\n{}" + instructions = docs["instruction"] + inputs = docs["input"] + outputs = docs["output"] + texts = [] + for instruction, input, output in zip(instructions, inputs, outputs): + text = alpaca_prompt.format(instruction, input, output) + texts.append(text) + return texts + + +def preprocess_alpaca_no_label(docs): + alpaca_no_label_prompt = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Input:\n{}\n\n### Response:\n" + instructions = docs["instruction"] + inputs = docs["input"] + texts = [] + for instruction, input in zip(instructions, inputs): + text = alpaca_no_label_prompt.format(instruction, input) + texts.append(text) + return texts + + +def preprocess_alpaca_no_input(docs): + alpaca_no_input_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Response:\n{}" + instructions = docs["instruction"] + outputs = docs["output"] + texts = [] + for instruction, output in zip(instructions, outputs): + text = alpaca_no_input_prompt.format(instruction, output) + texts.append(text) + return texts + + +def preprocess_alpaca_no_input_no_label(docs): + alpaca_no_input_no_label_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Response:\n" + instructions = docs["instruction"] + texts = [] + for instruction in instructions: + text = alpaca_no_input_no_label_prompt.format(instruction) + texts.append(text) + return texts + + +def preprocess_qr(docs): + alpaca_no_input_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. 
\n\n### Instruction:\n{}\n\n### Response:\n{}" + instructions = docs["query"] + outputs = docs["response"] + texts = [] + for instruction, output in zip(instructions, outputs): + text = alpaca_no_input_prompt.format(instruction, output) + texts.append(text) + return texts + + +def preprocess_qr_no_label(docs): + alpaca_no_input_no_label_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Response:\n" + instructions = docs["query"] + texts = [] + for instruction in instructions: + text = alpaca_no_input_no_label_prompt.format(instruction) + texts.append(text) + return texts + + +def preprocess_qr_zh(docs): + alpaca_no_input_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Response:\n{}" + instructions = docs["query_zh"] + outputs = docs["response_zh"] + texts = [] + for instruction, output in zip(instructions, outputs): + text = alpaca_no_input_prompt.format(instruction, output) + texts.append(text) + return texts + + +def preprocess_qr_zh_no_label(docs): + alpaca_no_input_no_label_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Response:\n" + instructions = docs["query_zh"] + texts = [] + for instruction in instructions: + text = alpaca_no_input_no_label_prompt.format(instruction) + texts.append(text) + return texts + + +def preprocess_qa(docs): + alpaca_no_input_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Response:\n{}" + instructions = docs["question"] + outputs = docs["answer"] + texts = [] + for instruction, output in zip(instructions, outputs): + text = alpaca_no_input_prompt.format(instruction, output) + texts.append(text) + return texts + + +def preprocess_qa_no_label(docs): + alpaca_no_input_no_label_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Response:\n" + instructions = docs["question"] + texts = [] + for instruction in instructions: + text = alpaca_no_input_no_label_prompt.format(instruction) + texts.append(text) + return texts + + +def preprocess_qa_zh(docs): + alpaca_no_input_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Response:\n{}" + instructions = docs["question_zh"] + outputs = docs["answer_zh"] + texts = [] + for instruction, output in zip(instructions, outputs): + text = alpaca_no_input_prompt.format(instruction, output) + texts.append(text) + return texts + + +def preprocess_qa_zh_no_label(docs) -> str: + alpaca_no_input_no_label_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Response:\n" + instructions = docs["question_zh"] + texts = [] + for instruction in instructions: + text = alpaca_no_input_no_label_prompt.format(instruction) + texts.append(text) + return texts + + +def preprocess_finance(docs) -> str: + alpaca_no_input_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. 
\n\n### Instruction:\n{}\n\n### Response:\n{}" + instructions = docs["query"] + outputs = docs["answer"] + texts = [] + for instruction, output in zip(instructions, outputs): + instruction.rstrip(' Answer:') + text = alpaca_no_input_prompt.format(instruction, output) + texts.append(text) + return texts + + +def preprocess_math_train(docs) -> str: + alpaca_no_input_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. \n\n### Instruction:\n{}\n\n### Response:\n{}" + instructions = docs["question"] + outputs = docs["answer_detail"] + texts = [] + for instruction, output in zip(instructions, outputs): + text = alpaca_no_input_prompt.format(instruction, output) + texts.append(text) + return texts + + +# Copied from Master +def preprocess_medmcqa(doc) -> str: + """ + Question: + Choices: + A. + B. + C. + D. + Answer: + """ + choices = [doc["opa"], doc["opb"], doc["opc"], doc["opd"]] + option_choices = { + "A": choices[0], + "B": choices[1], + "C": choices[2], + "D": choices[3], + } + + prompt = "Question: " + doc["question"] + "\nChoices:\n" + for choice, option in option_choices.items(): + prompt += f"{choice.upper()}. {option}\n" + prompt += "Answer:" + return prompt + + +def preprocess_medmcqa_val(docs): + opas = docs["opa"] + opbs = docs["opb"] + opcs = docs["opc"] + opds = docs["opd"] + questions = docs["question"] + option_ids = docs["cop"] + texts = [] + for opa, opb, opc, opd, question, option_id in zip(opas, opbs, opcs, opds, questions, option_ids): + option_choices = { + "A": opa, + "B": opb, + "C": opc, + "D": opd, + } + prompt = "Question: " + question + "\nChoices:\n" + for choice, option in option_choices.items(): + prompt += f"{choice.upper()}. {option}\n" + prompt += f"Answer: {list(option_choices.keys())[option_id]}" + texts.append(prompt) + return texts + + +def preprocess_medqa(doc) -> str: + option_choices = { + "A": doc["ending0"], + "B": doc["ending1"], + "C": doc["ending2"], + "D": doc["ending3"], + } + answers = "".join((f"{k}. {v}\n") for k, v in option_choices.items()) + return f"Question: {doc['sent1']}\n{answers}Answer:" + + +def preprocess_medqa_val(docs): + ending0s = docs["ending0"] + ending1s = docs["ending1"] + ending2s = docs["ending2"] + ending3s = docs["ending3"] + sent1s = docs["sent1"] + labels = docs["label"] + texts = [] + for sent1, ending0, ending1, ending2, ending3, label in zip(sent1s, ending0s, ending1s, ending2s, ending3s, labels): + option_choices = { + "A": ending0, + "B": ending1, + "C": ending2, + "D": ending3, + } + answers = "".join((f"{k}. {v}\n") for k, v in option_choices.items()) + texts.append(f"Question: {sent1}\n{answers}Answer: {list(option_choices.keys())[label]}") + return texts + + +def preprocess_mmlu(doc) -> str: + question = doc["question"].strip() + choices = doc["choices"] + return "{}\nA. {}\nB. {}\nC. {}\nD. {}\nAnswer:".format( + question, + choices[0], + choices[1], + choices[2], + choices[3] + ) + + +def preprocess_mmlu_val(docs): + questions = docs["question"] + choices = docs["choices"] + answers = docs["answer"] + texts = [] + for question, options, answer in zip(questions, choices, answers): + texts.append( + "{}\nA. {}\nB. {}\nC. {}\nD. 
{}\nAnswer: {}".format( + question.strip(), + options[0], + options[1], + options[2], + options[3], + ["A", "B", "C", "D"][answer] + ) + ) + return texts + + +def preprocess_pubmedqa(doc) -> str: + ctxs = "\n".join(doc["CONTEXTS"]) + return "Abstract: {}\nQuestion: {}\nAnswer:".format( + ctxs, + doc["QUESTION"], + ) + + +def preprocess_pubmedqa_val(docs): + contexts_list = docs["CONTEXTS"] + questions = docs["QUESTION"] + answers = docs["final_decision"] + texts = [] + for contexts, question, answer in zip(contexts_list, questions, answers): + ctxs = "\n".join(contexts) + texts.append("Abstract: {}\nQuestion: {}\nAnswer: {}".format(ctxs, question, answer)) + return texts + + +def preprocess_agieval(doc) -> str: + return doc["query"] + + +def preprocess_cmmlu(doc) -> str: + question = doc["Question"].strip() + return "{}\nA. {}\nB. {}\nC. {}\nD. {}\n答案:".format( + question, + doc["A"], + doc["B"], + doc["C"], + doc["D"] + ) + + +def preprocess_cmmlu_val(docs): + questions = docs["Question"] + as_ = docs["A"] + bs = docs["B"] + cs = docs["C"] + ds = docs["D"] + answers = docs["Answer"] + texts = [] + for question, a, b, c, d, answer in zip(questions, as_, bs, cs, ds, answers): + texts.append("{}\nA. {}\nB. {}\nC. {}\nD. {}\n答案:{}".format( + question.strip(), a, b, c, d, answer + )) + return texts + + +def preprocess_mathqa(doc) -> str: + return "Question: {}\nAnswer:".format( + doc["Problem"] + ) + + +def preprocess_mgsm(doc) -> str: + return "问题: "+doc["question"]+"\n逐步解答:" + + +def preprocess_gsm8k(doc) -> str: + return "Question: {}\nAnswer:".format(doc["question"]) + + +def preprocess_mathqa_val(docs): + problems = docs["Problem"] + corrects = docs["correct"] + options = docs["options"] + texts = [] + for problem, correct, option in zip(problems, corrects, options): + choices = [ + c[4:].rstrip(" ,") + for c in re.findall(r"[abcd] \) .*?, |e \) .*?$", option) + ] + + # answer = ['a', 'b', 'c', 'd', 'e'].index(correct) + texts.append("Question: {}\na. {}\nb. {}\nc. {}\nd. {}\ne. {}\nAnswer: {}".format(problem, choices[0], choices[1], choices[2], choices[3], choices[4], correct)) + return texts + +def preprocess_mgsm_val(docs): + questions = docs["question"] + answers = docs["answer"] + texts = [question + "\n" + answer for question, answer in zip(questions, answers)] + return texts + + +def preprocess_gsm8k_val(docs): + instructions = docs["question"] + outputs = docs["answer"] + texts = [] + for instruction, output in zip(instructions, outputs): + text = f"Question: {instruction}\nAnswer: {output}" + texts.append(text) + return texts + + +def preprocess_math(doc: dict) -> str: + return "Problem:" + "\n" + doc["problem"] + "\n\n" + "Solution:" + + +def math_fewshot_prompt(doc: dict) -> str: + return "Problem:" + "\n" + doc["problem"] + "\n\n" + "Solution:" + doc["solution"] + + +def math_fewshot_samples() -> list[dict]: + return [ + { + "problem": "Find the domain of the expression $\\frac{\\sqrt{x-2}}{\\sqrt{5-x}}$.}", + "solution": "The expressions inside each square root must be non-negative. Therefore, $x-2 \\ge 0$, so $x\\ge2$, and $5 - x \\ge 0$, so $x \\le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\\boxed{[2,5)}$.\nFinal Answer: The final answer is $[2,5)$. 
I hope it is correct.", + "few_shot": "1", + }, + { + "problem": "If $\\det \\mathbf{A} = 2$ and $\\det \\mathbf{B} = 12,$ then find $\\det (\\mathbf{A} \\mathbf{B}).$", + "solution": "We have that $\\det (\\mathbf{A} \\mathbf{B}) = (\\det \\mathbf{A})(\\det \\mathbf{B}) = (2)(12) = \\boxed{24}.$\nFinal Answer: The final answer is $24$. I hope it is correct.", + "few_shot": "1", + }, + { + "problem": "Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight?", + "solution": "If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\\cdot 12\\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\\cdot15\\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$:\n\\begin{align*}\n30n&=480\\\n\\Rightarrow\\qquad n&=480/30=\\boxed{16}\n\\end{align*}\nFinal Answer: The final answer is $16$. I hope it is correct.", + "few_shot": "1", + }, + { + "problem": "If the system of equations\n\n\\begin{align*}\n6x-4y&=a,\\\n6y-9x &=b.\n\\end{align*}has a solution $(x, y)$ where $x$ and $y$ are both nonzero,\nfind $\\frac{a}{b},$ assuming $b$ is nonzero.", + "solution": "If we multiply the first equation by $-\\frac{3}{2}$, we obtain\n\n$$6y-9x=-\\frac{3}{2}a.$$Since we also know that $6y-9x=b$, we have\n\n$$-\\frac{3}{2}a=b\\Rightarrow\\frac{a}{b}=\\boxed{-\\frac{2}{3}}.$$\nFinal Answer: The final answer is $-\\frac{2}{3}$. I hope it is correct.", + "few_shot": "1", + }, + ] + + +def preprocess_finance_test(doc) -> str: + return doc["query"] + + +PROCESS_FUNC = { + # medical user + "openlifescienceai/medmcqa": preprocess_medmcqa, + "GBaker/MedQA-USMLE-4-options-hf": preprocess_medqa, + "hails/mmlu_no_train": preprocess_mmlu, + "bigbio/pubmed_qa": preprocess_pubmedqa, + # math user + "hails/agieval-gaokao-mathcloze": preprocess_agieval, + "hails/agieval-gaokao-mathqa": preprocess_agieval, + "hails/agieval-aqua-rat": preprocess_agieval, + "hails/agieval-math": preprocess_agieval, + "hails/agieval-sat-math": preprocess_agieval, + "haonan-li/cmmlu": preprocess_cmmlu, + "allenai/math_qa": preprocess_mathqa, + "juletxara/mgsm": preprocess_mgsm, + # "openai/gsm8k": preprocess_gsm8k, + # math learnware + "TIGER-Lab/MathInstruct": preprocess_alpaca_no_input_no_label, + "meta-math/MetaMathQA": preprocess_qr_no_label, + "meta-math/MetaMathQA-40K": preprocess_qr_no_label, + "fxmeng/MetaMath-GSM240K": preprocess_qr_no_label, + "meta-math/MetaMathQA_GSM8K_zh": preprocess_qr_zh_no_label, + "meta-math/GSM8K_zh": preprocess_qa_zh_no_label, + # "Dahoas/MATH-K-100-train": preprocess_math_k_100, + "ScalableMath/MATH_train-cleaned_processed": preprocess_qa_no_label, + "akjindal53244/Arithmo-Data": preprocess_qa_no_label, + "microsoft/orca-math-word-problems-200k": preprocess_qa_no_label, +} + + +PROCESS_FUNC_WITH_LABEL = { + # medical user + "openlifescienceai/medmcqa": preprocess_medmcqa_val, + "GBaker/MedQA-USMLE-4-options-hf": preprocess_medqa_val, + "hails/mmlu_no_train": preprocess_mmlu_val, + "bigbio/pubmed_qa": preprocess_pubmedqa_val, + # math user + "haonan-li/cmmlu": preprocess_cmmlu_val, + "allenai/math_qa": preprocess_mathqa_val, + "juletxara/mgsm": preprocess_mgsm_val, + "lighteval/MATH": preprocess_math_train, + "gsm8k": preprocess_gsm8k_val, + # math learnware + "TIGER-Lab/MathInstruct": preprocess_alpaca_no_input, + "meta-math/MetaMathQA": preprocess_qr, + "meta-math/MetaMathQA-40K": preprocess_qr, + 
"fxmeng/MetaMath-GSM240K": preprocess_qr, + "meta-math/MetaMathQA_GSM8K_zh": preprocess_qr_zh, + "meta-math/GSM8K_zh": preprocess_qa_zh, + # "Dahoas/MATH-K-100-train": preprocess_math_k_100, + "ScalableMath/MATH_train-cleaned_processed": preprocess_math_train, + "akjindal53244/Arithmo-Data": preprocess_qa, + "microsoft/orca-math-word-problems-200k": preprocess_qa, +} + + +def prepare_train_data(dataset_name_str): + if dataset_name_str in list(PROCESS_FUNC_WITH_LABEL.keys()): + dataset = load_dataset(dataset_name_str, split="train") + if dataset_name_str == "meta-math/GSM8K_zh": + dataset = dataset.filter(lambda x: x['split']=='train') + dataset = dataset.map(lambda x: {"text": PROCESS_FUNC_WITH_LABEL[dataset_name_str](x)}, batched = True) + split_dataset = dataset.train_test_split(test_size=0.1) + train_dataset = split_dataset['train'] + val_dataset = split_dataset['test'] + elif dataset_name_str in list(LEARNWARE_FIN.values()): + train_dataset = load_dataset(dataset_name_str, split="train") + if "cra" not in dataset_name_str: + val_dataset = load_dataset(dataset_name_str, split="valid") + else: + val_dataset = load_dataset(dataset_name_str, split="validation") + train_dataset = train_dataset.map(lambda x: {"text": preprocess_finance(x)}, batched = True) + val_dataset = val_dataset.map(lambda x: {"text": preprocess_finance(x)}, batched = True) + else: + dataset_list = dataset_name_str.split(',') + train_datasets = [] + for dataset_name in dataset_list: + dataset = load_dataset(dataset_name, split="train") + dataset = dataset.remove_columns([col for col in dataset.column_names if col not in ['instruction', 'input', 'output']]) + train_datasets.append(dataset) + combined_dataset = concatenate_datasets(train_datasets) + combined_dataset = combined_dataset.map(lambda x: {"text": preprocess_alpaca(x)}, batched = True) + split_dataset = combined_dataset.train_test_split(test_size=0.1) + train_dataset = split_dataset['train'] + val_dataset = split_dataset['test'] + + return train_dataset, val_dataset + + +def prepare_test_data(dataset_name_str): + temp_list = dataset_name_str.split(",") + subset_name = None + if len(temp_list) != 1: + subset_name = temp_list[1] + dataset_name = temp_list[0] + if subset_name: + test_dataset = load_dataset(dataset_name, subset_name, split="test") + else: + test_dataset = load_dataset(dataset_name, split="test") + + if dataset_name == "gsm8k": + rnd = random.Random(1234) + train_dataset = load_dataset(dataset_name, "main", split="train") + train_dataset = train_dataset.map(lambda x: {"text": preprocess_gsm8k_val(x)}, batched=True) + train_docs = train_dataset["text"] + fewshot_examples = rnd.sample(train_docs, 5) + fewshot_context = ( + "\n\n".join(fewshot_examples) + "\n\n" + ) + test_dataset = test_dataset.map(lambda x: {"text": fewshot_context + preprocess_gsm8k(x)}) + elif dataset_name == "lighteval/MATH": + fewshot_context = ( + "\n\n".join( + [ + math_fewshot_prompt(example) + for example in math_fewshot_samples() + ] + ) + + "\n\n" + ) + test_dataset = test_dataset.map(lambda x: {"text": fewshot_context + preprocess_math(x)}) + elif dataset_name in list(USER_FIN.values()): + test_dataset = test_dataset.map(lambda x: {"text": preprocess_finance_test(x)}) + else: + test_dataset = test_dataset.map(lambda x: {"text": PROCESS_FUNC[dataset_name](x)}) + return test_dataset diff --git a/examples/dataset_llm_workflow/build_market.py b/examples/dataset_llm_workflow/build_market.py new file mode 100644 index 00000000..713affc2 --- /dev/null +++ 
b/examples/dataset_llm_workflow/build_market.py @@ -0,0 +1,199 @@ +import shutil +from learnware.market import instantiate_learnware_market +from learnware.specification import generate_semantic_spec +from learnware.specification.module import generate_generative_model_spec, generate_rkme_text_spec + +from benchmark import Benchmark +from benchmark.config import LEARNWARE_FIN, LEARNWARE_MED, LEARNWARE_MATH + +import os +import zipfile +import torch + + +def prepare_learnware(benchmark_name, name): + dataset_name = name + default_path = "learnware_pool/default/" + + if dataset_name == "fiqasa": + base_model = "Meta-Llama-3.1-8B" + elif dataset_name == "australian": + base_model = "Meta-Llama-3.1-8B-Instruct" + else: + base_model = "Qwen2.5-7B" + + model_folder = f"models/{base_model}/{dataset_name}" + versions = sorted(os.listdir(model_folder)) + + for i, version in enumerate(versions): + folder_path = f"learnware_pool/{benchmark_name}/learnwares/{dataset_name}-{i+1}" + os.makedirs(folder_path, exist_ok=True) + copy_adapter(folder_path, version, model_folder) + update_from_default(folder_path, os.path.join(default_path, base_model)) + build_specification_from_cache(folder_path, dataset_name) + zip_dir = f"learnware_pool/{benchmark_name}/zips" + os.makedirs(zip_dir, exist_ok=True) + zip_path = os.path.join(zip_dir, f"{dataset_name}-{i+1}.zip") + compress_folder_to_zip(folder_path, zip_path) + + +def add_learnware_to_market(benchmark_name, name, market): + dataset_name = name + default_path = "learnware_pool/default/" + benchmark2scenario = { + "medical": "Health", + "finance": "Financial", + "math": "Others" + } + + if dataset_name == "fiqasa": + base_model = "Meta-Llama-3.1-8B" + base_model_path = "NousResearch/Meta-Llama-3.1-8B" + license = "Others" + elif dataset_name == "australian": + base_model = "Meta-Llama-3.1-8B-Instruct" + base_model_path = "NousResearch/Meta-Llama-3.1-8B-Instruct" + license = "Others" + else: + base_model = "Qwen2.5-7B" + base_model_path = "Qwen/Qwen2.5-7B" + license = "Apache-2.0" + + model_folder = f"models/{base_model}/{dataset_name}" + versions = sorted(os.listdir(model_folder)) + + for i, version in enumerate(versions): + folder_path = f"learnware_pool/{benchmark_name}/learnwares/{dataset_name}-{i+1}" + os.makedirs(folder_path, exist_ok=True) + copy_adapter(folder_path, version, model_folder) + update_from_default(folder_path, os.path.join(default_path, base_model)) + build_specification_from_cache(folder_path, dataset_name) + zip_dir = f"learnware_pool/{benchmark_name}/zips" + os.makedirs(zip_dir, exist_ok=True) + zip_path = os.path.join(zip_dir, f"{dataset_name}-{i+1}.zip") + compress_folder_to_zip(folder_path, zip_path) + + semantic_spec = generate_semantic_spec( + name=f"{dataset_name}-{i+1}", + description=f"LoRA adapter fine-tuned using SFT on the {dataset_name} dataset. 
Hugging Face path of its base model: {base_model_path}", + data_type="Text", + model_type="PEFT Model", + task_type="Text Generation", + library_type="PyTorch", + scenarios=[benchmark2scenario[benchmark_name]], + license=license, + input_description=None, + output_description=None, + ) + # semantic_spec = generate_semantic_spec( + # name=name, + # description=name, + # data_type="Text", + # model_type="Base Model", + # task_type="Text Generation", + # library_type="PyTorch", + # scenarios=["Others"], + # license="Others", + # input_description=None, + # output_description=None, + # ) + market.add_learnware(zip_path, semantic_spec) + + +def update_from_default(folder_path, default_path): + for item in os.listdir(default_path): + src_item = os.path.join(default_path, item) + dest_item = os.path.join(folder_path, item) + + if not os.path.exists(dest_item): + print(f"Copy default files to {dest_item}") + if os.path.isdir(src_item): + shutil.copytree(src_item, dest_item) + else: + shutil.copy2(src_item, dest_item) + + +def copy_adapter(folder_path, version, model_folder): + if not os.path.exists(os.path.join(folder_path, "adapter")): + print(f"Copy adapter files from {model_folder}/{version} to {folder_path}") + os.makedirs(folder_path, exist_ok=True) + shutil.copytree( + os.path.join(model_folder, version, "adapter"), + os.path.join(folder_path, "adapter")) + + +def compress_folder_to_zip(folder_path, zip_file_path): + """ + 将指定文件夹压缩为 ZIP 文件。 + + :param folder_path: 要压缩的文件夹路径 + :param zip_file_path: 生成的 ZIP 文件路径 + """ + if not os.path.exists(zip_file_path): + print(f"Compress folder to zip_path {zip_file_path}") + with zipfile.ZipFile(zip_file_path, 'w', zipfile.ZIP_DEFLATED) as zipf: + for root, dirs, files in os.walk(folder_path): + for file in files: + file_path = os.path.join(root, file) + # 将文件添加到 ZIP 中,并保留相对路径 + arcname = os.path.relpath(file_path, folder_path) + zipf.write(file_path, arcname) + + +def build_specification_from_cache(folder_path, dataset_name): + rkme_path = os.path.join(folder_path, "rkme.json") + generative_path = os.path.join(folder_path, "generative.pth") + + if not os.path.exists(rkme_path): + print(f"Build RKME from cache to {rkme_path}") + if dataset_name in LEARNWARE_FIN: + src_path = f"/home/zhaozc/text_learnware/llama3-finetune/storage/rkmes/finance/reduced_set_size_100/gamma_0.1/learnware/{dataset_name}.json" + shutil.copy2(src_path, rkme_path) + elif dataset_name in LEARNWARE_MED: + src_path = f"/home/zhaozc/text_learnware/llama3-finetune/storage/rkmes/medical/reduced_set_size_100/gamma_0.1/learnware/{dataset_name}.json" + shutil.copy2(src_path, rkme_path) + elif dataset_name in LEARNWARE_MATH: + src_path = f"/home/zhaozc/text_learnware/llama3-finetune/storage/rkmes/math/reduced_set_size_100/gamma_0.1/learnware/{dataset_name}.json" + shutil.copy2(src_path, rkme_path) + + if not os.path.exists(generative_path): + print(f"Build PAVE from cache to {generative_path}") + if dataset_name in LEARNWARE_FIN: + finetuned_checkpoint = torch.load(f"/home/shihy/drive/LLM-finance-GridSearch-qwen/condidate-{1}/learnware-{dataset_name}/finetuned.pt", weights_only=False) + elif dataset_name in LEARNWARE_MED: + finetuned_checkpoint = torch.load(f"/home/shihy/drive/LLM-med-GridSearch-qwen-backup/condidate-{0}/{dataset_name}/finetuned.pt", weights_only=False) + elif dataset_name in LEARNWARE_MATH: + finetuned_checkpoint = torch.load(f"/home/shihy/drive/LLM-math-GridSearch-qwen/condidate-{0}/{dataset_name}/finetuned.pt", weights_only=False) + else: + raise 
NotImplementedError("Invalid dataset_name") + + finetuned_state_dict = finetuned_checkpoint["state_dict"]["model"] + task_vector = torch.concatenate([ + p.reshape(-1) for n, p in finetuned_state_dict.items() + ]) + torch.save({ + "type": "GenerativeModelSpecification", + "task_vector": task_vector.detach().cpu() + }, generative_path) + + + +def build_market(benchmark_name, rebuild=True): + llm_market = instantiate_learnware_market(market_id=f"llm_{benchmark_name}", name="llm", rebuild=rebuild) + benchmark = Benchmark(benchmark_name) + learnware_names = benchmark.get_learnware_names() + print("Leanrware Names:", ", ".join(learnware_names)) + for name in learnware_names: + title = "="*20 + name + "="*20 + print(title) + # train_dataset, _ = benchmark.get_learnware_dataset(name) + add_learnware_to_market(benchmark_name, name, llm_market) + # prepare_learnware(benchmark_name, name) + print("Market size after adding learnware:", len(llm_market)) + print("=" * len(title)) + + +if __name__ == "__main__": + build_market("medical") + build_market("math") + build_market("finance") \ No newline at end of file diff --git a/examples/dataset_llm_workflow/eval_config.py b/examples/dataset_llm_workflow/eval_config.py new file mode 100644 index 00000000..c0b89976 --- /dev/null +++ b/examples/dataset_llm_workflow/eval_config.py @@ -0,0 +1,155 @@ +from typing import List + +from learnware.tests.benchmarks import LLMBenchmarkConfig + + +medical_eval_configs: List[LLMBenchmarkConfig] = [ + LLMBenchmarkConfig( + name="medmcqa", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="medqa_4options", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="mmlu_anatomy", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="mmlu_clinical_knowledge", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="mmlu_college_biology", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="mmlu_college_medicine", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="mmlu_medical_genetics", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="mmlu_professional_medicine", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="pubmedqa", + eval_metric="acc", + ), +] + +math_eval_configs: List[LLMBenchmarkConfig] = [ + LLMBenchmarkConfig( + name="agieval_aqua_rat", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="agieval_gaokao_mathcloze", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="agieval_gaokao_mathqa", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="agieval_math", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="agieval_sat_math", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="cmmlu_college_mathematics", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="cmmlu_elementary_mathematics", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="cmmlu_high_school_mathematics", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="gsm8k", + eval_metric="exact_match,flexible-extract", + ), + LLMBenchmarkConfig( + name="mathqa", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="mgsm_native_cot_zh", + eval_metric="exact_match,flexible-extract", + ), + LLMBenchmarkConfig( + name="minerva_math", + eval_metric="exact_match", + ), + LLMBenchmarkConfig( + name="mmlu_abstract_algebra", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="mmlu_college_mathematics", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="mmlu_elementary_mathematics", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="mmlu_high_school_mathematics", + eval_metric="acc", + ), +] + 
+finance_eval_configs: List[LLMBenchmarkConfig] = [ + LLMBenchmarkConfig( + name="australian", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="cra_lendingclub", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="fiqasa", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="fpb", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="mmlu_clinical_knowledge", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="mmlu_college_biology", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="mmlu_college_medicine", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="mmlu_medical_genetics", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="mmlu_professional_medicine", + eval_metric="acc", + ), +] + +eval_configs = { + "medical": medical_eval_configs, + "math": math_eval_configs, + "finance": finance_eval_configs +} \ No newline at end of file diff --git a/examples/dataset_llm_workflow/workflow.py b/examples/dataset_llm_workflow/workflow.py new file mode 100644 index 00000000..78a43261 --- /dev/null +++ b/examples/dataset_llm_workflow/workflow.py @@ -0,0 +1,360 @@ +import fire +import time +import tempfile +import os +import copy +import pandas as pd +import torch +import shutil +import re +import numpy as np +import matplotlib.pyplot as plt +from typing import List, Dict +import lm_eval +from lm_eval.models.huggingface import HFLM + +from learnware.client import LearnwareClient +from learnware.logger import get_module_logger +from learnware.market import BaseUserInfo, instantiate_learnware_market +from learnware.learnware import Learnware +from learnware.specification import generate_semantic_spec +from learnware.specification import RKMETextSpecification +from learnware.specification import GenerativeModelSpecification +from learnware.tests.benchmarks import LLMBenchmarkConfig + +from benchmark import Benchmark +from benchmark.config import USER_FIN, USER_MATH, USER_MED +from eval_config import eval_configs + +logger = get_module_logger("llm_workflow", level="INFO") + + +def build_specification_from_cache(generative_spec_path, dataset_name): + print(f"Build PAVE from cache to {generative_spec_path}") + if dataset_name in USER_FIN: + finetuned_checkpoint = torch.load(f"/home/shihy/drive/LLM-finance-GridSearch-qwen/condidate-{1}/user-{dataset_name}/finetuned.pt", weights_only=False) + elif dataset_name in USER_MED: + finetuned_checkpoint = torch.load(f"/home/shihy/drive/LLM-med-GridSearch-qwen-backup/condidate-{0}/{dataset_name}/finetuned.pt", weights_only=False) + elif dataset_name in USER_MATH: + finetuned_checkpoint = torch.load(f"/home/shihy/drive/LLM-math-GridSearch-qwen/condidate-{0}/{dataset_name}/finetuned.pt", weights_only=False) + else: + raise NotImplementedError("Invalid dataset_name") + + finetuned_state_dict = finetuned_checkpoint["state_dict"]["model"] + task_vector = torch.concatenate([ + p.reshape(-1) for n, p in finetuned_state_dict.items() + ]) + torch.save({ + "type": "GenerativeModelSpecification", + "task_vector": task_vector.detach().cpu() + }, generative_spec_path) + + +class LLMWorkflow: + def _plot_radar_chart(self, benchmark_name, results_table): + labels = list(results_table.index) + if benchmark_name == "finance": + column_split = [ + ["PAVE", "Qwen2.5-7B", "Llama3.1-8B-Instruct", "Llama3.1-8B"], + ["PAVE", "Qwen1.5-110B", "Qwen2.5-72B", "Llama3.1-70B-Instruct"], + ["PAVE", "Random", "Best-single", "Oracle"] + ] + YTICKS = [0.2, 0.4, 0.6, 0.8, 1.0] + ylim = (0, 1.15) + x_label_fontsize = 4.5 + labels = [ + "Australian", "LendingClub", 
"FiQA-SA", "FPB", "German", "Headlines", + "NER", "ACL18", "BigData22", "CIKM18", "SC", "FinArg-ARC", "FinArg-ACC", + "FOMC", "MA", "MLESG", "MultiFin" + ] + elif benchmark_name == "math": + column_split = [ + ["PAVE", "Qwen2.5-7B"], + ["PAVE", "Qwen1.5-110B"], + ["PAVE", "Random", "Best-single", "Oracle"] + ] + YTICKS = [0.4, 0.6, 0.8, 1.0] + ylim = (0.3, 1.3) + x_label_fontsize = 5 + elif benchmark_name == "medical": + column_split = [ + ["PAVE", "Qwen2.5-7B"], + ["PAVE", "Flan-PaLM-540B"], + ["PAVE", "Random", "Best-single", "Oracle"] + ] + YTICKS = [0.8, 0.9, 1.0] + ylim = (0.75, 1.1) + x_label_fontsize = 8 + + num_vars = len(labels) + + angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist() + angles += angles[:1] + + fig, axes = plt.subplots(1, 3, figsize=(16, 5), subplot_kw=dict(polar=True)) + + model_names = [ + "PAVE vs Base Model", + "PAVE vs Large-scale Model", + "Retrieve Learnware" + ] + + colors = [ + np.array([0.9, 0.17, 0.31]), + np.array([1.0, 0.49, 0.0]), + np.array([0.19, 0.55, 0.91]), + np.array([0.56, 0.74, 0.56]), + np.array([0.66, 0.66, 0.66]) + ] + + for i, (ax, model_name) in enumerate(zip(axes, model_names)): + ax.set_xticks(angles[:-1]) + ax.set_yticks(YTICKS) + ax.set_xticklabels(labels, fontsize=x_label_fontsize, rotation=30) + ax.set_yticklabels([str(y) for y in YTICKS]) + ax.set_ylim(ylim[0], ylim[1]) + ax.set_title(model_name, pad=30) + + methods = column_split[i] + + for i, (method, color) in enumerate(zip(methods, colors[:len(methods)])): + if i == 0: + zorder = 2 + else: + zorder = 1 + + values = (results_table[method] / results_table["Oracle"]).tolist() + values += values[:1] + + ax.plot(angles, values, color=color, linewidth=2, label=method, zorder=zorder) + ax.fill(angles, values, color=color, alpha=0.1, zorder=zorder) + + ax.legend(loc="lower left", fontsize=8, bbox_to_anchor=(0.85, 0.9)) + + plt.tight_layout() + os.makedirs("results/figs", exist_ok=True) + # plt.savefig(f"results/figs/llm-{benchmark_name}.pdf") + + def _anlysis_table(self, benchmark_name, table, score_results): + if benchmark_name == 'finance': + start_column_id = 7 + else: # math / medical + start_column_id = 3 + table = table[:-1] + performance = table.melt(id_vars=['Dataset'], value_vars=table.columns[start_column_id:], var_name="Source_Config") + performance_extra = table.iloc[:, :start_column_id] + performance = pd.concat([performance, performance["Source_Config"].str.extract(r"(.+)-(\d+)").rename(columns={0:"Learnware"})], axis=1) + performance["Learnware"] = performance["Learnware"].apply(lambda s: s[:-1] if s[-1] == "-" else s) + performance = performance.rename(columns={"Dataset": "User"}) + performance.drop(columns=[1], inplace=True) + perf_merged = performance[["User", "Learnware", "value"]].groupby(["Learnware", "User"]).mean().reset_index() + + performance_extra = performance_extra.rename(columns={"Dataset": "User"}) + performance_extra = performance_extra.set_index("User") + + score_results = pd.DataFrame(score_results) + score_results["Rank-PAVE"] = score_results.groupby("User")["Similarity"].rank(method="min", ascending=False).astype(int) - 1 + adaptation_info = pd.merge(score_results, perf_merged, on=["Learnware", "User"]) + random_value = (adaptation_info[["User", "value"]] + .groupby(['User']).mean()).rename(columns={"value": "Random"}) + oracle_value = (adaptation_info[["User", "value"]] + .groupby(['User']).max()).rename(columns={"value": "Oracle"}) + pave_value = (adaptation_info[adaptation_info["Rank-PAVE"] < 1][["User", "value"]] + 
.groupby(['User']).mean()).rename(columns={"value": "PAVE"}) + + # Best-single + perf_pivot = perf_merged.pivot(index="User", columns="Learnware", values="value") + best_column = perf_pivot.mean().idxmax() + best_single = perf_pivot[[best_column]].rename(columns={best_column: 'Best-single'}) + + adaptation_table = pd.concat([random_value, best_single, pave_value, oracle_value], axis=1) + + # join performance_extra + adaptation_table = performance_extra.join(adaptation_table) + + # Avg Rank + ranks = adaptation_table.rank(axis=1, method="min", ascending=False) + avg_rank = ranks.mean() + + # PAVE win/tie/loss + pave_scores = adaptation_table["PAVE"] + win_tie_loss = {} + + for col in adaptation_table.columns: + if col == "PAVE": + continue + win = (pave_scores > adaptation_table[col]).sum() + tie = (pave_scores == adaptation_table[col]).sum() + loss = (pave_scores < adaptation_table[col]).sum() + win_tie_loss[col] = f"{win}/{tie}/{loss}" + + # Oracle win/tie/loss + oracle_scores = adaptation_table["Oracle"] + win_tie_loss_o = {} + + for col in adaptation_table.columns: + if col == "Oracle": + continue + win = (oracle_scores > adaptation_table[col]).sum() + tie = (oracle_scores == adaptation_table[col]).sum() + loss = (oracle_scores < adaptation_table[col]).sum() + win_tie_loss_o[col] = f"{win}/{tie}/{loss}" + + adaptation_table.loc['Avg.'] = adaptation_table.mean() + adaptation_table.loc["Avg. rank"] = avg_rank + adaptation_table = adaptation_table.round(2) + adaptation_table.loc["PAVE (win/tie/loss)"] = win_tie_loss + adaptation_table.loc["Oracle (win/tie/loss)"] = win_tie_loss_o + + os.makedirs("results/tables", exist_ok=True) + # adaptation_table.to_csv(f"results/tables/llm-{benchmark_name}.csv") + print(adaptation_table) + + return adaptation_table + + def _prepare_market(self, benchmark: Benchmark, rebuild=False): + client = LearnwareClient() + self.llm_benchmark = benchmark + self.llm_market = instantiate_learnware_market(market_id=f"llm_{self.llm_benchmark.name}", name="llm", rebuild=rebuild) + self.user_semantic = client.get_semantic_specification(self.llm_benchmark.learnware_ids[0]) + self.user_semantic["Name"]["Values"] = "" + self.user_semantic["Description"]["Values"] = "" + self.user_semantic["License"]["Values"] = ['Apache-2.0', 'Others'] + + if len(self.llm_market) == 0 or rebuild is True: + for learnware_id in self.llm_benchmark.learnware_ids: + with tempfile.TemporaryDirectory(prefix="llm_benchmark_") as tempdir: + zip_path = os.path.join(tempdir, f"{learnware_id}.zip") + for i in range(20): + try: + semantic_spec = client.get_semantic_specification(learnware_id) + client.download_learnware(learnware_id, zip_path) + self.llm_market.add_learnware(zip_path, semantic_spec) + break + except Exception: + time.sleep(1) + continue + + logger.info("Total Item: %d" % (len(self.llm_market))) + + def _prepare_market_from_disk(self, benchmark: Benchmark, rebuild=False): + self.llm_benchmark = benchmark + self.llm_market = instantiate_learnware_market(market_id=f"llm_{self.llm_benchmark.name}", name="llm", rebuild=rebuild) + self.user_semantic = copy.deepcopy(self.llm_market.get_learnwares()[0].specification.semantic_spec) + self.user_semantic["Name"]["Values"] = "" + self.user_semantic["Description"]["Values"] = "" + self.user_semantic["License"]["Values"] = ['Apache-2.0', 'Others'] + logger.info("Total Item: %d" % (len(self.llm_market))) + + + def build_specification_and_cache(self, name, saved_folder, benchmark: Benchmark): + generative_spec = GenerativeModelSpecification() + 
generative_spec_path = os.path.join(saved_folder, name, "generative.pth") + + os.makedirs(os.path.join(saved_folder, name), exist_ok=True) + + if os.path.exists(generative_spec_path): + generative_spec.load(generative_spec_path) + else: + # build_specification_from_cache(generative_spec_path, name) + # generative_spec.load(generative_spec_path) + train_dataset = benchmark.get_user_dataset(name) + generative_spec.generate_stat_spec_from_data(dataset=train_dataset) + generative_spec.save(generative_spec_path) + + return generative_spec + + def _get_scores(self, base_model: str, adapter_path, benchmark_configs: List[LLMBenchmarkConfig], batch_size='auto'): + # learnware.instantiate_model() + # model = learnware.get_model().get_model() + task_manager = lm_eval.tasks.TaskManager() + task_names = [config.name for config in benchmark_configs] + + lm_obj = HFLM(pretrained=base_model, peft=adapter_path, batch_size=batch_size) + results = lm_eval.simple_evaluate( + model=lm_obj, + tasks=task_names, + task_manager=task_manager, + ) + + score_list = [] + for config in benchmark_configs: + score = results['results'][config.name][f'{config.eval_metric},none'] * 100 + score = round(score, 2) + logger.info(f"Name: {config.name}, Score: {score}") + score_list.append(score) + + return score_list + + def llm_example(self, benchmark_name, rebuild=False, skip_eval=True): + benchmark = Benchmark(benchmark_name) + # self._prepare_market(benchmark, rebuild) # online + self._prepare_market_from_disk(benchmark, rebuild) + user_names = benchmark.get_user_names() + + score_results = { + "User": [], + "Learnware": [], + "Similarity": [] + } + + for name in user_names: + title = "=" * 20 + name + "=" * 20 + print(title) + + # generative_spec = self.build_specification_and_cache(name, "users", benchmark) + generative_spec = self.build_specification_and_cache(name, "users_updated", benchmark) + + user_info = BaseUserInfo( + semantic_spec=self.user_semantic, stat_info={"GenerativeModelSpecification": generative_spec} + ) + logger.info(f"Searching Market for user: {name}") + + search_result = self.llm_market.search_learnware(user_info) + single_result = search_result.get_single_results() + + scores = {} + for result in single_result: + learnware_name = result.learnware.specification.semantic_spec["Name"]["Values"] + match = re.match(r"(.+)-(\d+)", learnware_name) + dataset_name = match.group(1) + scores[dataset_name] = result.score + + # scores = {r.learnware.specification.semantic_spec["Name"]["Values"]: r.score for r in single_result} + + for k, v in scores.items(): + score_results["User"].append(name) + score_results["Learnware"].append(k) + score_results["Similarity"].append(v) + + if not skip_eval: + configs = eval_configs[benchmark_name] + all_learnwares_ids = self.llm_market.get_learnware_ids() + if benchmark_name == "medical": + score_list = self._get_scores("Qwen/Qwen2.5-7B", None, configs) + performance_table = { + "Qwen2.5-7B": score_list, + "Flan-PaLM-540B": [57.60, 67.60, 63.70, 80.40, 88.90, 76.30, 75.00, 83.80, 79.00] # copied from Open Medical LLM Leaderboard + } + datasets = [config.name for config in configs] + for learnware_id in all_learnwares_ids[:1]: + learnware = self.llm_market.get_learnware_by_ids(learnware_id) + base_model = learnware.specification.semantic_spec["Description"]["Values"].split(' ')[-1] + adapter_path = os.path.join(self.llm_market.get_learnware_dir_path_by_ids(learnware_id), "adapter") + score_list = self._get_scores(base_model, adapter_path, configs) # medical batch_size 不影响 + 
performance_table[learnware.specification.semantic_spec["Name"]["Values"]] = score_list + performance_table = pd.DataFrame(performance_table) + performance_table = performance_table._append(performance_table.mean().round(2), ignore_index=True) + performance_table.insert(0, "Dataset", datasets+['Avg']) + performance_table.to_csv(f"model_performance/{benchmark_name}-new.csv", index=False) + else: + performance_table = pd.read_csv(f"model_performance/{benchmark_name}.csv") + + results_table = self._anlysis_table(benchmark_name, performance_table, score_results) + self._plot_radar_chart(benchmark_name, results_table[:-4]) + + pd.DataFrame(score_results).to_csv(f"{benchmark_name}_test.csv", index=False) + + +if __name__ == "__main__": + fire.Fire(LLMWorkflow) From 56758b0af323c92bc64dcdc25cb857c2b6b87981 Mon Sep 17 00:00:00 2001 From: zzc <1196529906@qq.com> Date: Thu, 20 Mar 2025 16:31:47 +0800 Subject: [PATCH 086/108] [MNT] update some hyperparameter in llm generative specification and searcher --- learnware/market/llm/searcher.py | 2 +- learnware/specification/regular/text/generative.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/learnware/market/llm/searcher.py b/learnware/market/llm/searcher.py index b3c7c2a5..df652b24 100644 --- a/learnware/market/llm/searcher.py +++ b/learnware/market/llm/searcher.py @@ -122,7 +122,7 @@ def _search_by_taskvector_spec_single( return sorted_dist_list, sorted_learnware_list - def _convert_similarity_to_score(self, sorted_similarity_list, temperature=0.05): + def _convert_similarity_to_score(self, sorted_similarity_list, temperature=0.1): sorted_similarity = torch.asarray(sorted_similarity_list) sorted_similarity = torch.stack([ sorted_similarity, torch.zeros_like(sorted_similarity) diff --git a/learnware/specification/regular/text/generative.py b/learnware/specification/regular/text/generative.py index f4d7f198..12d0759d 100644 --- a/learnware/specification/regular/text/generative.py +++ b/learnware/specification/regular/text/generative.py @@ -77,8 +77,8 @@ def __init__(self, self.max_seq_length = max_seq_length self.__extra_args = { - "weight_decay_l1": 1.5, - "weight_decay_l2": .0, + "weight_decay_l1": 1.0, + "weight_decay_l2": 0.5, "max_steps": 400, "lr": 1e-5, "max_grad_norm": 1.0, From 077101a69d632d164d722b6f841ef1ddf8d868e1 Mon Sep 17 00:00:00 2001 From: zouxiaochuan Date: Fri, 14 Mar 2025 15:33:04 +0800 Subject: [PATCH 087/108] final test of llm_market --- learnware/client/learnware_client.py | 6 +++--- learnware/client/utils.py | 2 +- learnware/config.py | 2 +- learnware/learnware/__init__.py | 2 +- learnware/market/easy/checker.py | 8 ++++---- learnware/market/llm/organizer.py | 17 ++++------------- learnware/market/llm/searcher.py | 1 + learnware/specification/module.py | 2 +- .../specification/regular/text/generative.py | 5 ++++- .../system/llm_general_capability_spec/spec.py | 5 +++++ 10 files changed, 25 insertions(+), 25 deletions(-) diff --git a/learnware/client/learnware_client.py b/learnware/client/learnware_client.py index a6e52109..498e28d7 100644 --- a/learnware/client/learnware_client.py +++ b/learnware/client/learnware_client.py @@ -52,7 +52,7 @@ class SemanticSpecificationKey(Enum): DATA_TYPE = "Data" TASK_TYPE = "Task" LIBRARY_TYPE = "Library" - MODEL_TYPE = "Model Type" + MODEL_TYPE = "Model" SENARIOES = "Scenario" LICENSE = "License" @@ -492,7 +492,7 @@ def check_learnware(learnware_zip_path, semantic_specification=None): name="test", description="test", data_type="Text", - task_type="Segmentation", + 
task_type="Text Generation", scenarios="Financial", library_type="Scikit-learn", license="Apache-2.0", @@ -507,7 +507,7 @@ def check_learnware(learnware_zip_path, semantic_specification=None): with tempfile.TemporaryDirectory(prefix="learnware_") as tempdir: with zipfile.ZipFile(learnware_zip_path, mode="r") as z_file: z_file.extractall(tempdir) - + pass learnware = get_learnware_from_dirpath( id="test", semantic_spec=semantic_specification, learnware_dirpath=tempdir, ignore_error=False ) diff --git a/learnware/client/utils.py b/learnware/client/utils.py index 5e74cbaa..0bbeefc2 100644 --- a/learnware/client/utils.py +++ b/learnware/client/utils.py @@ -8,7 +8,7 @@ logger = get_module_logger(module_name="client_utils") -def system_execute(args, timeout=None, env=None, stdout=None, stderr=subprocess.PIPE): +def system_execute(args, timeout=None, env=None, stdout=None, stderr=None): env = os.environ.copy() if env is None else env args = args if isinstance(args, str) else " ".join(args) diff --git a/learnware/config.py b/learnware/config.py index 200f6fff..7df5c38b 100644 --- a/learnware/config.py +++ b/learnware/config.py @@ -101,7 +101,7 @@ def get_platform(): "Values": [ "Base Model", "Fine-tuned Model", - "Adapter Model", + "PEFT Model", "Others", ], "Type": "Optional", diff --git a/learnware/learnware/__init__.py b/learnware/learnware/__init__.py index 0776d716..31bd8f81 100644 --- a/learnware/learnware/__init__.py +++ b/learnware/learnware/__init__.py @@ -72,7 +72,7 @@ def get_learnware_from_dirpath( learnware_weights_path ), f"Weights are not found for the Text Generation Model learnware_{id}, please check the learnware.yaml or zipfile." - if semantic_spec["Model Type"]["Values"] == ["PEFT Model"]: + if semantic_spec["Model"]["Values"] == ["PEFT Model"]: assert "required_learnware_ids" in learnware_config["model"], f"'required_learnware_ids' is not found for the PEFT Model learnware_{id}, please check the learnware.yaml." assert len(learnware_config["model"]["required_learnware_ids"]) != 0, f"'required_learnware_ids' can't be empty for the PEFT Model learnware_{id}, please check the learnware.yaml." 
diff --git a/learnware/market/easy/checker.py b/learnware/market/easy/checker.py index 2031f42c..8e630140 100644 --- a/learnware/market/easy/checker.py +++ b/learnware/market/easy/checker.py @@ -58,7 +58,7 @@ def check_semantic_spec(semantic_spec): "Others", ] - assert semantic_spec["Model Type"]["Values"][0] == "Others" + assert semantic_spec["Model"]["Values"][0] == "Others" if semantic_spec["Data"]["Values"][0] == "Image": assert semantic_spec["Task"]["Values"][0] in [ @@ -70,7 +70,7 @@ def check_semantic_spec(semantic_spec): "Others", ] - assert semantic_spec["Model Type"]["Values"][0] == "Others" + assert semantic_spec["Model"]["Values"][0] == "Others" if semantic_spec["Data"]["Values"][0] == "Text": assert semantic_spec["Task"]["Values"][0] in [ @@ -147,7 +147,7 @@ def __call__(self, learnware): if ( semantic_spec["Data"]["Values"] == ["Text"] and semantic_spec["Task"]["Values"] == ["Text Generation"] - and semantic_spec["Model Type"]["Values"] == ["Base Model"] + and semantic_spec["Model"]["Values"] == ["Base Model"] ): try: general_capability_spec = LLMGeneralCapabilitySpecification() @@ -193,7 +193,7 @@ def __call__(self, learnware): elif spec_type in ["RKMETextSpecification", "GenerativeModelSpecification", "LLMGeneralCapabilitySpecification"]: - if semantic_spec["Model Type"]["Values"][0] != "Others": + if semantic_spec["Model"]["Values"][0] != "Others": len_ = random.randint(10, 1000) inputs = EasyStatChecker._generate_random_text_list(10, "en", len_, len_) else: diff --git a/learnware/market/llm/organizer.py b/learnware/market/llm/organizer.py index 89a96fef..ddcbe4f0 100644 --- a/learnware/market/llm/organizer.py +++ b/learnware/market/llm/organizer.py @@ -17,18 +17,6 @@ class LLMEasyOrganizer(HeteroMapTableOrganizer): - # update base model learnware when llm benchmark list updates - def update_learnware( - self, - id: str, - zip_path: str = None, - semantic_spec: dict = None, - check_status: int = None, - ): - final_status = super(LLMEasyOrganizer, self).update_learnware(id, zip_path, semantic_spec, check_status) - if final_status == BaseChecker.USABLE_LEARNWARE and len(self._get_hetero_learnware_ids(id)): - self._update_learnware_general_capability_spec(id) - return final_status def _update_learnware_general_capability_spec( self, ids: Union[str, List[str]] @@ -69,6 +57,9 @@ def _update_learnware_general_capability_spec( yaml_config = read_yaml_to_dict(learnware_yaml_path) if "stat_specifications" in yaml_config: yaml_config["stat_specifications"].append(general_capability_spec_config) + else: + yaml_config["stat_specifications"] = [general_capability_spec_config] + pass save_dict_to_yaml(yaml_config, learnware_yaml_path) with zipfile.ZipFile(zip_path, "a") as z_file: @@ -112,7 +103,7 @@ def _get_llm_base_model_learnware_ids(self, ids: Union[str, List[str]]) -> List[ if ( semantic_spec["Data"]["Values"] == ["Text"] and semantic_spec["Task"]["Values"] == ["Text Generation"] - and semantic_spec["Model Type"]["Values"] == ["Base Model"] + and semantic_spec["Model"]["Values"] == ["Base Model"] ): ret.append(idx) return ret diff --git a/learnware/market/llm/searcher.py b/learnware/market/llm/searcher.py index b3c7c2a5..c0437956 100644 --- a/learnware/market/llm/searcher.py +++ b/learnware/market/llm/searcher.py @@ -107,6 +107,7 @@ def _search_by_taskvector_spec_single( spec_list = [learnware.specification.get_stat_spec_by_name(stat_spec_type) for learnware in learnware_list] filtered_idx_list, similarity_list = [], [] for idx, s in enumerate(spec_list): + user_spec.task_vector = 
user_spec.task_vector.to(s.task_vector.device) similarity = float(s.similarity(user_spec)) if np.isfinite(similarity): similarity_list.append(similarity) diff --git a/learnware/specification/module.py b/learnware/specification/module.py index d58d3263..c1b28cd8 100644 --- a/learnware/specification/module.py +++ b/learnware/specification/module.py @@ -241,7 +241,7 @@ def generate_semantic_spec( semantic_specification = dict() semantic_specification["Data"] = {"Type": "Class", "Values": [data_type] if data_type is not None else []} semantic_specification["Task"] = {"Type": "Class", "Values": [task_type] if task_type is not None else []} - semantic_specification["Model Type"] = {"Type": "Class", "Values": [model_type] if model_type is not None else ["Others"]} + semantic_specification["Model"] = {"Type": "Optional", "Values": [model_type] if model_type is not None else ["Others"]} semantic_specification["Library"] = { "Type": "Class", "Values": [library_type] if library_type is not None else [], diff --git a/learnware/specification/regular/text/generative.py b/learnware/specification/regular/text/generative.py index bd658ef3..cfb16719 100644 --- a/learnware/specification/regular/text/generative.py +++ b/learnware/specification/regular/text/generative.py @@ -89,7 +89,10 @@ def task_vector(self): raise Exception("Call generate_stat_spec_from_data first!") return self._task_vector - + + @task_vector.setter + def task_vector(self, value): + self._task_vector = value def generate_stat_spec_from_data( self, diff --git a/learnware/specification/system/llm_general_capability_spec/spec.py b/learnware/specification/system/llm_general_capability_spec/spec.py index 26e6b6d5..9b43da72 100644 --- a/learnware/specification/system/llm_general_capability_spec/spec.py +++ b/learnware/specification/system/llm_general_capability_spec/spec.py @@ -114,6 +114,11 @@ def generate_stat_spec_from_system( else: logger.info("All LLMBenchmarkConfig have been evaluated before. No update.") + + def __str__(self): + spec_to_save = self.get_states() + return json.dumps(spec_to_save, separators=(",", ":")) + def save(self, filepath: str): """Save the computed specification to a specified path in JSON format. 
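# --- Editorial sketch (not part of these patches): the search-related changes
# in the two patches above are (i) moving the user's task vector onto the
# device of the stored specification before computing similarity, and
# (ii) raising the temperature used to turn similarities into scores from
# 0.05 to 0.1. A minimal illustration, assuming the similarity is a cosine
# between task vectors and that _convert_similarity_to_score (whose body is
# truncated in the hunk) applies a two-way softmax over [similarity, 0]:
import torch

def task_vector_similarity(user_vec: torch.Tensor, spec_vec: torch.Tensor) -> float:
    user_vec = user_vec.to(spec_vec.device)  # avoid CPU/CUDA device-mismatch errors
    return float(torch.nn.functional.cosine_similarity(user_vec, spec_vec, dim=0))

def similarity_to_score(similarity: float, temperature: float = 0.1) -> float:
    # softmax([s/T, 0]) reduces to sigmoid(s/T); a larger T yields softer,
    # less saturated scores.
    return float(torch.sigmoid(torch.tensor(similarity / temperature)))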
From 77b00f11ba79196a07ed37df1cbadef2e3ab6f4a Mon Sep 17 00:00:00 2001 From: zzc <1196529906@qq.com> Date: Sat, 22 Mar 2025 00:43:22 +0800 Subject: [PATCH 088/108] [MNT] Complete workflow loading local learnwares --- examples/dataset_llm_workflow/eval_config.py | 44 +++++++-- .../model_performance/finance.csv | 19 ++++ .../model_performance/math.csv | 18 ++++ .../model_performance/medical.csv | 11 +++ examples/dataset_llm_workflow/workflow.py | 95 ++++++++++++++----- 5 files changed, 159 insertions(+), 28 deletions(-) create mode 100644 examples/dataset_llm_workflow/model_performance/finance.csv create mode 100644 examples/dataset_llm_workflow/model_performance/math.csv create mode 100644 examples/dataset_llm_workflow/model_performance/medical.csv diff --git a/examples/dataset_llm_workflow/eval_config.py b/examples/dataset_llm_workflow/eval_config.py index c0b89976..9477db30 100644 --- a/examples/dataset_llm_workflow/eval_config.py +++ b/examples/dataset_llm_workflow/eval_config.py @@ -127,28 +127,60 @@ eval_metric="acc", ), LLMBenchmarkConfig( - name="mmlu_clinical_knowledge", + name="german", eval_metric="acc", ), LLMBenchmarkConfig( - name="mmlu_college_biology", + name="headlines", + eval_metric="avg_f1", + ), + LLMBenchmarkConfig( + name="ner", + eval_metric="entity_f1", + ), + LLMBenchmarkConfig( + name="sm_acl", eval_metric="acc", ), LLMBenchmarkConfig( - name="mmlu_college_medicine", + name="sm_bigdata", eval_metric="acc", ), LLMBenchmarkConfig( - name="mmlu_medical_genetics", + name="sm_cikm", eval_metric="acc", ), LLMBenchmarkConfig( - name="mmlu_professional_medicine", + name="causal20_sc", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="finarg_ecc_arc", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="finarg_ecc_auc", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="fomc", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="ma", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="mlesg", + eval_metric="acc", + ), + LLMBenchmarkConfig( + name="multifin_en", eval_metric="acc", ), ] -eval_configs = { +CONFIG = { "medical": medical_eval_configs, "math": math_eval_configs, "finance": finance_eval_configs diff --git a/examples/dataset_llm_workflow/model_performance/finance.csv b/examples/dataset_llm_workflow/model_performance/finance.csv new file mode 100644 index 00000000..a766dbb7 --- /dev/null +++ b/examples/dataset_llm_workflow/model_performance/finance.csv @@ -0,0 +1,19 @@ +Dataset,Qwen2.5-7B,Llama3.1-8B-Instruct,Llama3.1-8B,Qwen1.5-110B,Qwen2.5-72B,Llama3.1-70B-Instruct,australian-1,australian-2,australian-3,australian-4,australian-5,australian-6,australian-7,australian-8,cra_lendingclub-1,cra_lendingclub-2,cra_lendingclub-3,cra_lendingclub-4,cra_lendingclub-5,cra_lendingclub-6,fpb-1,fpb-2,fpb-3,fpb-4,german-1,german-2,german-3,german-4,german-5,german-6,german-7,german-8,headlines-1,headlines-2,headlines-3,ner-1,ner-2,ner-3,ner-4,sm_cikm-1,sm_cikm-2,sm_cikm-3,sm_cikm-4,sm_cikm-5,sm_cikm-6,sm_cikm-7,sm_cikm-8,sm_cikm-9,sm_cikm-10,sm_cikm-11,sm_cikm-12,sm_cikm-13,sm_cikm-14,sm_cikm-15,sm_cikm-16,sm_cikm-17,sm_acl-1,sm_acl-2,sm_acl-3,sm_acl-4,sm_acl-5,sm_acl-6,sm_acl-7,sm_acl-8,sm_bigdata-1,sm_bigdata-2,sm_bigdata-3,sm_bigdata-4,sm_bigdata-5,sm_bigdata-6,sm_bigdata-7,sm_bigdata-8,sm_bigdata-9,fiqasa-1,fiqasa-2,fiqasa-3,fiqasa-4,fiqasa-5,fiqasa-6,fiqasa-7,fiqasa-8 
+australian,43.17,44.6,43.17,43.17,43.17,47.48,66.91,46.76,47.48,63.31,47.48,53.96,69.06,59.71,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,42.45,41.73,42.45,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.88,43.17,43.17,43.17,43.17,43.17,43.17,43.17,43.17 +cra_lendingclub,80.82,76.33,57.34,80.82,47.01,53.07,74.02,76.44,79.64,75.88,79.3,80.12,80.42,80.34,80.82,97.51,93.98,97.4,89.15,93.57,80.82,80.82,80.82,80.82,80.82,80.82,80.82,80.82,80.82,80.82,80.82,80.82,80.82,80.82,80.82,80.82,80.82,80.38,80.82,80.82,80.82,80.82,80.82,80.82,80.82,80.82,80.82,80.82,80.82,77.11,80.82,80.82,76.25,80.82,80.79,80.82,80.71,79.67,80.82,79.67,79.26,79.38,80.82,80.82,80.82,80.82,78.86,80.82,80.82,80.01,80.82,80.82,79.38,80.82,80.79,80.82,80.82,80.82,80.82,80.82,80.82 +fiqasa,38.3,40.43,56.17,63.4,64.26,68.51,40.43,41.7,39.15,51.91,39.57,37.45,39.15,46.81,45.11,48.51,45.53,49.36,45.11,51.91,50.64,52.34,50.21,48.94,37.45,38.72,34.04,34.47,33.19,35.74,35.74,35.32,33.62,32.77,29.79,42.98,41.28,52.34,39.15,38.3,40.85,43.83,37.87,42.55,47.23,40.85,43.83,48.94,49.36,55.74,34.47,33.62,54.89,34.04,44.68,33.62,37.45,44.68,40.0,47.23,43.83,41.7,37.45,31.91,44.26,58.3,66.38,42.13,56.6,63.83,41.7,46.81,66.38,78.3,71.49,78.72,85.11,71.91,69.36,78.72,77.45 +fpb,76.08,32.78,30.72,70.72,78.35,78.04,32.37,33.09,34.64,54.85,32.27,33.09,35.05,42.78,74.85,72.16,74.54,73.51,75.05,73.4,82.58,84.43,85.57,84.43,74.02,73.81,75.46,76.19,75.15,74.12,76.19,75.77,78.56,78.04,76.6,75.57,75.36,71.96,75.57,75.67,74.74,74.74,75.57,74.54,73.92,75.46,74.33,74.33,71.55,71.86,75.77,75.77,70.52,75.98,72.78,75.88,75.98,76.8,75.88,72.16,76.19,73.3,76.39,75.67,74.54,72.37,70.52,74.95,73.92,72.06,75.36,74.85,67.94,36.91,40.52,29.38,28.66,40.1,36.49,29.38,30.72 +german,65.0,49.5,66.0,66.0,66.5,43.5,46.5,49.5,44.0,54.0,46.5,41.5,63.5,66.0,59.0,66.0,61.5,35.5,56.0,40.0,36.5,40.0,35.5,35.5,66.0,69.5,67.5,67.0,66.0,66.0,68.0,66.5,65.5,65.0,65.5,36.5,46.0,33.5,56.5,41.5,39.5,37.0,38.5,44.0,35.0,38.0,39.5,39.0,41.0,41.0,35.5,40.5,40.0,38.0,41.5,40.5,64.0,64.5,48.0,66.0,40.0,53.5,35.5,48.5,36.5,35.5,39.0,37.5,37.0,64.0,35.0,37.5,54.0,63.5,66.5,66.0,64.5,63.0,65.5,62.0,64.5 +headlines,74.81,59.95,59.95,62.96,77.84,77.53,59.95,59.95,59.95,59.95,59.95,59.95,59.95,59.95,72.19,73.97,74.39,71.99,73.01,73.84,77.51,78.25,78.1,77.94,72.25,71.8,74.55,74.15,72.1,69.66,74.32,74.08,93.21,95.87,97.76,68.58,72.91,64.07,73.58,73.95,73.26,73.16,73.85,72.9,72.31,73.25,73.66,72.72,72.09,72.02,74.66,74.54,68.28,74.33,71.64,73.96,74.08,71.52,72.68,71.25,73.78,70.8,74.78,75.08,71.57,68.77,65.3,71.07,69.57,65.69,70.54,71.41,67.05,59.95,59.95,59.95,59.95,59.95,59.95,59.95,59.95 +ner,21.75,0.62,9.01,17.89,9.36,9.52,8.13,3.45,7.53,23.06,7.86,10.15,14.12,17.88,29.23,25.64,21.96,28.88,24.59,25.09,24.66,23.5,21.93,22.62,27.86,24.71,23.79,26.38,28.97,26.64,21.69,25.28,23.77,24.8,23.36,57.82,47.07,59.42,46.84,21.87,24.57,24.15,22.13,23.28,25.37,24.52,21.59,25.82,23.48,26.59,19.54,22.61,30.18,21.76,25.79,21.15,26.27,20.12,27.12,26.94,26.15,26.99,28.8,25.14,23.29,27.98,26.94,21.0,26.68,25.79,24.25,26.87,24.39,26.46,19.39,8.26,3.0,14.61,5.68,9.14,8.55 
+sm_acl,51.1,51.4,51.34,49.3,51.56,49.38,51.18,51.77,51.75,51.91,51.32,52.04,50.78,51.4,51.02,50.94,50.65,51.08,50.86,50.48,50.56,50.43,50.38,50.51,50.81,51.13,50.94,51.16,51.02,50.94,51.34,51.1,50.67,50.7,50.75,50.65,51.21,50.75,51.29,50.54,50.89,51.8,50.94,50.27,51.16,50.43,50.4,51.08,50.81,50.56,50.08,50.73,51.53,50.7,52.34,50.32,52.69,52.77,53.82,50.65,55.99,52.1,52.39,52.12,52.07,52.15,57.42,52.5,52.23,58.06,52.42,51.96,53.84,50.83,51.13,51.1,51.4,51.67,51.24,51.32,52.02 +sm_bigdata,55.3,55.57,52.79,51.02,50.27,47.76,55.23,53.6,54.35,55.3,55.91,55.64,55.57,54.62,55.16,54.96,55.16,55.23,55.1,55.23,55.84,56.05,55.71,55.91,55.43,55.37,55.37,55.37,55.03,55.1,55.37,55.64,54.96,55.84,55.77,55.5,55.37,55.3,55.57,53.94,52.11,51.29,54.01,52.51,52.45,54.08,51.56,51.43,51.97,50.07,53.8,53.6,51.97,54.08,52.99,54.48,44.63,45.65,47.08,44.16,47.15,46.94,50.14,50.75,49.18,57.54,51.09,49.59,50.75,54.28,51.15,53.74,54.28,55.03,55.3,53.53,53.74,54.14,53.67,52.65,54.14 +sm_cikm,58.44,54.24,54.07,44.01,58.27,47.86,55.03,55.64,54.77,54.94,55.82,55.64,54.07,53.98,58.53,58.71,58.36,58.27,58.71,58.53,58.27,57.74,58.18,57.48,58.36,58.18,58.62,58.36,58.62,57.92,58.79,58.79,58.01,58.01,57.92,58.09,58.79,57.39,58.62,57.13,56.87,56.43,58.09,56.87,56.43,56.34,56.43,54.16,57.22,56.52,56.26,56.52,53.37,56.96,49.96,56.34,43.57,46.81,50.22,41.82,53.81,53.98,51.62,53.54,56.17,52.93,51.88,54.16,52.49,52.58,53.72,56.26,51.97,52.41,54.86,53.89,55.64,50.31,52.32,54.68,56.87 +causal20_sc,65.14,88.48,79.45,83.75,76.17,87.16,86.9,87.36,86.79,84.26,86.62,86.91,86.4,86.93,71.27,70.2,76.73,69.17,70.9,69.51,83.63,83.98,84.21,84.86,71.12,71.02,69.92,72.47,76.96,78.55,75.57,75.09,85.16,89.74,90.94,67.66,68.88,56.43,69.82,73.64,72.3,69.89,70.19,70.86,69.15,71.99,68.15,66.47,76.32,78.01,74.01,71.36,70.77,67.9,68.0,73.22,70.39,67.14,68.18,66.91,70.18,70.41,73.41,77.2,67.37,59.23,60.3,66.49,61.07,53.43,63.77,68.41,59.03,74.95,70.83,67.57,71.15,77.13,76.18,71.7,74.72 +finarg_ecc_arc,64.78,46.67,60.0,62.32,63.04,44.64,49.28,47.39,43.91,47.83,47.39,44.35,47.54,44.78,65.36,65.65,65.07,65.8,64.93,64.64,65.07,63.91,63.62,64.64,64.64,65.07,65.51,64.78,65.07,65.51,66.09,65.65,62.03,56.81,54.78,64.78,64.06,63.77,64.93,65.07,66.96,65.22,64.93,66.96,65.65,65.22,65.65,65.22,66.96,68.26,65.36,65.65,66.09,65.22,66.67,65.8,67.39,67.25,68.12,67.54,65.8,67.54,67.1,67.1,68.26,68.84,68.55,68.12,67.39,69.57,68.41,66.96,69.13,56.81,53.04,56.38,66.67,56.23,56.38,55.65,59.28 +finarg_ecc_auc,48.3,51.81,49.85,55.01,61.71,65.02,52.53,52.01,52.63,52.73,53.15,52.43,52.53,52.22,53.87,56.04,53.97,55.52,55.42,55.11,58.1,56.76,58.62,58.82,51.5,51.91,51.19,54.08,54.39,54.18,54.7,54.28,49.23,48.4,48.4,55.11,50.46,52.43,49.23,53.25,53.04,52.84,52.43,52.12,52.32,54.28,52.43,50.88,52.94,51.81,48.71,49.23,50.57,48.3,50.15,49.85,47.88,49.43,48.5,47.68,48.81,47.57,48.81,49.23,52.22,51.91,50.05,54.7,52.43,50.98,51.7,49.95,47.99,51.6,50.77,49.23,51.91,48.92,50.36,46.34,52.43 +fomc,60.48,29.44,34.68,58.47,57.66,66.13,30.44,33.27,35.69,35.08,31.85,35.48,34.48,34.68,61.09,61.69,61.29,61.9,62.1,62.5,62.9,63.31,62.3,62.3,60.48,59.48,60.28,60.08,60.48,61.09,60.48,60.69,61.9,62.1,60.08,60.28,61.09,61.49,60.89,60.48,59.68,60.69,61.29,59.27,59.88,60.48,60.89,59.88,58.87,58.06,60.69,60.48,60.89,59.88,61.49,60.48,60.08,60.69,61.29,60.08,61.09,60.89,60.28,61.49,60.69,58.47,56.45,61.29,60.48,60.69,62.1,60.89,60.48,38.71,41.33,40.93,41.73,35.89,35.48,37.5,35.28 
+ma,79.2,56.4,51.0,81.4,84.6,83.2,67.2,70.0,72.2,67.8,68.2,69.6,70.8,83.8,69.6,69.8,73.8,69.8,70.4,72.2,77.6,78.0,77.2,77.6,73.2,74.8,76.2,74.0,67.4,66.0,73.4,72.0,76.8,80.6,80.4,72.2,75.6,70.4,75.4,79.4,80.6,79.4,79.2,78.6,79.2,78.0,78.4,78.6,81.6,84.0,80.6,79.8,80.2,79.2,80.2,79.8,80.0,77.8,77.0,78.4,79.4,80.4,78.8,81.8,77.6,71.0,78.6,75.6,76.0,74.0,72.4,75.6,71.4,57.0,58.6,59.6,60.4,56.8,57.0,57.0,59.0 +mlesg,35.67,32.67,20.0,34.67,38.67,42.33,28.67,31.0,32.33,29.67,32.0,30.33,29.0,29.67,30.0,30.33,30.0,30.0,29.67,30.0,35.33,35.33,35.67,36.0,34.67,35.0,35.0,35.0,33.33,32.0,34.33,34.33,38.67,36.0,40.33,29.67,37.0,30.0,37.0,33.33,33.67,35.0,35.0,33.33,34.67,34.33,35.0,35.67,32.33,33.33,33.33,34.67,33.33,35.33,33.33,34.33,35.67,32.67,33.33,35.0,34.33,31.67,34.33,32.33,33.0,33.33,30.67,33.67,32.33,34.33,34.33,34.33,32.0,15.33,15.67,16.33,18.67,17.33,15.67,20.33,17.67 +multifin_en,60.99,31.32,28.39,65.38,63.55,68.5,29.85,30.77,27.66,32.6,29.3,32.42,28.94,31.32,61.17,61.54,61.36,60.26,60.07,60.26,61.9,64.29,64.1,63.55,60.62,61.72,61.72,60.62,60.99,61.17,60.81,60.26,60.26,57.88,57.69,61.72,60.81,59.71,60.81,60.26,60.62,60.44,60.44,60.44,60.44,59.71,60.62,61.54,59.71,58.24,60.81,61.54,58.97,61.9,58.79,60.44,60.81,62.09,61.17,59.34,61.17,59.89,61.54,62.45,61.54,60.26,58.06,61.36,61.17,61.72,61.17,60.62,60.26,32.23,35.53,34.98,33.52,30.77,32.23,32.23,29.3 +Avg,57.61,47.19,47.29,58.25,58.35,57.63,49.1,48.45,48.5,52.65,48.5,48.89,51.26,52.76,57.73,59.22,58.91,57.46,57.9,57.61,59.12,59.55,59.13,59.12,57.79,58.01,57.89,58.12,57.81,57.57,58.28,58.16,59.74,59.71,59.61,57.71,58.23,56.62,58.78,56.61,56.69,56.46,56.38,56.62,56.42,56.53,56.26,56.45,57.07,57.43,55.74,56.15,56.53,55.74,56.13,56.13,56.75,56.63,56.26,56.35,56.48,56.48,56.2,56.96,56.01,56.03,56.07,55.77,56.12,57.89,55.41,56.48,56.67,51.41,51.11,49.99,51.18,50.16,49.5,49.56,50.35 diff --git a/examples/dataset_llm_workflow/model_performance/math.csv b/examples/dataset_llm_workflow/model_performance/math.csv new file mode 100644 index 00000000..47781a63 --- /dev/null +++ b/examples/dataset_llm_workflow/model_performance/math.csv @@ -0,0 +1,18 @@ +Dataset,Qwen2.5-7B,Qwen1.5-110B,orca-math-word-problems-200k-1,orca-math-word-problems-200k-2,MWP-Instruct-1,GSM8K_zh-1,MATH_train-1,Arithmo-Data-1,MetaMathQA-1,MetaMathQA-2,MetaMath-GSM240K-1,school_math_0.25M-1,school_math_0.25M-2,MathInstruct-1,MathInstruct-2 +agieval_aqua_rat,41.73,38.98,40.94,41.73,41.34,38.98,39.37,38.98,40.55,40.55,41.73,38.19,38.98,39.37,40.55 +agieval_gaokao_mathcloze,16.95,38.14,12.71,13.56,11.86,7.63,17.8,13.56,5.93,5.08,14.41,9.32,6.78,9.32,17.8 +agieval_gaokao_mathqa,49.86,77.78,50.71,51.28,51.57,51.57,50.71,49.86,52.99,53.85,50.14,45.3,45.58,50.43,48.43 +agieval_math,19.8,19.3,19.8,17.2,20.2,20.6,19.6,17.0,28.7,28.1,21.1,16.7,16.9,18.2,20.1 +agieval_sat_math,55.91,57.27,57.73,57.27,57.27,55.45,50.0,55.45,55.45,57.27,55.0,54.55,56.82,55.0,55.0 +cmmlu_college_mathematics,45.71,47.62,50.48,46.67,51.43,52.38,45.71,49.52,48.57,47.62,49.52,50.48,52.38,47.62,47.62 +cmmlu_elementary_mathematics,65.65,77.83,65.22,64.78,64.35,66.96,65.65,65.65,67.39,66.96,64.78,54.35,56.09,66.09,65.22 +cmmlu_high_school_mathematics,61.59,77.44,64.63,64.02,60.98,61.59,62.8,63.41,64.02,62.8,64.63,56.71,57.32,64.02,64.63 +gsm8k,84.08,84.91,84.0,83.85,82.79,74.37,76.19,80.14,83.85,83.47,84.15,81.5,82.18,79.15,80.97 +mathqa,43.32,48.07,45.63,46.93,42.61,36.65,41.64,41.41,42.38,42.38,41.17,40.13,40.57,40.94,41.27 +mgsm_native_cot_zh,66.4,68.8,67.6,70.0,66.0,73.6,66.4,68.0,68.0,68.8,71.6,61.2,57.2,65.6,68.0 
+minerva_math,40.16,47.9,40.9,41.56,39.42,29.64,41.96,45.12,32.2,30.32,36.48,43.14,42.4,27.58,29.94 +abstract_algebra,54.0,53.0,52.0,52.0,55.0,56.0,55.0,53.0,56.0,54.0,54.0,51.0,52.0,52.0,54.0 +college_mathematics,53.0,52.0,52.0,55.0,53.0,51.0,58.0,53.0,53.0,55.0,56.0,48.0,51.0,53.0,56.0 +elementary_mathematics,72.75,78.84,72.75,73.28,75.13,74.07,73.54,75.13,73.81,73.02,73.02,70.9,71.96,73.54,74.34 +high_school_mathematics,55.93,60.0,55.19,55.19,55.56,55.93,55.93,55.93,57.04,56.67,55.56,50.74,51.11,54.81,55.19 +Avg,51.68,57.99,52.02,52.14,51.78,50.4,51.27,51.57,51.87,51.62,52.08,48.26,48.7,49.79,51.19 diff --git a/examples/dataset_llm_workflow/model_performance/medical.csv b/examples/dataset_llm_workflow/model_performance/medical.csv new file mode 100644 index 00000000..e755460a --- /dev/null +++ b/examples/dataset_llm_workflow/model_performance/medical.csv @@ -0,0 +1,11 @@ +Dataset,Qwen2.5-7B,Flan-PaLM-540B,medqa_train&pubmed_causal-1,medqa_train-1,pubmed_causal-1,medalpaca_cleaned-1,medqa_train&medmcqa_train-1,medmcqa_train-1,AlpaCare-1,ChatDoctor-1,ChatDoctor-2,AlpaCare&ChatDoctor-1,AlpaCare&ChatDoctor-2,medalpaca_cleaned&AlpaCare&ChatDoctor-1,medalpaca_cleaned&AlpaCare&ChatDoctor-2 +medmcqa,59.93,57.6,59.48,59.48,60.32,59.81,62.49,62.01,59.77,60.29,60.15,58.93,58.38,59.72,59.55 +medqa_4options,64.18,67.6,65.59,65.59,63.55,63.86,64.81,63.63,62.92,63.63,63.32,62.14,61.67,62.61,62.37 +anatomy,71.85,63.7,71.85,71.85,71.85,70.37,70.37,71.11,71.85,72.59,73.33,70.37,70.37,70.37,71.11 +clinical_knowledge,77.36,80.4,77.36,77.74,78.11,78.87,78.49,79.25,78.49,77.74,76.6,78.49,78.11,78.11,77.74 +college_biology,82.64,88.9,86.11,84.72,81.94,84.72,84.03,85.42,84.03,84.03,81.94,82.64,82.64,84.72,86.11 +college_medicine,69.36,76.3,68.79,69.94,69.36,69.94,68.79,68.21,68.79,67.05,67.63,69.36,68.79,68.79,71.1 +medical_genetics,87.0,75.0,87.0,88.0,88.0,85.0,89.0,89.0,87.0,86.0,88.0,86.0,87.0,85.0,83.0 +professional_medicine,78.68,83.8,76.84,79.78,77.57,77.94,78.68,76.47,77.21,77.57,77.21,75.74,76.1,77.21,76.84 +pubmedqa,75.2,79.0,76.0,75.8,76.8,75.8,76.8,75.8,75.0,74.8,73.8,74.8,75.0,76.2,75.6 +Avg,74.02,74.7,74.34,74.77,74.17,74.03,74.83,74.54,73.9,73.74,73.55,73.16,73.12,73.64,73.71 diff --git a/examples/dataset_llm_workflow/workflow.py b/examples/dataset_llm_workflow/workflow.py index 78a43261..663db6d5 100644 --- a/examples/dataset_llm_workflow/workflow.py +++ b/examples/dataset_llm_workflow/workflow.py @@ -6,25 +6,21 @@ import pandas as pd import torch import shutil +import json import re import numpy as np import matplotlib.pyplot as plt -from typing import List, Dict import lm_eval from lm_eval.models.huggingface import HFLM from learnware.client import LearnwareClient from learnware.logger import get_module_logger from learnware.market import BaseUserInfo, instantiate_learnware_market -from learnware.learnware import Learnware -from learnware.specification import generate_semantic_spec -from learnware.specification import RKMETextSpecification from learnware.specification import GenerativeModelSpecification -from learnware.tests.benchmarks import LLMBenchmarkConfig from benchmark import Benchmark from benchmark.config import USER_FIN, USER_MATH, USER_MED -from eval_config import eval_configs +from eval_config import CONFIG logger = get_module_logger("llm_workflow", level="INFO") @@ -132,7 +128,7 @@ def _plot_radar_chart(self, benchmark_name, results_table): ax.legend(loc="lower left", fontsize=8, bbox_to_anchor=(0.85, 0.9)) plt.tight_layout() - os.makedirs("results/figs", exist_ok=True) + # 
os.makedirs("results/figs", exist_ok=True) # plt.savefig(f"results/figs/llm-{benchmark_name}.pdf") def _anlysis_table(self, benchmark_name, table, score_results): @@ -264,18 +260,58 @@ def build_specification_and_cache(self, name, saved_folder, benchmark: Benchmark return generative_spec - def _get_scores(self, base_model: str, adapter_path, benchmark_configs: List[LLMBenchmarkConfig], batch_size='auto'): - # learnware.instantiate_model() - # model = learnware.get_model().get_model() + def _get_scores(self, benchmark_name, base_model: str, adapter_path, batch_size='auto'): + benchmark_configs = CONFIG[benchmark_name][6:7] task_manager = lm_eval.tasks.TaskManager() task_names = [config.name for config in benchmark_configs] - lm_obj = HFLM(pretrained=base_model, peft=adapter_path, batch_size=batch_size) - results = lm_eval.simple_evaluate( - model=lm_obj, - tasks=task_names, - task_manager=task_manager, - ) + if benchmark_name == "medical": + lm_obj = HFLM(pretrained=base_model, peft=adapter_path, batch_size=batch_size) + results = lm_eval.simple_evaluate( + model=lm_obj, + tasks=task_names, + task_manager=task_manager, + ) + else: + if benchmark_name == "finance": + batch_size = 32 + results_dir = f"./eval_results/{benchmark_name}" + adapter_id = adapter_path.split("/")[-2] if adapter_path else None + task_names_str = ",".join(task_names) + if adapter_path: + os.system(f"CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch -m lm_eval --model hf \ + --model_args pretrained={base_model},peft={adapter_path} \ + --tasks {task_names_str} \ + --batch_size {batch_size} \ + --output_path ./eval_results/{benchmark_name}") + elif base_model in ["Qwen/Qwen1.5-110B", "Qwen/Qwen2.5-72B", "NousResearch/Meta-Llama-3.1-70B-Instruct"]: + os.system(f"CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch --num_processes 1 -m lm_eval --model hf \ + --model_args pretrained={base_model},parallelize=True \ + --tasks {task_names_str} \ + --batch_size {batch_size} \ + --output_path ./eval_results/{benchmark_name}") + else: + os.system(f"CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch -m lm_eval --model hf \ + --model_args pretrained={base_model} \ + --tasks {task_names_str} \ + --batch_size {batch_size} \ + --output_path ./eval_results/{benchmark_name}") + + if adapter_id: + for dir_name in os.listdir(results_dir): + if adapter_id in dir_name: + results_dir_path = os.path.join(results_dir, dir_name) + results_path = os.path.join(results_dir_path, sorted(os.listdir(results_dir_path))[-1]) + break + else: + for dir_name in os.listdir(results_dir): + if dir_name == base_model.replace("/", "__"): + results_dir_path = os.path.join(results_dir, dir_name) + results_path = os.path.join(results_dir_path, sorted(os.listdir(results_dir_path))[-1]) + break + + with open(results_path, "r", encoding="utf-8") as f: + results = json.load(f) score_list = [] for config in benchmark_configs: @@ -328,23 +364,38 @@ def llm_example(self, benchmark_name, rebuild=False, skip_eval=True): score_results["Similarity"].append(v) if not skip_eval: - configs = eval_configs[benchmark_name] all_learnwares_ids = self.llm_market.get_learnware_ids() if benchmark_name == "medical": - score_list = self._get_scores("Qwen/Qwen2.5-7B", None, configs) performance_table = { - "Qwen2.5-7B": score_list, + "Qwen2.5-7B": self._get_scores(benchmark_name, "Qwen/Qwen2.5-7B", None), "Flan-PaLM-540B": [57.60, 67.60, 63.70, 80.40, 88.90, 76.30, 75.00, 83.80, 79.00] # copied from Open Medical LLM Leaderboard } - datasets = [config.name for config in configs] + elif benchmark_name 
== "math": + performance_table = { + "Qwen2.5-7B": self._get_scores(benchmark_name, "Qwen/Qwen2.5-7B", None), + "Qwen1.5-110B": self._get_scores(benchmark_name, "Qwen/Qwen1.5-110B", None) + } + elif benchmark_name == "finance": + performance_table = { + "Qwen2.5-7B": self._get_scores(benchmark_name, "Qwen/Qwen2.5-7B", None), + "Llama3.1-8B-Instruct": self._get_scores(benchmark_name, "NousResearch/Meta-Llama-3.1-8B-Instruct", None), + "Llama3.1-8B": self._get_scores(benchmark_name, "NousResearch/Meta-Llama-3.1-8B", None), + "Qwen1.5-110B": self._get_scores(benchmark_name, "Qwen/Qwen1.5-110B", None), + "Qwen2.5-72B": self._get_scores(benchmark_name, "Qwen/Qwen2.5-72B", None), + "Llama3.1-70B-Instruct": self._get_scores(benchmark_name, "NousResearch/Meta-Llama-3.1-70B-Instruct", None), + } + for learnware_id in all_learnwares_ids[:1]: learnware = self.llm_market.get_learnware_by_ids(learnware_id) base_model = learnware.specification.semantic_spec["Description"]["Values"].split(' ')[-1] adapter_path = os.path.join(self.llm_market.get_learnware_dir_path_by_ids(learnware_id), "adapter") - score_list = self._get_scores(base_model, adapter_path, configs) # medical batch_size 不影响 + score_list = self._get_scores(benchmark_name, base_model, adapter_path) performance_table[learnware.specification.semantic_spec["Name"]["Values"]] = score_list + performance_table = pd.DataFrame(performance_table) performance_table = performance_table._append(performance_table.mean().round(2), ignore_index=True) + configs = CONFIG[benchmark_name] + datasets = [config.name for config in configs] performance_table.insert(0, "Dataset", datasets+['Avg']) performance_table.to_csv(f"model_performance/{benchmark_name}-new.csv", index=False) else: @@ -353,7 +404,7 @@ def llm_example(self, benchmark_name, rebuild=False, skip_eval=True): results_table = self._anlysis_table(benchmark_name, performance_table, score_results) self._plot_radar_chart(benchmark_name, results_table[:-4]) - pd.DataFrame(score_results).to_csv(f"{benchmark_name}_test.csv", index=False) + # pd.DataFrame(score_results).to_csv(f"{benchmark_name}_test.csv", index=False) if __name__ == "__main__": From 4229aa71f3646f80a17c1cf80325ff2cd3f4c5b3 Mon Sep 17 00:00:00 2001 From: zzc <1196529906@qq.com> Date: Sat, 22 Mar 2025 00:48:18 +0800 Subject: [PATCH 089/108] [MNT] modify details --- .gitignore | 3 +++ learnware/specification/regular/base.py | 1 + 2 files changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index f361d742..b36c211d 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,9 @@ dist/ *.pkl *.hd5 *.csv +!/examples/dataset_llm_workflow/model_performance/medical.csv +!/examples/dataset_llm_workflow/model_performance/math.csv +!/examples/dataset_llm_workflow/model_performance/finance.csv *.out *.html *.dot diff --git a/learnware/specification/regular/base.py b/learnware/specification/regular/base.py index 89544556..d6851ee7 100644 --- a/learnware/specification/regular/base.py +++ b/learnware/specification/regular/base.py @@ -27,6 +27,7 @@ def similarity(self, other: TaskVectorSpecification) -> float: """Compute cosine similarity between two task vectors. """ v1, v2 = self.task_vector, other.task_vector + v2 = v2.to(v1.device) return cosine_similarity(v1, v2, dim=0) def dist(self, other: BaseStatSpecification): From 1622738a7911069eff1dd985de4b274bd33de4ac Mon Sep 17 00:00:00 2001 From: zzc <1196529906@qq.com> Date: Sun, 23 Mar 2025 00:36:19 +0800 Subject: [PATCH 090/108] [MNT] Complete llm workflow, modify details and init the Readme. 
--- .gitignore | 3 +- examples/dataset_llm_workflow/Readme.md | 107 +++++++++++++++--- .../dataset_llm_workflow/benchmark/base.py | 8 +- .../dataset_llm_workflow/benchmark/config.py | 107 +++++++++++++++++- .../model_performance/medical.csv | 16 +-- examples/dataset_llm_workflow/workflow.py | 62 ++-------- learnware/specification/regular/base.py | 2 +- 7 files changed, 221 insertions(+), 84 deletions(-) diff --git a/.gitignore b/.gitignore index b36c211d..891813c1 100644 --- a/.gitignore +++ b/.gitignore @@ -48,4 +48,5 @@ learnware_pool/ PFS/ data/ examples/results/ -examples/*/results/ \ No newline at end of file +examples/*/results/ +examples/*/user_specs/ \ No newline at end of file diff --git a/examples/dataset_llm_workflow/Readme.md b/examples/dataset_llm_workflow/Readme.md index f368f275..edab5af5 100644 --- a/examples/dataset_llm_workflow/Readme.md +++ b/examples/dataset_llm_workflow/Readme.md @@ -1,16 +1,95 @@ -## 本地运行 -## Build Market +# LLM Dataset Workflow Example + +## Introduction + +Learnware Retrieval with Parameter Vector Specification + +## Run the code + +Run the following command to get results using the model performance table (skip evaluation) in medical/math/finance scenario. We recommend you to run these. + +```bash +python workflow.py llm_example medical +python workflow.py llm_example math +python workflow.py llm_example finance ``` -# 更改用户名 -CUDA_VISIBLE_DEVICES=1 PYTHONPATH=/home/zhaozc/Learnware-Private python build_market.py + +Run the following command to get results in medical/math/finance scenario. + +```bash +python workflow.py llm_example medical --skip_eval False +python workflow.py llm_example math --skip_eval False +python workflow.py llm_example fianance --skip_eval False ``` -## workflow -``` -CUDA_VISIBLE_DEVICES=1 PYTHONPATH=/home/zhaozc/Learnware-Private python workflow.py llm_example medical -CUDA_VISIBLE_DEVICES=1 PYTHONPATH=/home/zhaozc/Learnware-Private python workflow.py llm_example math -CUDA_VISIBLE_DEVICES=1 PYTHONPATH=/home/zhaozc/Learnware-Private python workflow.py llm_example finance -``` -## workflow not skip eval -``` -CUDA_VISIBLE_DEVICES=1 PYTHONPATH=/home/zhaozc/Learnware-Private python workflow.py llm_example medical --rebuild False --skip_eval False -``` \ No newline at end of file + +## Results + +### Finance + +| User | Qwen2.5-7B | Llama3.1-8B-Instruct | Llama3.1-8B | Qwen1.5-110B | Qwen2.5-72B | Llama3.1-70B-Instruct | Random | Best-single | PAVE | Oracle | +|:----------------------|:-------------|:-----------------------|:--------------|:---------------|:--------------|:------------------------|:---------|:--------------|:-------|:---------| +| australian | 43.17 | 44.6 | 43.17 | 43.17 | 43.17 | 47.48 | 44.45 | 42.21 | 56.83 | 56.83 | +| cra_lendingclub | 80.82 | 76.33 | 57.34 | 80.82 | 47.01 | 53.07 | 81.52 | 80.82 | 92.07 | 92.07 | +| fiqasa | 38.3 | 40.43 | 56.17 | 63.4 | 64.26 | 68.51 | 46.53 | 32.06 | 76.38 | 76.38 | +| fpb | 76.08 | 32.78 | 30.72 | 70.72 | 78.35 | 78.04 | 67.95 | 77.73 | 84.25 | 84.25 | +| german | 65.0 | 49.5 | 66.0 | 66.0 | 66.5 | 43.5 | 51.5 | 65.33 | 67.06 | 67.06 | +| headlines | 74.81 | 59.95 | 59.95 | 62.96 | 77.84 | 77.53 | 72.43 | 95.61 | 95.61 | 95.61 | +| ner | 21.75 | 0.62 | 9.01 | 17.89 | 9.36 | 9.52 | 24.99 | 23.98 | 52.79 | 52.79 | +| sm_acl | 51.1 | 51.4 | 51.34 | 49.3 | 51.56 | 49.38 | 51.42 | 50.71 | 52.82 | 53.63 | +| sm_bigdata | 55.3 | 55.57 | 52.79 | 51.02 | 50.27 | 47.76 | 53.86 | 55.52 | 52.4 | 55.88 | +| sm_cikm | 58.44 | 54.24 | 54.07 | 44.01 | 58.27 | 47.86 | 55.89 | 57.98 | 
55.99 | 58.52 | +| causal20_sc | 65.14 | 88.48 | 79.45 | 83.75 | 76.17 | 87.16 | 74.71 | 88.61 | 84.17 | 88.61 | +| finarg_ecc_arc | 64.78 | 46.67 | 60.0 | 62.32 | 63.04 | 44.64 | 62.27 | 57.87 | 64.31 | 68.36 | +| finarg_ecc_auc | 48.3 | 51.81 | 49.85 | 55.01 | 61.71 | 65.02 | 52.08 | 48.68 | 58.08 | 58.08 | +| fomc | 60.48 | 29.44 | 34.68 | 58.47 | 57.66 | 66.13 | 56.05 | 61.36 | 62.7 | 62.7 | +| ma | 79.2 | 56.4 | 51.0 | 81.4 | 84.6 | 83.2 | 73.64 | 79.27 | 79.81 | 79.81 | +| mlesg | 35.67 | 32.67 | 20.0 | 34.67 | 38.67 | 42.33 | 31.99 | 38.33 | 33.42 | 38.33 | +| multifin_en | 60.99 | 31.32 | 28.39 | 65.38 | 63.55 | 68.5 | 54.96 | 58.61 | 63.46 | 63.46 | +| Avg. | 57.61 | 47.19 | 47.29 | 58.25 | 58.35 | 57.63 | 56.25 | 59.69 | 66.6 | 67.79 | +| Avg. rank | 5.94 | 7.35 | 7.82 | 5.94 | 4.71 | 5.24 | 6.47 | 5.47 | 2.88 | 1.65 | +| PAVE (win/tie/loss) | 13/0/4 | 15/0/2 | 16/0/1 | 14/0/3 | 12/0/5 | 11/0/6 | 16/0/1 | 12/1/4 | nan | 0/11/6 | +| Oracle (win/tie/loss) | 17/0/0 | 17/0/0 | 17/0/0 | 15/0/2 | 13/0/4 | 12/0/5 | 17/0/0 | 14/3/0 | 6/11/0 | nan | + +### Medical + +| User | Qwen2.5-7B | Flan-PaLM-540B | Random | Best-single | PAVE | Oracle | +|:----------------------|:-------------|:-----------------|:---------|:--------------|:-------|:---------| +| medmcqa | 59.93 | 57.6 | 60.2 | 62.49 | 62.49 | 62.49 | +| medqa_4options | 64.18 | 67.6 | 63.74 | 64.81 | 65.59 | 65.59 | +| anatomy | 71.85 | 63.7 | 71.33 | 70.37 | 71.85 | 72.96 | +| clinical_knowledge | 77.36 | 80.4 | 78.21 | 78.49 | 78.87 | 79.25 | +| college_biology | 82.64 | 88.9 | 84.34 | 84.03 | 85.42 | 86.11 | +| college_medicine | 69.36 | 76.3 | 69.02 | 68.79 | 69.36 | 69.94 | +| medical_genetics | 87.0 | 75.0 | 86.95 | 89.0 | 87.0 | 89.0 | +| professional_medicine | 78.68 | 83.8 | 77.37 | 78.68 | 79.78 | 79.78 | +| pubmedqa | 75.2 | 79.0 | 75.67 | 76.8 | 75.8 | 76.8 | +| Avg. | 74.02 | 74.7 | 74.09 | 74.83 | 75.13 | 75.77 | +| Avg. rank | 4.44 | 2.67 | 4.89 | 3.56 | 2.56 | 1.67 | +| PAVE (win/tie/loss) | 6/3/0 | 3/0/6 | 9/0/0 | 6/1/2 | nan | 0/3/6 | +| Oracle (win/tie/loss) | 9/0/0 | 3/0/6 | 9/0/0 | 6/3/0 | 6/3/0 | nan | + +### Math + +| User | Qwen2.5-7B | Qwen1.5-110B | Random | Best-single | PAVE | Oracle | +|:------------------------------|:-------------|:---------------|:---------|:--------------|:-------|:---------| +| agieval_aqua_rat | 41.73 | 38.98 | 40.09 | 41.33 | 38.98 | 41.73 | +| agieval_gaokao_mathcloze | 16.95 | 38.14 | 11.72 | 13.14 | 17.8 | 17.8 | +| agieval_gaokao_mathqa | 49.86 | 77.78 | 50.35 | 51.0 | 51.57 | 53.42 | +| agieval_math | 19.8 | 19.3 | 20.15 | 18.5 | 20.6 | 28.4 | +| agieval_sat_math | 55.91 | 57.27 | 55.3 | 57.5 | 57.27 | 57.5 | +| cmmlu_college_mathematics | 45.71 | 47.62 | 49.36 | 48.58 | 52.38 | 52.38 | +| cmmlu_elementary_mathematics | 65.65 | 77.83 | 64.49 | 65.0 | 66.96 | 67.18 | +| cmmlu_high_school_mathematics | 61.59 | 77.44 | 62.5 | 64.32 | 60.98 | 64.63 | +| gsm8k | 84.08 | 84.91 | 80.79 | 83.92 | 84.15 | 84.15 | +| mathqa | 43.32 | 48.07 | 41.51 | 46.28 | 41.41 | 46.28 | +| mgsm_native_cot_zh | 66.4 | 68.8 | 67.64 | 68.8 | 73.6 | 73.6 | +| minerva_math | 40.16 | 47.9 | 37.4 | 41.23 | 36.48 | 45.12 | +| abstract_algebra | 54.0 | 53.0 | 53.83 | 52.0 | 56.0 | 56.0 | +| college_mathematics | 53.0 | 52.0 | 53.61 | 53.5 | 53.0 | 58.0 | +| elementary_mathematics | 72.75 | 78.84 | 73.63 | 73.02 | 75.13 | 75.13 | +| high_school_mathematics | 55.93 | 60.0 | 55.21 | 55.19 | 55.56 | 56.86 | +| Avg. | 51.68 | 57.99 | 51.1 | 52.08 | 52.62 | 54.89 | +| Avg. 
rank | 4.31 | 2.56 | 4.56 | 4.0 | 3.19 | 1.56 | +| PAVE (win/tie/loss) | 10/1/5 | 5/2/9 | 11/0/5 | 10/0/6 | nan | 0/6/10 | +| Oracle (win/tie/loss) | 15/1/0 | 7/0/9 | 16/0/0 | 14/2/0 | 10/6/0 | nan | + diff --git a/examples/dataset_llm_workflow/benchmark/base.py b/examples/dataset_llm_workflow/benchmark/base.py index be783629..72813b2b 100644 --- a/examples/dataset_llm_workflow/benchmark/base.py +++ b/examples/dataset_llm_workflow/benchmark/base.py @@ -48,11 +48,11 @@ def get_user_dataset(self, dataset_name) -> Dataset: test_dataset = prepare_test_data(self.user_dict[dataset_name]) return test_dataset - # def get_learnwares(self): - # return self.learnware_dict + def get_learnwares(self): + return self.learnware_dict - # def get_users(self): - # return self.user_dict + def get_users(self): + return self.user_dict def get_learnware_names(self) -> List[str]: return list(self.learnware_dict.keys()) diff --git a/examples/dataset_llm_workflow/benchmark/config.py b/examples/dataset_llm_workflow/benchmark/config.py index 00208735..4493a777 100644 --- a/examples/dataset_llm_workflow/benchmark/config.py +++ b/examples/dataset_llm_workflow/benchmark/config.py @@ -49,8 +49,8 @@ "medmcqa": "openlifescienceai/medmcqa", "medqa_4options": "GBaker/MedQA-USMLE-4-options-hf", "anatomy": "hails/mmlu_no_train,anatomy", - "college_biology": "hails/mmlu_no_train,college_biology", "clinical_knowledge": "hails/mmlu_no_train,clinical_knowledge", + "college_biology": "hails/mmlu_no_train,college_biology", "college_medicine": "hails/mmlu_no_train,college_medicine", "medical_genetics": "hails/mmlu_no_train,medical_genetics", "professional_medicine": "hails/mmlu_no_train,professional_medicine", @@ -104,14 +104,113 @@ } LEARNWARE_MED_IDS = [ - "00010000" + "00002688", + "00002689", + "00002690", + "00002691", + "00002692", + "00002693", + "00002694", + "00002695", + "00002696", + "00002697", + "00002698", + "00002699", + "00002700", ] LEARNWARE_MATH_IDS = [ - "00010001" + "00002701", + "00002702", + "00002703", + "00002704", + "00002705", + "00002706", + "00002707", + "00002708", + "00002709", + "00002710", + "00002711", + "00002712", + "00002713", ] LEARNWARE_FIN_IDS = [ - "00010002" + "00002714", + "00002715", + "00002716", + "00002717", + "00002718", + "00002719", + "00002720", + "00002720", + "00002721", + "00002722", + "00002723", + "00002724", + "00002725", + "00002726", + "00002727", + "00002728", + "00002729", + "00002730", + "00002731", + "00002732", + "00002733", + "00002734", + "00002735", + "00002736", + "00002737", + "00002738", + "00002739", + "00002740", + "00002741", + "00002742", + "00002743", + "00002744", + "00002745", + "00002746", + "00002747", + "00002748", + "00002749", + "00002750", + "00002751", + "00002752", + "00002753", + "00002754", + "00002755", + "00002756", + "00002757", + "00002758", + "00002759", + "00002760", + "00002761", + "00002762", + "00002763", + "00002764", + "00002765", + "00002766", + "00002767", + "00002768", + "00002769", + "00002770", + "00002771", + "00002772", + "00002773", + "00002774", + "00002775", + "00002776", + "00002777", + "00002778", + "00002779", + "00002780", + "00002781", + "00002782", + "00002783", + "00002784", + "00002785", + "00002786", + "00002787", + "00002788", ] \ No newline at end of file diff --git a/examples/dataset_llm_workflow/model_performance/medical.csv b/examples/dataset_llm_workflow/model_performance/medical.csv index e755460a..7f557ef7 100644 --- a/examples/dataset_llm_workflow/model_performance/medical.csv +++ 
b/examples/dataset_llm_workflow/model_performance/medical.csv @@ -1,11 +1,11 @@ Dataset,Qwen2.5-7B,Flan-PaLM-540B,medqa_train&pubmed_causal-1,medqa_train-1,pubmed_causal-1,medalpaca_cleaned-1,medqa_train&medmcqa_train-1,medmcqa_train-1,AlpaCare-1,ChatDoctor-1,ChatDoctor-2,AlpaCare&ChatDoctor-1,AlpaCare&ChatDoctor-2,medalpaca_cleaned&AlpaCare&ChatDoctor-1,medalpaca_cleaned&AlpaCare&ChatDoctor-2 -medmcqa,59.93,57.6,59.48,59.48,60.32,59.81,62.49,62.01,59.77,60.29,60.15,58.93,58.38,59.72,59.55 -medqa_4options,64.18,67.6,65.59,65.59,63.55,63.86,64.81,63.63,62.92,63.63,63.32,62.14,61.67,62.61,62.37 +medmcqa,59.93,57.6,59.48,59.48,60.46,59.81,62.49,62.01,59.77,60.29,60.15,58.93,58.38,59.72,59.55 +medqa_4options,64.18,67.6,65.59,65.59,63.16,63.86,64.81,63.63,62.92,63.63,63.32,62.14,61.67,62.61,62.37 anatomy,71.85,63.7,71.85,71.85,71.85,70.37,70.37,71.11,71.85,72.59,73.33,70.37,70.37,70.37,71.11 -clinical_knowledge,77.36,80.4,77.36,77.74,78.11,78.87,78.49,79.25,78.49,77.74,76.6,78.49,78.11,78.11,77.74 -college_biology,82.64,88.9,86.11,84.72,81.94,84.72,84.03,85.42,84.03,84.03,81.94,82.64,82.64,84.72,86.11 +clinical_knowledge,77.36,80.4,77.36,77.74,78.49,78.87,78.49,79.25,78.49,77.74,76.6,78.49,78.11,78.11,77.74 +college_biology,82.64,88.9,86.11,84.72,83.33,84.72,84.03,85.42,84.03,84.03,81.94,82.64,82.64,84.72,86.11 college_medicine,69.36,76.3,68.79,69.94,69.36,69.94,68.79,68.21,68.79,67.05,67.63,69.36,68.79,68.79,71.1 -medical_genetics,87.0,75.0,87.0,88.0,88.0,85.0,89.0,89.0,87.0,86.0,88.0,86.0,87.0,85.0,83.0 -professional_medicine,78.68,83.8,76.84,79.78,77.57,77.94,78.68,76.47,77.21,77.57,77.21,75.74,76.1,77.21,76.84 -pubmedqa,75.2,79.0,76.0,75.8,76.8,75.8,76.8,75.8,75.0,74.8,73.8,74.8,75.0,76.2,75.6 -Avg,74.02,74.7,74.34,74.77,74.17,74.03,74.83,74.54,73.9,73.74,73.55,73.16,73.12,73.64,73.71 +medical_genetics,87.0,75.0,87.0,88.0,87.0,85.0,89.0,89.0,87.0,86.0,88.0,86.0,87.0,85.0,83.0 +professional_medicine,78.68,83.8,76.84,79.78,76.47,77.94,78.68,76.47,77.21,77.57,77.21,75.74,76.1,77.21,76.84 +pubmedqa,75.2,79.0,76.0,75.8,76.4,75.8,76.8,75.8,75.0,74.8,73.8,74.8,75.0,76.2,75.6 +Avg,74.02,74.7,74.34,74.77,74.06,74.03,74.83,74.54,73.9,73.74,73.55,73.16,73.12,73.64,73.71 diff --git a/examples/dataset_llm_workflow/workflow.py b/examples/dataset_llm_workflow/workflow.py index 663db6d5..2b466dd9 100644 --- a/examples/dataset_llm_workflow/workflow.py +++ b/examples/dataset_llm_workflow/workflow.py @@ -25,27 +25,6 @@ logger = get_module_logger("llm_workflow", level="INFO") -def build_specification_from_cache(generative_spec_path, dataset_name): - print(f"Build PAVE from cache to {generative_spec_path}") - if dataset_name in USER_FIN: - finetuned_checkpoint = torch.load(f"/home/shihy/drive/LLM-finance-GridSearch-qwen/condidate-{1}/user-{dataset_name}/finetuned.pt", weights_only=False) - elif dataset_name in USER_MED: - finetuned_checkpoint = torch.load(f"/home/shihy/drive/LLM-med-GridSearch-qwen-backup/condidate-{0}/{dataset_name}/finetuned.pt", weights_only=False) - elif dataset_name in USER_MATH: - finetuned_checkpoint = torch.load(f"/home/shihy/drive/LLM-math-GridSearch-qwen/condidate-{0}/{dataset_name}/finetuned.pt", weights_only=False) - else: - raise NotImplementedError("Invalid dataset_name") - - finetuned_state_dict = finetuned_checkpoint["state_dict"]["model"] - task_vector = torch.concatenate([ - p.reshape(-1) for n, p in finetuned_state_dict.items() - ]) - torch.save({ - "type": "GenerativeModelSpecification", - "task_vector": task_vector.detach().cpu() - }, generative_spec_path) - - class 
LLMWorkflow: def _plot_radar_chart(self, benchmark_name, results_table): labels = list(results_table.index) @@ -128,8 +107,8 @@ def _plot_radar_chart(self, benchmark_name, results_table): ax.legend(loc="lower left", fontsize=8, bbox_to_anchor=(0.85, 0.9)) plt.tight_layout() - # os.makedirs("results/figs", exist_ok=True) - # plt.savefig(f"results/figs/llm-{benchmark_name}.pdf") + os.makedirs("results/figs", exist_ok=True) + plt.savefig(f"results/figs/llm-{benchmark_name}.pdf") def _anlysis_table(self, benchmark_name, table, score_results): if benchmark_name == 'finance': @@ -202,9 +181,9 @@ def _anlysis_table(self, benchmark_name, table, score_results): adaptation_table.loc["PAVE (win/tie/loss)"] = win_tie_loss adaptation_table.loc["Oracle (win/tie/loss)"] = win_tie_loss_o + print(adaptation_table.to_markdown()) os.makedirs("results/tables", exist_ok=True) - # adaptation_table.to_csv(f"results/tables/llm-{benchmark_name}.csv") - print(adaptation_table) + adaptation_table.to_csv(f"results/tables/llm-{benchmark_name}.csv") return adaptation_table @@ -232,16 +211,6 @@ def _prepare_market(self, benchmark: Benchmark, rebuild=False): continue logger.info("Total Item: %d" % (len(self.llm_market))) - - def _prepare_market_from_disk(self, benchmark: Benchmark, rebuild=False): - self.llm_benchmark = benchmark - self.llm_market = instantiate_learnware_market(market_id=f"llm_{self.llm_benchmark.name}", name="llm", rebuild=rebuild) - self.user_semantic = copy.deepcopy(self.llm_market.get_learnwares()[0].specification.semantic_spec) - self.user_semantic["Name"]["Values"] = "" - self.user_semantic["Description"]["Values"] = "" - self.user_semantic["License"]["Values"] = ['Apache-2.0', 'Others'] - logger.info("Total Item: %d" % (len(self.llm_market))) - def build_specification_and_cache(self, name, saved_folder, benchmark: Benchmark): generative_spec = GenerativeModelSpecification() @@ -252,8 +221,6 @@ def build_specification_and_cache(self, name, saved_folder, benchmark: Benchmark if os.path.exists(generative_spec_path): generative_spec.load(generative_spec_path) else: - # build_specification_from_cache(generative_spec_path, name) - # generative_spec.load(generative_spec_path) train_dataset = benchmark.get_user_dataset(name) generative_spec.generate_stat_spec_from_data(dataset=train_dataset) generative_spec.save(generative_spec_path) @@ -261,7 +228,7 @@ def build_specification_and_cache(self, name, saved_folder, benchmark: Benchmark return generative_spec def _get_scores(self, benchmark_name, base_model: str, adapter_path, batch_size='auto'): - benchmark_configs = CONFIG[benchmark_name][6:7] + benchmark_configs = CONFIG[benchmark_name] task_manager = lm_eval.tasks.TaskManager() task_names = [config.name for config in benchmark_configs] @@ -273,8 +240,6 @@ def _get_scores(self, benchmark_name, base_model: str, adapter_path, batch_size= task_manager=task_manager, ) else: - if benchmark_name == "finance": - batch_size = 32 results_dir = f"./eval_results/{benchmark_name}" adapter_id = adapter_path.split("/")[-2] if adapter_path else None task_names_str = ",".join(task_names) @@ -324,8 +289,7 @@ def _get_scores(self, benchmark_name, base_model: str, adapter_path, batch_size= def llm_example(self, benchmark_name, rebuild=False, skip_eval=True): benchmark = Benchmark(benchmark_name) - # self._prepare_market(benchmark, rebuild) # online - self._prepare_market_from_disk(benchmark, rebuild) + self._prepare_market(benchmark, rebuild) user_names = benchmark.get_user_names() score_results = { @@ -338,8 +302,7 @@ def 
llm_example(self, benchmark_name, rebuild=False, skip_eval=True): title = "=" * 20 + name + "=" * 20 print(title) - # generative_spec = self.build_specification_and_cache(name, "users", benchmark) - generative_spec = self.build_specification_and_cache(name, "users_updated", benchmark) + generative_spec = self.build_specification_and_cache(name, "user_specs", benchmark) user_info = BaseUserInfo( semantic_spec=self.user_semantic, stat_info={"GenerativeModelSpecification": generative_spec} @@ -355,8 +318,6 @@ def llm_example(self, benchmark_name, rebuild=False, skip_eval=True): match = re.match(r"(.+)-(\d+)", learnware_name) dataset_name = match.group(1) scores[dataset_name] = result.score - - # scores = {r.learnware.specification.semantic_spec["Name"]["Values"]: r.score for r in single_result} for k, v in scores.items(): score_results["User"].append(name) @@ -385,7 +346,7 @@ def llm_example(self, benchmark_name, rebuild=False, skip_eval=True): "Llama3.1-70B-Instruct": self._get_scores(benchmark_name, "NousResearch/Meta-Llama-3.1-70B-Instruct", None), } - for learnware_id in all_learnwares_ids[:1]: + for learnware_id in all_learnwares_ids: learnware = self.llm_market.get_learnware_by_ids(learnware_id) base_model = learnware.specification.semantic_spec["Description"]["Values"].split(' ')[-1] adapter_path = os.path.join(self.llm_market.get_learnware_dir_path_by_ids(learnware_id), "adapter") @@ -394,18 +355,15 @@ def llm_example(self, benchmark_name, rebuild=False, skip_eval=True): performance_table = pd.DataFrame(performance_table) performance_table = performance_table._append(performance_table.mean().round(2), ignore_index=True) - configs = CONFIG[benchmark_name] - datasets = [config.name for config in configs] + datasets = benchmark.get_user_names() performance_table.insert(0, "Dataset", datasets+['Avg']) performance_table.to_csv(f"model_performance/{benchmark_name}-new.csv", index=False) else: - performance_table = pd.read_csv(f"model_performance/{benchmark_name}.csv") + performance_table = pd.read_csv(f"model_performance/{benchmark_name}-new.csv") results_table = self._anlysis_table(benchmark_name, performance_table, score_results) self._plot_radar_chart(benchmark_name, results_table[:-4]) - # pd.DataFrame(score_results).to_csv(f"{benchmark_name}_test.csv", index=False) - if __name__ == "__main__": fire.Fire(LLMWorkflow) diff --git a/learnware/specification/regular/base.py b/learnware/specification/regular/base.py index d6851ee7..f43b3c95 100644 --- a/learnware/specification/regular/base.py +++ b/learnware/specification/regular/base.py @@ -27,7 +27,7 @@ def similarity(self, other: TaskVectorSpecification) -> float: """Compute cosine similarity between two task vectors. 
""" v1, v2 = self.task_vector, other.task_vector - v2 = v2.to(v1.device) + return cosine_similarity(v1, v2, dim=0) def dist(self, other: BaseStatSpecification): From ec0cc625acfb1bc19e43e70ab90d6c518d9796e8 Mon Sep 17 00:00:00 2001 From: zzc <1196529906@qq.com> Date: Tue, 25 Mar 2025 15:21:38 +0800 Subject: [PATCH 091/108] [MNT] modify details and update README in llm workflow --- .../{Readme.md => README.md} | 37 +- .../extra_tasks/flare/australian.yaml | 2 + .../extra_tasks/flare/causal20_sc.yaml | 2 + .../extra_tasks/flare/cd.yaml | 2 + .../extra_tasks/flare/convfinqa.yaml | 2 + .../extra_tasks/flare/cra_ccf.yaml | 2 + .../extra_tasks/flare/cra_ccfraud.yaml | 2 + .../extra_tasks/flare/cra_lendingclub.yaml | 2 + .../extra_tasks/flare/cra_polish.yaml | 2 + .../extra_tasks/flare/cra_portoseguro.yaml | 2 + .../extra_tasks/flare/cra_taiwan.yaml | 2 + .../extra_tasks/flare/cra_travelinsurace.yaml | 2 + .../extra_tasks/flare/ectsum.yaml | 2 + .../extra_tasks/flare/edtsum.yaml | 2 + .../extra_tasks/flare/finarg_ecc_arc.yaml | 2 + .../extra_tasks/flare/finarg_ecc_auc.yaml | 2 + .../extra_tasks/flare/finer_ord.yaml | 2 + .../extra_tasks/flare/finqa.yaml | 2 + .../extra_tasks/flare/finred.yaml | 2 + .../extra_tasks/flare/fiqasa.yaml | 2 + .../extra_tasks/flare/flare.py | 1470 +++++++++++++++++ .../extra_tasks/flare/fnxl.yaml | 2 + .../extra_tasks/flare/fomc.yaml | 2 + .../extra_tasks/flare/fpb.yaml | 2 + .../extra_tasks/flare/fsrl.yaml | 2 + .../extra_tasks/flare/german.yaml | 2 + .../extra_tasks/flare/headlines.yaml | 2 + .../extra_tasks/flare/ma.yaml | 2 + .../extra_tasks/flare/mlesg.yaml | 2 + .../extra_tasks/flare/multifin_en.yaml | 2 + .../extra_tasks/flare/ner.yaml | 2 + .../extra_tasks/flare/sm_acl.yaml | 2 + .../extra_tasks/flare/sm_bigdata.yaml | 2 + .../extra_tasks/flare/sm_cikm.yaml | 2 + .../extra_tasks/flare/tatqa.yaml | 2 + .../extra_tasks/flare/tsa.yaml | 2 + examples/dataset_llm_workflow/workflow.py | 2 +- 37 files changed, 1572 insertions(+), 5 deletions(-) rename examples/dataset_llm_workflow/{Readme.md => README.md} (66%) create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/australian.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/causal20_sc.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/cd.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/convfinqa.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/cra_ccf.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/cra_ccfraud.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/cra_lendingclub.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/cra_polish.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/cra_portoseguro.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/cra_taiwan.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/cra_travelinsurace.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/ectsum.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/edtsum.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/finarg_ecc_arc.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/finarg_ecc_auc.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/finer_ord.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/finqa.yaml create mode 100644 
examples/dataset_llm_workflow/extra_tasks/flare/finred.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/fiqasa.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/flare.py create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/fnxl.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/fomc.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/fpb.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/fsrl.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/german.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/headlines.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/ma.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/mlesg.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/multifin_en.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/ner.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/sm_acl.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/sm_bigdata.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/sm_cikm.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/tatqa.yaml create mode 100644 examples/dataset_llm_workflow/extra_tasks/flare/tsa.yaml diff --git a/examples/dataset_llm_workflow/Readme.md b/examples/dataset_llm_workflow/README.md similarity index 66% rename from examples/dataset_llm_workflow/Readme.md rename to examples/dataset_llm_workflow/README.md index edab5af5..adb729a4 100644 --- a/examples/dataset_llm_workflow/Readme.md +++ b/examples/dataset_llm_workflow/README.md @@ -2,11 +2,19 @@ ## Introduction -Learnware Retrieval with Parameter Vector Specification +This workflow refers to Section 5 of our paper "Learnware Retrieval with Parameter Vector Specification". We build three learnware dock systems of 8B-level LLMs across three domains: finance, healthcare, and mathematics. We evaluate them on public evaluation benchmarks. + +We first train multiple models under different configurations by SFT on different datasets using LoRA. Qwen2.5-7B, Llama3.1-8B, Llama3.1-8B-Instruct are our base models. Then we generate specifications for each model and apply a retrieval algorithm to select the most suitable learnware based on user task requirements. The retrieved learnware is then evaluated on the corresponding task under the **Task-Level** evaluation setting using EleutherAI's [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness). + +We compare PAVE against several baselines, including the Random selection strategy, the Best-single model, base models used for fine-tuning, and well-known LLMs with over 70B parameters. Best-single refers to the model with the highest average score among the learnware candidates. + +We do not distinguish between different models fine-tuned with the same instruction dataset, so if our method select a learnware for solving a given task, the performance is actually calculated by the average of all the models with the selected instruction dataset. ## Run the code -Run the following command to get results using the model performance table (skip evaluation) in medical/math/finance scenario. We recommend you to run these. +Since the evaluation of LLM is a time-consuming process, we provide our evaluation results of all models in a table to help you quickly get the final system performance. 
+ +Run the following command to get results using the performance table of all models in medical/math/finance scenario (skip evaluation). **We recommend you to run these.** ```bash python workflow.py llm_example medical @@ -14,18 +22,26 @@ python workflow.py llm_example math python workflow.py llm_example finance ``` -Run the following command to get results in medical/math/finance scenario. +Run the following command to obtain results for medical, mathematical, and financial scenarios (including evaluation). In the medical scenario, it takes 3-4 hours to get the final results on one A100 GPU. For math and finance scenario, the process is significantly more time-consuming and requires at least four A100 GPUs. ```bash python workflow.py llm_example medical --skip_eval False python workflow.py llm_example math --skip_eval False -python workflow.py llm_example fianance --skip_eval False +python workflow.py llm_example finance --skip_eval False +``` + +Following [FinBen](https://github.com/The-FinAI/PIXIU), for evaluation in finance scenario, you need to first copy the folder ```extra_tasks/flare``` into the ```tasks``` directory within the installation path of ```lm_eval```. For example, run the following command: + +```bash +cp -r extra_tasks/flare ~/anaconda3/envs/{env_name}/lib/python3.11/site-packages/lm_eval/tasks/ ``` ## Results ### Finance +The table below shows the performance value of different methods or language models in finance scenario. + | User | Qwen2.5-7B | Llama3.1-8B-Instruct | Llama3.1-8B | Qwen1.5-110B | Qwen2.5-72B | Llama3.1-70B-Instruct | Random | Best-single | PAVE | Oracle | |:----------------------|:-------------|:-----------------------|:--------------|:---------------|:--------------|:------------------------|:---------|:--------------|:-------|:---------| | australian | 43.17 | 44.6 | 43.17 | 43.17 | 43.17 | 47.48 | 44.45 | 42.21 | 56.83 | 56.83 | @@ -50,8 +66,14 @@ python workflow.py llm_example fianance --skip_eval False | PAVE (win/tie/loss) | 13/0/4 | 15/0/2 | 16/0/1 | 14/0/3 | 12/0/5 | 11/0/6 | 16/0/1 | 12/1/4 | nan | 0/11/6 | | Oracle (win/tie/loss) | 17/0/0 | 17/0/0 | 17/0/0 | 15/0/2 | 13/0/4 | 12/0/5 | 17/0/0 | 14/3/0 | 6/11/0 | nan | +Our method, PAVE, demonstrates strong performance across financial tasks, achieving the highest average score among all methods, delivering an nearly 14\% improvement compared with the best large-scale model Qwen2.5-72B. It ranks first among learnware retrieval methods in 13 out of 17 tasks, retrieves the optimal learnware (tied with Oracle) on 11 and outperforms all contenders in 8. + +These results shows that our system can match or surpass large-scale models with over 70B parameters under the Task-Level evaluation setting, while requiring only the memory for models under 8B efficiently. + ### Medical +The table below shows the performance value of different methods or language models in medical scenario. + | User | Qwen2.5-7B | Flan-PaLM-540B | Random | Best-single | PAVE | Oracle | |:----------------------|:-------------|:-----------------|:---------|:--------------|:-------|:---------| | medmcqa | 59.93 | 57.6 | 60.2 | 62.49 | 62.49 | 62.49 | @@ -68,8 +90,14 @@ python workflow.py llm_example fianance --skip_eval False | PAVE (win/tie/loss) | 6/3/0 | 3/0/6 | 9/0/0 | 6/1/2 | nan | 0/3/6 | | Oracle (win/tie/loss) | 9/0/0 | 3/0/6 | 9/0/0 | 6/3/0 | 6/3/0 | nan | +As shown, PAVE achieves the highest average score across 9 tasks, even surpassing the large-scale model Flan-PaLM-540B. 
This demonstrates that our system, leveraging multiple models with fewer than 8B parameters, can outperform a single large-scale model in task-specific scenarios. Among learnware retrieval methods, PAVE performs best in 7 out of 9 tasks, tied with Oracle in 6. + +Furthermore, PAVE outperforming Best-single suggests that its effectiveness comes not from a single exceptionally strong model but from its retrieval mechanism and the collective strength of all candidate models. + ### Math +The table below shows the performance value of different methods or language models in math scenario. + | User | Qwen2.5-7B | Qwen1.5-110B | Random | Best-single | PAVE | Oracle | |:------------------------------|:-------------|:---------------|:---------|:--------------|:-------|:---------| | agieval_aqua_rat | 41.73 | 38.98 | 40.09 | 41.33 | 38.98 | 41.73 | @@ -93,3 +121,4 @@ python workflow.py llm_example fianance --skip_eval False | PAVE (win/tie/loss) | 10/1/5 | 5/2/9 | 11/0/5 | 10/0/6 | nan | 0/6/10 | | Oracle (win/tie/loss) | 15/1/0 | 7/0/9 | 16/0/0 | 14/2/0 | 10/6/0 | nan | +PAVE achieves optimal retrieval performance (tied with Oracle) in 10 out of 16 tasks and even outperforms all other contenders in 5. However, the large-scale model achieves the highest average score and even beats Oracle (which denotes the optimal performance using one of our 8B-level models). This is likely due to their strong reasoning abilities that lack in smaller models, rather than a shortcoming of our method, as evidenced by the minimal difference in the "win/tie/loss" of PAVE and Oracle on Qwen1.5-110B. diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/australian.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/australian.yaml new file mode 100644 index 00000000..2ba0dc42 --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/australian.yaml @@ -0,0 +1,2 @@ +task: australian +class: !function flare.Australian diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/causal20_sc.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/causal20_sc.yaml new file mode 100644 index 00000000..03186515 --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/causal20_sc.yaml @@ -0,0 +1,2 @@ +task: causal20_sc +class: !function flare.Causal20SC diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/cd.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/cd.yaml new file mode 100644 index 00000000..594b701d --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/cd.yaml @@ -0,0 +1,2 @@ +task: cd +class: !function flare.CD diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/convfinqa.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/convfinqa.yaml new file mode 100644 index 00000000..6806ef5d --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/convfinqa.yaml @@ -0,0 +1,2 @@ +task: convfinqa +class: !function flare.ConvFinQA diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/cra_ccf.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/cra_ccf.yaml new file mode 100644 index 00000000..5505edc8 --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/cra_ccf.yaml @@ -0,0 +1,2 @@ +task: cra_ccf +class: !function flare.ccf \ No newline at end of file diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/cra_ccfraud.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/cra_ccfraud.yaml new file mode 100644 index 00000000..9289db9b --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/cra_ccfraud.yaml @@ 
-0,0 +1,2 @@ +task: cra_ccfraud +class: !function flare.ccfraud diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/cra_lendingclub.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/cra_lendingclub.yaml new file mode 100644 index 00000000..de7609c2 --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/cra_lendingclub.yaml @@ -0,0 +1,2 @@ +task: cra_lendingclub +class: !function flare.lendingclub diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/cra_polish.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/cra_polish.yaml new file mode 100644 index 00000000..3d3d50e1 --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/cra_polish.yaml @@ -0,0 +1,2 @@ +task: cra_polish +class: !function flare.polish diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/cra_portoseguro.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/cra_portoseguro.yaml new file mode 100644 index 00000000..6c79245a --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/cra_portoseguro.yaml @@ -0,0 +1,2 @@ +task: cra_portoseguro +class: !function flare.portoseguro diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/cra_taiwan.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/cra_taiwan.yaml new file mode 100644 index 00000000..d0948067 --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/cra_taiwan.yaml @@ -0,0 +1,2 @@ +task: cra_taiwan +class: !function flare.taiwan diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/cra_travelinsurace.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/cra_travelinsurace.yaml new file mode 100644 index 00000000..80d70661 --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/cra_travelinsurace.yaml @@ -0,0 +1,2 @@ +task: cra_travelinsurace +class: !function flare.travelinsurace \ No newline at end of file diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/ectsum.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/ectsum.yaml new file mode 100644 index 00000000..7bdc06a2 --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/ectsum.yaml @@ -0,0 +1,2 @@ +task: ectsum +class: !function flare.ECTSUM \ No newline at end of file diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/edtsum.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/edtsum.yaml new file mode 100644 index 00000000..7dbf158c --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/edtsum.yaml @@ -0,0 +1,2 @@ +task: edtsum +class: !function flare.EDTSUM \ No newline at end of file diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/finarg_ecc_arc.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/finarg_ecc_arc.yaml new file mode 100644 index 00000000..bda9917e --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/finarg_ecc_arc.yaml @@ -0,0 +1,2 @@ +task: finarg_ecc_arc +class: !function flare.FINARGECCARC \ No newline at end of file diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/finarg_ecc_auc.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/finarg_ecc_auc.yaml new file mode 100644 index 00000000..2a04806f --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/finarg_ecc_auc.yaml @@ -0,0 +1,2 @@ +task: finarg_ecc_auc +class: !function flare.FINARGECCAUC \ No newline at end of file diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/finer_ord.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/finer_ord.yaml new file mode 100644 index 00000000..9ed571c6 --- /dev/null +++ 
b/examples/dataset_llm_workflow/extra_tasks/flare/finer_ord.yaml @@ -0,0 +1,2 @@ +task: finer_ord +class: !function flare.FinerOrd diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/finqa.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/finqa.yaml new file mode 100644 index 00000000..e13381d5 --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/finqa.yaml @@ -0,0 +1,2 @@ +task: finqa +class: !function flare.FinQA diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/finred.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/finred.yaml new file mode 100644 index 00000000..0c43a65e --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/finred.yaml @@ -0,0 +1,2 @@ +task: finred +class: !function flare.FinRED diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/fiqasa.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/fiqasa.yaml new file mode 100644 index 00000000..e47c3e47 --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/fiqasa.yaml @@ -0,0 +1,2 @@ +task: fiqasa +class: !function flare.FIQASA diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/flare.py b/examples/dataset_llm_workflow/extra_tasks/flare/flare.py new file mode 100644 index 00000000..e6fe0248 --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/flare.py @@ -0,0 +1,1470 @@ +""" +FLARE +""" +from lm_eval.api.instance import Instance +import numpy as np +from seqeval.metrics import f1_score as entity_score +from sklearn.metrics import f1_score, matthews_corrcoef, mean_squared_error +import evaluate +import re +from lm_eval.api.task import ConfigurableTask +import os + +def mean(arr): + return sum(arr) / len(arr) + +def process_text(entity_string, text): + # Initialize + entity_list = [(", ".join(val.split(", ")[:-1]), val.split(", ")[-1]) for val in entity_string.split("\n")] + text_words = text.split() + labels = ['O'] * len(text_words) + # text_lower = text.lower() + text_lower = text + + # Create a list to store the start index of each word + word_indices = [0] + for word in text_words[:-1]: + word_indices.append(word_indices[-1] + len(word) + 1) + + # Iterate over the entity list + # print (entity_list) + for entity, entity_type in entity_list: + entity_words = entity.split() + entity_lower = entity + + # Find start and end index of each occurrence of the entity in the text + start = 0 + while True: + start = text_lower.find(entity_lower, start) + if not entity or start == -1: break # No more occurrence + end = start + len(entity) - 1 + + # Find the words included in this occurrence + try: + start_word = next(i for i, ind in enumerate(word_indices) if ind >= start) + end_word = next(i for i, ind in enumerate(word_indices) if ind > end) + + # Label the words + labels[start_word] = 'B-' + entity_type + for i in range(start_word+1, end_word): + labels[i] = 'I-' + entity_type + + # Move to the next character after the occurrence + except Exception: + pass + start = end + 1 + + return labels + + +_CITATION = """ +@misc{xie2023pixiu, + title={PIXIU: A Large Language Model, Instruction Data and Evaluation Benchmark for Finance}, + author={Qianqian Xie and Weiguang Han and Xiao Zhang and Yanzhao Lai and Min Peng and Alejandro Lopez-Lira and Jimin Huang}, + year={2023}, + eprint={2306.05443}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +""" + + +class Classification(ConfigurableTask): + CALCULATE_MCC = True + LOWER_CASE = True + VERSION = 1 + EVAL_LAST_TURN = True + + def __init__(self, **kwargs): + 
super().__init__(config={"metadata": {"version": self.VERSION}}) + + def reformulate_turn_req(self, req, turn_request, turn): + return req + + def has_training_docs(self): + return True + + def has_validation_docs(self): + return True + + def has_test_docs(self): + return True + + def training_docs(self): + return self.dataset["train"] + + def validation_docs(self): + return self.dataset["validation"] + + def test_docs(self): + return self.dataset["test"] + + def construct_requests(self, doc, ctx, **kwargs): + """Uses RequestFactory to construct Requests and returns an iterable of + Requests which will be sent to the LM. + + :param doc: + The document as returned from training_docs, validation_docs, or test_docs. + :param ctx: str + The context string, generated by fewshot_context. This includes the natural + language description, as well as the few shot examples, and the question + part of the document for `doc`. + """ + # cont_request = rf.greedy_until(ctx, {"until": None}) + # return cont_request + kwargs.pop("apply_chat_template") + return [ + Instance( + request_type="generate_until", + doc=doc, + arguments=(ctx, {}), + idx=0, + **kwargs, + ) + ] + + def doc_to_decontamination_query(self, doc): + return doc["text"] + + def doc_to_text(self, doc): + # TODO: Format the query prompt portion of the document example. + return doc["query"] + + def doc_to_target(self, doc): + # TODO: Format the query prompt portion of the document example. + return doc["answer"] + + def process_results(self, doc, results): + gold: str = doc["choices"][doc["gold"]] + if self.LOWER_CASE: + gold = gold.lower() + ini_result = results[0].strip() + if self.LOWER_CASE: + ini_result = ini_result.lower() + + result = None + for choice in doc["choices"]: + if self.LOWER_CASE: + choice = choice.lower() + if choice in ini_result: + result = choice + break + if result is None: + result = "missing" + + acc = 1.0 if gold == result else 0.0 + + results = { + "acc": acc, + "missing": int(result == "missing"), + "f1": (result, gold), + "macro_f1": (result, gold), + } + + if self.CALCULATE_MCC: + results["mcc"] = (result, gold) + + return results + + def higher_is_better(self): + metrics = { + "acc": True, + "f1": True, + "macro_f1": True, + "missing": False, + } + if self.CALCULATE_MCC: + metrics["mcc"] = True + return metrics + + def weighted_f1(self, items): + preds, golds = zip(*items) + labels = list(set(golds)) + preds = np.array(preds) + golds = np.array(golds) + f1 = f1_score(golds, preds, average="weighted", labels=labels) + return f1 + + def macro_f1(self, items): + preds, golds = zip(*items) + labels = list(set(golds)) + preds = np.array(preds) + golds = np.array(golds) + f1 = f1_score(golds, preds, average="macro", labels=labels) + return f1 + + def matthews_corrcoef(self, items): + preds, golds = zip(*items) + labels = {label: i for i, label in enumerate(list(set(golds)))} + preds = [labels.get(pred, -1) for pred in preds] + golds = [labels.get(gold, -1) for gold in golds] + return matthews_corrcoef(golds, preds) + + def aggregation(self): + metrics = { + "acc": mean, + "missing": mean, + "f1": self.weighted_f1, + "macro_f1": self.macro_f1, + } + if self.CALCULATE_MCC: + metrics["mcc"] = self.matthews_corrcoef + return metrics + + +class SequentialLabeling(ConfigurableTask): + VERSION = 1 + DATASET_NAME = None + LMAP = {"O": 0} + EVAL_LAST_TURN = True + + def __init__(self, **kwargs): + super().__init__(config={"metadata": {"version": self.VERSION}}) + + def reformulate_turn_req(self, req, turn_request, turn): + 
return req + + def has_training_docs(self): + return False + + def has_validation_docs(self): + return False + + def has_test_docs(self): + return True + + def training_docs(self): + return self.dataset["train"] + + def validation_docs(self): + return self.dataset["validation"] + + def test_docs(self): + return self.dataset["test"] + + def doc_to_text(self, doc): + # TODO: Format the query prompt portion of the document example. + return doc["query"] + + def doc_to_target(self, doc): + return "\nAnswer: " + doc["answer"] + + def process_results(self, doc, results): + return { + "entity_f1": (doc["label"], results[0], doc["token"]), + "f1": (doc["label"], results[0], doc["token"]), + } + + def higher_is_better(self): + return { + "f1": True, + "entity_f1": True, + } + + def construct_requests(self, doc, ctx, **kwargs): + """Uses RequestFactory to construct Requests and returns an iterable of + Requests which will be sent to the LM. + + :param doc: + The document as returned from training_docs, validation_docs, or test_docs. + :param ctx: str + The context string, generated by fewshot_context. This includes the natural + language description, as well as the few shot examples, and the question + part of the document for `doc`. + """ + # cont_request = rf.greedy_until(ctx, {"until": None}) + # return cont_request + kwargs.pop("apply_chat_template") + return [ + Instance( + request_type="generate_until", + doc=doc, + arguments=(ctx, {}), + idx=0, + **kwargs, + ) + ] + + def process_result(self, pred, gold, tokens): + format_pred = ["O"] * len(gold) + for index, pre in enumerate(pred.split("\n")[: len(tokens)]): + try: + word, label = pre.split(":") + except: + continue + if word == tokens[index] and label in self.LMAP.keys(): + format_pred[index] = label + return format_pred + + def entity_f1(self, items): + golds, preds, tokens = zip(*items) + + list_preds = [ + self.process_result(pred, gold, token) + for pred, gold, token in zip(preds, golds, tokens) + ] + f1 = entity_score(golds, list_preds) + return f1 + + def process_label_result(self, pred, gold, tokens): + format_pred = [-1] * len(gold) + for index, pre in enumerate(pred.split("\n")[: len(tokens)]): + try: + word, label = pre.split(":") + except: + continue + if word == tokens[index]: + format_pred[index] = self.LMAP.get(label, -1) + return format_pred + + def label_f1(self, items): + golds, preds, tokens = zip(*items) + + list_preds = [ + self.process_label_result(pred, gold, token) + for pred, gold, token in zip(preds, golds, tokens) + ] + list_preds = [item for sublist in list_preds for item in sublist] + golds = [self.LMAP[item] for sublist in golds for item in sublist] + f1 = f1_score(golds, list_preds, average="weighted") + return f1 + + def aggregation(self): + return { + "entity_f1": self.entity_f1, + "f1": self.label_f1, + } + + +class AbstractiveSummarization(ConfigurableTask): + VERSION = 1 + DATASET_NAME = None + EVAL_LAST_TURN = True + + def __init__(self, **kwargs): + super().__init__(config={"metadata": {"version": self.VERSION}}) + + def reformulate_turn_req(self, req, turn_request, turn): + return req + + def has_training_docs(self): + return False + + def has_validation_docs(self): + return False + + def has_test_docs(self): + return True + + def training_docs(self): + return self.dataset["train"] + + def validation_docs(self): + return self.dataset["validation"] + + def test_docs(self): + return self.dataset["test"] + + def doc_to_text(self, doc): + # TODO: Format the query prompt portion of the document example. 
+ return doc["query"] + + def doc_to_target(self, doc): + return doc["answer"] + + def process_results(self, doc, results): + return { + "rouge1": (doc["answer"], results[0]), + "rouge2": (doc["answer"], results[0]), + "rougeL": (doc["answer"], results[0]), + "bert_score_f1": (doc["answer"], results[0]), + "bart_score": (doc["answer"], results[0]), + } + + def higher_is_better(self): + return { + "rouge1": True, + "rouge2": True, + "rougeL": True, + "bert_score_f1": True, + "bart_score": True, + } + + def construct_requests(self, doc, ctx, **kwargs): + """Uses RequestFactory to construct Requests and returns an iterable of + Requests which will be sent to the LM. + + :param doc: + The document as returned from training_docs, validation_docs, or test_docs. + :param ctx: str + The context string, generated by fewshot_context. This includes the natural + language description, as well as the few shot examples, and the question + part of the document for `doc`. + """ + # cont_request = rf.greedy_until(ctx, {"until": None}) + # return cont_request + kwargs.pop("apply_chat_template") + return [ + Instance( + request_type="generate_until", + doc=doc, + arguments=(ctx, {}), + idx=0, + **kwargs, + ) + ] + + def rouge_score(self, items): + golds, preds = zip(*items) + rouge = evaluate.load("rouge") + results = rouge.compute(predictions=preds, references=golds) + return results + + def rouge1(self, items): + results = self.rouge_score(items) + return results["rouge1"] + + def rouge2(self, items): + results = self.rouge_score(items) + return results["rouge2"] + + def rougeL(self, items): + results = self.rouge_score(items) + return results["rougeL"] + + def bert_score(self, items): + if getattr(self, "_cache_bertscore", None) is None: + golds, preds = zip(*items) + bertscore = evaluate.load("evaluate-metric/bertscore") + self._cache_bertscore = bertscore.compute( + predictions=preds, + references=golds, + model_type="bert-base-multilingual-cased", + ) + return self._cache_bertscore + else: + return self._cache_bertscore + + def bert_score_f1(self, items): + res = self.bert_score(items) + return sum(res["f1"]) / len(res["f1"]) + + def bart_score(self, items): + golds, preds = zip(*items) + bart_scorer = BARTScorer(device="cuda", checkpoint="facebook/bart-large-cnn") + bart_path = os.path.abspath(os.path.join(__file__, "..", "..", "..")) + bart_path = os.path.join(bart_path, "external_utils", "BARTScore", "bart_score.pth") + bart_scorer.load(path=bart_path) + res = bart_scorer.score(srcs=preds, tgts=golds, batch_size=8) + return sum(res) / len(res) + + def aggregation(self): + return { + "rouge1": self.rouge1, + "rouge2": self.rouge2, + "rougeL": self.rougeL, + "bert_score_f1": self.bert_score_f1, + "bart_score": self.bart_score, + } + + +class ExtractiveSummarization(ConfigurableTask): + VERSION = 1 + DATASET_NAME = None + EVAL_LAST_TURN = True + + def __init__(self, **kwargs): + super().__init__(config={"metadata": {"version": self.VERSION}}) + + def reformulate_turn_req(self, req, turn_request, turn): + return req + + def has_training_docs(self): + return False + + def has_validation_docs(self): + return False + + def has_test_docs(self): + return True + + def training_docs(self): + return self.dataset["train"] + + def validation_docs(self): + return self.dataset["validation"] + + def test_docs(self): + return self.dataset["test"] + + def doc_to_text(self, doc): + # TODO: Format the query prompt portion of the document example. 
+ return doc["query"] + + def doc_to_target(self, doc): + return doc["answer"] + + def process_results(self, doc, results): + return { + "rouge1": (doc["label"], doc["text"], results[0]), + "rouge2": (doc["label"], doc["text"], results[0]), + "rougeL": (doc["label"], doc["text"], results[0]), + "bert_score_f1": (doc["label"], doc["text"], results[0]), + "bart_score": (doc["label"], doc["text"], results[0]), + } + + def higher_is_better(self): + return { + "rouge1": True, + "rouge2": True, + "rougeL": True, + "bert_score_f1": True, + "bart_score": True, + } + + def construct_requests(self, doc, ctx, **kwargs): + """Uses RequestFactory to construct Requests and returns an iterable of + Requests which will be sent to the LM. + + :param doc: + The document as returned from training_docs, validation_docs, or test_docs. + :param ctx: str + The context string, generated by fewshot_context. This includes the natural + language description, as well as the few shot examples, and the question + part of the document for `doc`. + """ + # cont_request = rf.greedy_until(ctx, {"until": None}) + # return cont_request + kwargs.pop("apply_chat_template") + return [ + Instance( + request_type="generate_until", + doc=doc, + arguments=(ctx, {}), + idx=0, + **kwargs, + ) + ] + + def get_sum(self, labels, texts): + summ = [] + for label, text in zip(labels, texts): + text = text.split("\n") + new_text = "\n".join( + [ + text[index] + for index in range(len(text)) + if index < len(label) and label[index] == 1 + ] + ) + summ.append(new_text) + return summ + + def rouge_score(self, items): + golds, texts, preds = zip(*items) + golds = self.get_sum(golds, texts) + preds = self.get_sum([val.split("\n") for val in preds], texts) + rouge = evaluate.load("rouge") + results = rouge.compute(predictions=preds, references=golds) + return results + + def rouge1(self, items): + results = self.rouge_score(items) + return results["rouge1"] + + def rouge2(self, items): + results = self.rouge_score(items) + return results["rouge2"] + + def rougeL(self, items): + results = self.rouge_score(items) + return results["rougeL"] + + def bert_score(self, items): + if getattr(self, "_cache_bertscore", None) is None: + golds, texts, preds = zip(*items) + golds = self.get_sum(golds, texts) + preds = self.get_sum([val.split("\n") for val in preds], texts) + + bertscore = evaluate.load("evaluate-metric/bertscore") + self._cache_bertscore = bertscore.compute( + predictions=preds, + references=golds, + model_type="bert-base-multilingual-cased", + ) + return self._cache_bertscore + else: + return self._cache_bertscore + + def bert_score_f1(self, items): + res = self.bert_score(items) + return sum(res["f1"]) / len(res["f1"]) + + def bart_score(self, items): + golds, texts, preds = zip(*items) + golds = self.get_sum(golds, texts) + preds = self.get_sum([val.split("\n") for val in preds], texts) + + bart_scorer = BARTScorer(device="cuda", checkpoint="facebook/bart-large-cnn") + bart_path = os.path.abspath(os.path.join(__file__, "..", "..", "..")) + bart_path = os.path.join(bart_path, "external_utils", "BARTScore", "bart_score.pth") + bart_scorer.load(path=bart_path) + res = bart_scorer.score(srcs=preds, tgts=golds, batch_size=8) + return sum(res) / len(res) + + def aggregation(self): + return { + "rouge1": self.rouge1, + "rouge2": self.rouge2, + "rougeL": self.rougeL, + "bert_score_f1": self.bert_score_f1, + "bart_score": self.bart_score, + } + + +class RelationExtraction(ConfigurableTask): + VERSION = 1 + DATASET_NAME = None + EVAL_LAST_TURN = True 
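+    # Predictions are parsed as newline-separated relation strings and scored with
+    # set-based precision, recall, and F1 against the gold label set.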
+ + def __init__(self, **kwargs): + super().__init__(config={"metadata": {"version": self.VERSION}}) + + def reformulate_turn_req(self, req, turn_request, turn): + return req + + def has_training_docs(self): + return False + + def has_validation_docs(self): + return False + + def has_test_docs(self): + return True + + def training_docs(self): + return self.dataset["train"] + + def validation_docs(self): + return self.dataset["validation"] + + def test_docs(self): + return self.dataset["test"] + + def doc_to_text(self, doc): + # TODO: Format the query prompt portion of the document example. + return doc["query"] + + def doc_to_target(self, doc): + return doc["answer"] + + def process_results(self, doc, results): + return { + "precision": (doc["label"], results[0]), + "recall": (doc["label"], results[0]), + "f1": (doc["label"], results[0]), + } + + def higher_is_better(self): + return { + "precision": True, + "recall": True, + "f1": True, + } + + def construct_requests(self, doc, ctx, **kwargs): + """Uses RequestFactory to construct Requests and returns an iterable of + Requests which will be sent to the LM. + + :param doc: + The document as returned from training_docs, validation_docs, or test_docs. + :param ctx: str + The context string, generated by fewshot_context. This includes the natural + language description, as well as the few shot examples, and the question + part of the document for `doc`. + """ + # cont_request = rf.greedy_until(ctx, {"until": None}) + # return cont_request + kwargs.pop("apply_chat_template") + return [ + Instance( + request_type="generate_until", + doc=doc, + arguments=(ctx, {}), + idx=0, + **kwargs, + ) + ] + + def process(self, items): + golds, preds = zip(*items) + + all_golds = [] + all_preds = [] + + for gold, pred in zip(golds, preds): + all_golds.extend(gold) + pred = pred.split("\n") + all_preds.extend(pred) + + return set(all_golds), set(all_preds) + + def precision(self, items): + golds, preds = self.process(items) + tp = golds & preds + prec = len(tp) / len(preds) + return prec + + def recall(self, items): + golds, preds = self.process(items) + tp = golds & preds + rec = len(tp) / len(golds) + return rec + + def cal_f1(self, items): + prec = self.precision(items) + rec = self.recall(items) + if prec + rec == 0.0: + return 0.0 + return 2 * (prec * rec) / (prec + rec) + + def aggregation(self): + return { + "precision": self.precision, + "recall": self.recall, + "f1": self.cal_f1, + } + + +class QA(ConfigurableTask): + VERSION = 1 + DATASET_NAME = None + EVAL_LAST_TURN = True + + def __init__(self, **kwargs): + super().__init__(config={"metadata": {"version": self.VERSION}}) + + def reformulate_turn_req(self, req, turn_request, turn): + return req + + def has_training_docs(self): + return True + + def has_validation_docs(self): + return True + + def has_test_docs(self): + return True + + def training_docs(self): + return self.dataset["train"] + + def validation_docs(self): + return self.dataset["validation"] + + def test_docs(self): + return self.dataset["test"] + + def should_decontaminate(self): + return True + + def doc_to_decontamination_query(self, doc): + return doc["text"] + + def doc_to_text(self, doc): + # TODO: Format the query prompt portion of the document example. + return doc["query"] + + def construct_requests(self, doc, ctx, **kwargs): + """Uses RequestFactory to construct Requests and returns an iterable of + Requests which will be sent to the LM. 
+ + :param doc: + The document as returned from training_docs, validation_docs, or test_docs. + :param ctx: str + The context string, generated by fewshot_context. This includes the natural + language description, as well as the few shot examples, and the question + part of the document for `doc`. + """ + # cont_request = rf.greedy_until(ctx, {"until": None}) + # return cont_request + kwargs.pop("apply_chat_template") + return [ + Instance( + request_type="generate_until", + doc=doc, + arguments=(ctx, {}), + idx=0, + **kwargs, + ) + ] + + def doc_to_target(self, doc): + return doc["answer"] + + def process_results(self, doc, results): + gold = doc["answer"] + + acc = 1.0 if results[0].strip() == gold else 0.0 + + return { + "acc": acc, + } + + def higher_is_better(self): + return { + "acc": True, + } + + def aggregation(self): + return { + "acc": mean, + } + + +class FPB(Classification): + DATASET_PATH = "chancefocus/flare-fpb" + + +class FIQASA(Classification): + DATASET_PATH = "chancefocus/flare-fiqasa" + + +class NER(ConfigurableTask): + VERSION = 1 + DATASET_PATH = "chancefocus/flare-ner" + DATASET_NAME = None + EVAL_LAST_TURN = True + + def __init__(self, **kwargs): + super().__init__(config={"metadata": {"version": self.VERSION}}) + + def reformulate_turn_req(self, req, turn_request, turn): + return req + + def has_training_docs(self): + return True + + def has_validation_docs(self): + return True + + def has_test_docs(self): + return True + + def training_docs(self): + return self.dataset["train"] + + def validation_docs(self): + return self.dataset["validation"] + + def test_docs(self): + return self.dataset["test"] + + def should_decontaminate(self): + return True + + def doc_to_decontamination_query(self, doc): + return doc["text"] + + def doc_to_text(self, doc): + # TODO: Format the query prompt portion of the document example. + return doc["query"] + + def construct_requests(self, doc, ctx, **kwargs): + """Uses RequestFactory to construct Requests and returns an iterable of + Requests which will be sent to the LM. + + :param doc: + The document as returned from training_docs, validation_docs, or test_docs. + :param ctx: str + The context string, generated by fewshot_context. This includes the natural + language description, as well as the few shot examples, and the question + part of the document for `doc`. 
+ """ + kwargs.pop("apply_chat_template") + return [ + Instance( + request_type="generate_until", + doc=doc, + arguments=(ctx, {}), + idx=0, + **kwargs, + ) + ] + + def doc_to_target(self, doc): + return doc["answer"] + + def process_results(self, doc, results): + text = doc["text"] + pred = process_text(results[0], text) + + return {"entity_f1": (pred, doc["label"], results[0])} + + def higher_is_better(self): + return { + "entity_f1": True, + } + + @classmethod + def entity_f1(cls, items): + preds, golds, _ = zip(*items) + f1 = entity_score(golds, preds) + return f1 + + def aggregation(self): + return { + "entity_f1": self.entity_f1, + } + + +class FinQA(QA): + DATASET_PATH = "chancefocus/flare-finqa" + + +class StockMovement(Classification): + DATASET_NAME = None + CALCULATE_MCC = True + CHOICE_DICT = { + "rise": ["yes", "positive"], + "fall": ["no", "negative", "neutral"], + } + DEFAULT = "fall" + + def process_results(self, doc, results): + gold: str = doc["choices"][doc["gold"]] + if self.LOWER_CASE: + gold = gold.lower() + ini_result = results[0].strip() + if self.LOWER_CASE: + ini_result = ini_result.lower() + + result = None + for choice in doc["choices"]: + if self.LOWER_CASE: + choice = choice.lower() + if choice in ini_result or any( + [val in ini_result for val in self.CHOICE_DICT[choice]] + ): + result = choice + break + if result is None: + result = self.DEFAULT + + acc = 1.0 if gold == result else 0.0 + + results = { + "acc": acc, + "missing": int(result == "missing"), + "f1": (result, gold), + "macro_f1": (result, gold), + } + + if self.CALCULATE_MCC: + results["mcc"] = (result, gold) + + return results + + +class StockMovementBigData(StockMovement): + DATASET_PATH = "chancefocus/flare-sm-bigdata" + + +class StockMovementACL(StockMovement): + DATASET_PATH = "chancefocus/flare-sm-acl" + + +class StockMovementCIKM(StockMovement): + DATASET_PATH = "chancefocus/flare-sm-cikm" + + +SM_TASKS = { + "flare_sm_bigdata": StockMovementBigData, + "flare_sm_acl": StockMovementACL, + "flare_sm_cikm": StockMovementCIKM, +} + + +class Headlines(Classification): + DATASET_PATH = "chancefocus/flare-headlines" + + def process_results(self, doc, results): + gold = doc["gold"] + + return { + "avg_f1": (doc["label_type"], int(results[0].strip() != "Yes"), gold, results), + } + + def higher_is_better(self): + return { + "avg_f1": True, + } + + @classmethod + def label_avg(cls, items): + labels, preds, golds, rels = zip(*items) + label_set = set(labels) + labels = np.array(labels) + preds = np.array(preds) + golds = np.array(golds) + all_f1s = [] + for l in label_set: + pds = preds[labels == l] + gds = golds[labels == l] + f1 = f1_score(gds, pds, average="weighted", labels=[0, 1]) + all_f1s.append(f1) + return np.mean(all_f1s) + + # def construct_requests(self, doc, ctx): + # """Uses RequestFactory to construct Requests and returns an iterable of + # Requests which will be sent to the LM. + + # :param doc: + # The document as returned from training_docs, validation_docs, or test_docs. + # :param ctx: str + # The context string, generated by fewshot_context. This includes the natural + # language description, as well as the few shot examples, and the question + # part of the document for `doc`. 
+ # """ + # cont_request = rf.greedy_until(ctx, {"until": None}) + # return cont_request + + def aggregation(self): + return { + "avg_f1": self.label_avg, + } + + +class FinerOrd(SequentialLabeling): + DATASET_PATH = "chancefocus/flare-finer-ord" + LMAP = { + "O": 0, + "B-PER": 1, + "I-PER": 2, + "B-LOC": 3, + "I-LOC": 4, + "B-ORG": 5, + "I-ORG": 6, + } + + +class FOMC(Classification): + # DATASET_PATH = "chancefocus/flare-fomc" + DATASET_PATH = "TheFinAI/finben-fomc" + + def has_training_docs(self): + return False + + def has_validation_docs(self): + return False + + +class German(StockMovement): + DATASET_PATH = "chancefocus/flare-german" + CHOICE_DICT = { + "good": ["yes", "positive"], + "bad": ["no", "negative", "neutral"], + } + DEFAULT = "good" + + +class Australian(StockMovement): + # DATASET_PATH = "chancefocus/flare-australian" + DATASET_PATH = "TheFinAI/flare-australian" + CHOICE_DICT = { + "good": ["yes", "positive"], + "bad": ["no", "negative", "neutral"], + } + DEFAULT = "good" + + +class ECTSUM(ExtractiveSummarization): + DATASET_PATH = "chancefocus/flare-ectsum" + + +class EDTSUM(AbstractiveSummarization): + DATASET_PATH = "chancefocus/flare-edtsum" + + +class EDTSUM_test(AbstractiveSummarization): + DATASET_PATH = "TheFinAI/flare-edtsum_test" + + +class ConvFinQA(QA): + DATASET_PATH = "chancefocus/flare-convfinqa" + + def reformulate_turn_req(self, req, turn_request, turn): + if turn == 0: + return req + pre_answers = {f"answer{i}": turn_request[i][0] for i in range(turn)} + if pre_answers: + req.args = tuple([req.args[0].format(**pre_answers)] + list(req.args[1:])) + return req + + +class TSA(ConfigurableTask): + VERSION = 1 + DATASET_PATH = "chancefocus/flare-tsa" + DATASET_NAME = None + EVAL_LAST_TURN = True + + def __init__(self, **kwargs): + super().__init__(config={"metadata": {"version": self.VERSION}}) + + def reformulate_turn_req(self, req, turn_request, turn): + return req + + def has_training_docs(self): + return False + + def has_validation_docs(self): + return False + + def has_test_docs(self): + return True + + def training_docs(self): + return self.dataset["train"] + + def validation_docs(self): + return self.dataset["validation"] + + def test_docs(self): + return self.dataset["test"] + + def doc_to_text(self, doc): + # TODO: Format the query prompt portion of the document example. + return doc["query"] + + def doc_to_target(self, doc): + return "\nAnswer: " + str(doc["answer"]) + + def process_results(self, doc, results): + pred = results[0].split("\n")[0] + pred = re.findall(r'[0-9]+(?:\.[0-9]+)?', pred) + missing = 0 + if not pred: + pred = -100.0 + missing = 1 + else: + pred = pred[0] + pred = float(pred) + return { + "rmse": (doc["answer"], pred), + "missing": missing + } + + def higher_is_better(self): + return { + "rmse": False, + } + + def construct_requests(self, doc, ctx,**kwargs): + """ + Uses RequestFactory to construct Requests and returns an iterable of + Requests which will be sent to the LM. + + :param doc: + The document as returned from training_docs, validation_docs, or test_docs. + :param ctx: str + The context string, generated by fewshot_context. This includes the natural + language description, as well as the few shot examples, and the question + part of the document for `doc`. 
+ """ + # cont_request = rf.greedy_until(ctx, {"until": "Answer:"}) + # return cont_request + kwargs.pop("apply_chat_template") + return [ + Instance( + request_type="generate_until", + doc=doc, + arguments=(ctx, {"until": "Answer:"}), + idx=0, + **kwargs, + ) + ] + + def rmse(self, items): + golds, preds = zip(*items) + fgolds, fpreds = [], [] + for gold, pred in zip(golds, preds): + if pred == -100.0: + continue + fgolds.append(gold) + fpreds.append(max(min(pred, 1.0), -1.0)) + rmse = mean_squared_error(fgolds, fpreds, squared=True) + + return rmse + + def aggregation(self): + return { + "rmse": self.rmse, + "missing": mean, + } + + + +class CFA(Classification): + DATASET_PATH = "chancefocus/flare-cfa" + LOWER_CASE = False + + def has_training_docs(self): + return False + + def has_validation_docs(self): + return False + + +class FINARGECCARC(Classification): + DATASET_PATH = "chancefocus/flare-finarg-ecc-arc" + + def has_training_docs(self): + return False + + def has_validation_docs(self): + return False + + +class FINARGECCAUC(Classification): + DATASET_PATH = "chancefocus/flare-finarg-ecc-auc" + + def has_training_docs(self): + return False + + def has_validation_docs(self): + return False + + +class FINARGECCAUC_test(Classification): + DATASET_PATH = "TheFinAI/flare-finarg-ecc-auc_test" + + +class MLESG(Classification): + DATASET_PATH = "chancefocus/flare-mlesg" + + def has_training_docs(self): + return False + + def has_validation_docs(self): + return False + + +class FSRL(SequentialLabeling): + DATASET_PATH = "chancefocus/flare-fsrl" + LMAP = {key: index for index, key in enumerate(['O', 'I-QUANT', 'B-QUANT', 'I-TIME', 'B-TIME', 'I-MANNER', 'B-MANNER', 'I-THEME', 'B-THEME', 'I-VALUE', 'B-VALUE', 'I-WHOLE', 'B-WHOLE', 'I-LOCATION', 'B-LOCATION', 'I-AGENT', 'B-AGENT', 'I-CAUSE', 'B-CAUSE', 'I-SOURCE', 'B-SOURCE', 'I-REF_TIME', 'B-REF_TIME', 'I-CONDITION', 'B-CONDITION'])} + +class CFA(Classification): + DATASET_PATH = "chancefocus/flare-cfa" + + def has_training_docs(self): + return False + + def has_validation_docs(self): + return False + +# class FinargECCAUC(Classification): +# DATASET_PATH = "chancefocus/flare-finarg-ecc-auc" + +# class FinargECCARC(Classification): +# DATASET_PATH = "chancefocus/flare-finarg-ecc-arc" + +class CD(SequentialLabeling): + DATASET_PATH = "chancefocus/flare-cd" + LMAP = {key: index for index, key in enumerate(['O', 'I-CAUSE', 'B-CAUSE', 'I-EFFECT', 'B-EFFECT'])} + +class MultiFinEN(Classification): + DATASET_PATH = "chancefocus/flare-multifin-en" + + def has_training_docs(self): + return False + + def has_validation_docs(self): + return False + +class MA(Classification): + DATASET_PATH = "chancefocus/flare-ma" + + def has_training_docs(self): + return False + + def has_validation_docs(self): + return False + +class Causal20SC(Classification): + DATASET_PATH = "chancefocus/flare-causal20-sc" + + def has_training_docs(self): + return False + + def has_validation_docs(self): + return False + +class FNXL(SequentialLabeling): + DATASET_PATH = "chancefocus/flare-fnxl" + LMAP = {'B-BusinessCombinationContingentConsiderationArrangementsRangeOfOutcomesValueHigh': 140, 'B-VariableInterestEntityOwnershipPercentage': 646, 'B-GainLossOnDispositionOfAssets1': 119, 'B-IndefiniteLivedIntangibleAssetsExcludingGoodwill': 46, 'B-MarketingAndAdvertisingExpense': 269, 'B-ReportingUnitPercentageOfFairValueInExcessOfCarryingAmount': 142, 'B-CapitalizedComputerSoftwareNet': 91, 'B-BusinessCombinationConsiderationTransferredEquityInterestsIssuedAndIssuable': 183, 
'B-LitigationSettlementExpense': 115, 'B-DefinedBenefitPlanExpectedAmortizationOfGainLossNextFiscalYear': 639, 'B-DeferredCompensationArrangementWithIndividualCompensationExpense': 15, 'B-ReclassificationFromAociCurrentPeriodTax': 152, 'B-OtherComprehensiveIncomeLossBeforeReclassificationsTax': 694, 'B-PreferredStockDividendsPerShareDeclared': 236, 'B-CapitalExpendituresIncurredButNotYetPaid': 344, 'B-DeferredCompensationArrangementWithIndividualContributionsByEmployer': 560, 'B-SeveranceCosts1': 311, 'B-InterestExpense': 784, 'B-SaleOfStockConsiderationReceivedOnTransaction': 76, 'B-LineOfCreditFacilityInterestRateAtPeriodEnd': 822, 'B-SharesIssuedPricePerShare': 137, 'B-EquityMethodInvestmentDifferenceBetweenCarryingAmountAndUnderlyingEquity': 63, 'B-EquitySecuritiesFvNi': 30, 'B-RightOfUseAssetObtainedInExchangeForOperatingLeaseLiability': 118, 'B-DefinedBenefitPlanFundedStatusOfPlan': 547, 'B-SharebasedCompensationArrangementBySharebasedPaymentAwardPurchasePriceOfCommonStockPercent': 323, 'B-TaxCutsAndJobsActOf2017IncomeTaxExpenseBenefit': 256, 'B-LongtermDebtWeightedAverageInterestRate': 364, 'B-ImpairmentOfIntangibleAssetsFinitelived': 71, 'B-ProceedsFromLinesOfCredit': 496, 'B-LongTermPurchaseCommitmentAmount': 701, 'B-DebtInstrumentFairValue': 335, 'B-RestructuringAndRelatedCostCostIncurredToDate1': 52, 'B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsVestedInPeriod': 581, 'B-FiniteLivedIntangibleAssetsAccumulatedAmortization': 143, 'B-StockRepurchasedAndRetiredDuringPeriodValue': 330, 'B-BusinessCombinationProFormaInformationRevenueOfAcquireeSinceAcquisitionDateActual': 77, 'B-ClassOfWarrantOrRightExercisePriceOfWarrantsOrRights1': 361, 'B-BusinessAcquisitionPurchasePriceAllocationGoodwillExpectedTaxDeductibleAmount': 550, 'B-OperatingLossCarryforwardsValuationAllowance': 173, 'B-BusinessAcquisitionEquityInterestsIssuedOrIssuableNumberOfSharesIssued': 32, 'B-DefinedContributionPlanMaximumAnnualContributionsPerEmployeePercent': 45, 'B-ContractWithCustomerLiabilityCurrent': 2, 'B-IncomeLossFromContinuingOperationsBeforeIncomeTaxesForeign': 474, 'B-FiniteLivedIntangibleAssetsAmortizationExpenseYearThree': 1306, 'B-DefinedBenefitPlanUltimateHealthCareCostTrendRate1': 62, 'B-DefinedBenefitPlanRecognizedNetGainLossDueToSettlements1': 317, 'B-UnrecognizedTaxBenefitsInterestOnIncomeTaxesExpense': 448, 'B-ForeignCurrencyTransactionGainLossRealized': 132, 'B-DeferredTaxAssetsOperatingLossCarryforwardsSubjectToExpiration': 262, 'B-RetainedEarningsAccumulatedDeficit': 174, 'B-ProceedsFromIssuanceOfCommonStock': 209, 'B-EmployeeServiceShareBasedCompensationAllocationOfRecognizedPeriodCostsCapitalizedAmount': 29, 'B-OtherComprehensiveIncomeLossPensionAndOtherPostretirementBenefitPlansTax': 284, 'B-InventoryWriteDown': 465, 'B-RestructuringReserve': 234, 'B-LitigationSettlementAmountAwardedToOtherParty': 42, 'B-DerivativeGainLossOnDerivativeNet': 87, 'B-SharebasedCompensationArrangementBySharebasedPaymentAwardEquityInstrumentsOtherThanOptionsAggregateIntrinsicValueVested': 241, 'B-DerivativeFixedInterestRate': 589, 'B-CashAndCashEquivalentsAtCarryingValue': 257, 'B-ContractWithCustomerAssetNet': 245, 'B-RestructuringAndRelatedCostExpectedCost1': 107, 'B-IncomeTaxHolidayAggregateDollarAmount': 347, 'B-OperatingLeaseCost': 248, 'B-AllowanceForDoubtfulAccountsReceivable': 146, 'B-RepaymentsOfDebt': 416, 'B-InterestPaid': 110, 'B-DeferredFinanceCostsNet': 28, 'B-IncomeTaxExaminationPenaltiesAndInterestAccrued': 271, 
'B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsNonvestedNumber': 92, 'B-CapitalizedContractCostNet': 155, 'B-CumulativeEffectOfNewAccountingPrincipleInPeriodOfAdoption': 17, 'B-IncomeTaxesPaid': 495, 'B-EquityMethodInvestmentOtherThanTemporaryImpairment': 22, 'B-InterestPaidNet': 225, 'B-EquitySecuritiesWithoutReadilyDeterminableFairValueAmount': 175, 'B-ImpairmentOfLongLivedAssetsHeldForUse': 313, 'B-GoodwillAcquiredDuringPeriod': 156, 'B-DecreaseInUnrecognizedTaxBenefitsIsReasonablyPossible': 363, 'B-RestructuringAndRelatedCostIncurredCost': 75, 'B-StockRepurchasedDuringPeriodValue': 254, 'B-IncomeTaxExaminationPenaltiesAndInterestExpense': 525, 'B-ImpairmentOfIntangibleAssetsIndefinitelivedExcludingGoodwill': 55, 'B-PreferredStockLiquidationPreference': 157, 'B-ImpairmentOfIntangibleAssetsExcludingGoodwill': 158, 'B-IncomeTaxesPaidNet': 456, 'B-DefinedContributionPlanEmployerMatchingContributionPercent': 332, 'B-CostOfGoodsAndServicesSold': 274, 'B-DepreciationDepletionAndAmortization': 338, 'B-InterestExpenseDebt': 191, 'B-LineOfCreditFacilityUnusedCapacityCommitmentFeePercentage': 442, 'B-DisposalGroupIncludingDiscontinuedOperationConsideration': 6, 'B-UnrecognizedTaxBenefitsInterestOnIncomeTaxesAccrued': 14, 'B-SaleOfStockPricePerShare': 278, 'B-DefinedContributionPlanEmployerMatchingContributionPercentOfMatch': 267, 'B-FinitelivedIntangibleAssetsAcquired1': 202, 'B-PaymentsForRepurchaseOfCommonStock': 486, 'B-BusinessCombinationContingentConsiderationLiability': 103, 'B-RelatedPartyTransactionAmountsOfTransaction': 180, 'O': 0} + +class TATQA(QA): + DATASET_PATH = "chancefocus/flare-tatqa" + + def has_training_docs(self): + return False + + def has_validation_docs(self): + return False + + +class FinRED(RelationExtraction): + DATASET_PATH = "chancefocus/flare-finred" + + +class lendingclub(Classification): + # DATASET_PATH = "chancefocus/cra-lendingclub" + DATASET_PATH = "TheFinAI/cra-lendingclub" + CALCULATE_MCC = True + + +class ccf(Classification): + DATASET_PATH = "chancefocus/cra-ccf" + CALCULATE_MCC = True + + +class ccfraud(Classification): + DATASET_PATH = "chancefocus/cra-ccfraud" + CALCULATE_MCC = True + + +class polish(Classification): + DATASET_PATH = "chancefocus/cra-polish" + CALCULATE_MCC = True + + +class taiwan(Classification): + DATASET_PATH = "chancefocus/cra-taiwan" + CALCULATE_MCC = True + + +class portoseguro(Classification): + DATASET_PATH = "chancefocus/cra-portoseguro" + CALCULATE_MCC = True + + +class travelinsurace(Classification): + DATASET_PATH = "chancefocus/cra-travelinsurace" + CALCULATE_MCC = True + + +############### + +# %% +import torch +import torch.nn as nn +import traceback +from transformers import BartTokenizer, BartForConditionalGeneration +from typing import List +import numpy as np + + +class BARTScorer: + def __init__(self, device='cuda:0', max_length=1024, checkpoint='facebook/bart-large-cnn'): + # Set up model + self.device = device + self.max_length = max_length + self.tokenizer = BartTokenizer.from_pretrained(checkpoint) + self.model = BartForConditionalGeneration.from_pretrained(checkpoint) + self.model.eval() + self.model.to(device) + + # Set up loss + self.loss_fct = nn.NLLLoss(reduction='none', ignore_index=self.model.config.pad_token_id) + self.lsm = nn.LogSoftmax(dim=1) + + def load(self, path=None): + """ Load model from paraphrase finetuning """ + if path is None: + path = 'models/bart.pth' + self.model.load_state_dict(torch.load(path, map_location=self.device)) + + def score(self, 
srcs, tgts, batch_size=4): + """ Score a batch of examples """ + score_list = [] + for i in range(0, len(srcs), batch_size): + src_list = srcs[i: i + batch_size] + tgt_list = tgts[i: i + batch_size] + try: + with torch.no_grad(): + encoded_src = self.tokenizer( + src_list, + max_length=self.max_length, + truncation=True, + padding=True, + return_tensors='pt' + ) + encoded_tgt = self.tokenizer( + tgt_list, + max_length=self.max_length, + truncation=True, + padding=True, + return_tensors='pt' + ) + src_tokens = encoded_src['input_ids'].to(self.device) + src_mask = encoded_src['attention_mask'].to(self.device) + + tgt_tokens = encoded_tgt['input_ids'].to(self.device) + tgt_mask = encoded_tgt['attention_mask'] + tgt_len = tgt_mask.sum(dim=1).to(self.device) + + output = self.model( + input_ids=src_tokens, + attention_mask=src_mask, + labels=tgt_tokens + ) + logits = output.logits.view(-1, self.model.config.vocab_size) + loss = self.loss_fct(self.lsm(logits), tgt_tokens.view(-1)) + loss = loss.view(tgt_tokens.shape[0], -1) + loss = loss.sum(dim=1) / tgt_len + curr_score_list = [-x.item() for x in loss] + score_list += curr_score_list + + except RuntimeError: + traceback.print_exc() + print(f'source: {src_list}') + print(f'target: {tgt_list}') + exit(0) + return score_list + + def multi_ref_score(self, srcs, tgts: List[List[str]], agg="mean", batch_size=4): + # Assert we have the same number of references + ref_nums = [len(x) for x in tgts] + if len(set(ref_nums)) > 1: + raise Exception("You have different number of references per test sample.") + + ref_num = len(tgts[0]) + score_matrix = [] + for i in range(ref_num): + curr_tgts = [x[i] for x in tgts] + scores = self.score(srcs, curr_tgts, batch_size) + score_matrix.append(scores) + if agg == "mean": + score_list = np.mean(score_matrix, axis=0) + elif agg == "max": + score_list = np.max(score_matrix, axis=0) + else: + raise NotImplementedError + return list(score_list) + + def test(self, batch_size=3): + """ Test """ + src_list = [ + 'This is a very good idea. Although simple, but very insightful.', + 'Can I take a look?', + 'Do not trust him, he is a liar.' + ] + + tgt_list = [ + "That's stupid.", + "What's the problem?", + 'He is trustworthy.' 
+ ] + + print(self.score(src_list, tgt_list, batch_size)) + diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/fnxl.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/fnxl.yaml new file mode 100644 index 00000000..481dedba --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/fnxl.yaml @@ -0,0 +1,2 @@ +task: fnxl +class: !function flare.FNXL diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/fomc.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/fomc.yaml new file mode 100644 index 00000000..49283578 --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/fomc.yaml @@ -0,0 +1,2 @@ +task: fomc +class: !function flare.FOMC \ No newline at end of file diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/fpb.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/fpb.yaml new file mode 100644 index 00000000..d0f31f7d --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/fpb.yaml @@ -0,0 +1,2 @@ +task: fpb +class: !function flare.FPB diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/fsrl.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/fsrl.yaml new file mode 100644 index 00000000..b63cc9a4 --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/fsrl.yaml @@ -0,0 +1,2 @@ +task: fsrl +class: !function flare.FSRL diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/german.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/german.yaml new file mode 100644 index 00000000..ac900011 --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/german.yaml @@ -0,0 +1,2 @@ +task: german +class: !function flare.German diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/headlines.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/headlines.yaml new file mode 100644 index 00000000..6178d264 --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/headlines.yaml @@ -0,0 +1,2 @@ +task: headlines +class: !function flare.Headlines \ No newline at end of file diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/ma.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/ma.yaml new file mode 100644 index 00000000..481b3efe --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/ma.yaml @@ -0,0 +1,2 @@ +task: ma +class: !function flare.MA \ No newline at end of file diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/mlesg.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/mlesg.yaml new file mode 100644 index 00000000..f32b23ec --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/mlesg.yaml @@ -0,0 +1,2 @@ +task: mlesg +class: !function flare.MLESG \ No newline at end of file diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/multifin_en.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/multifin_en.yaml new file mode 100644 index 00000000..091e8e4b --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/multifin_en.yaml @@ -0,0 +1,2 @@ +task: multifin_en +class: !function flare.MultiFinEN \ No newline at end of file diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/ner.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/ner.yaml new file mode 100644 index 00000000..88074566 --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/ner.yaml @@ -0,0 +1,2 @@ +task: ner +class: !function flare.NER diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/sm_acl.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/sm_acl.yaml new file mode 100644 index 00000000..5049f2a5 
--- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/sm_acl.yaml @@ -0,0 +1,2 @@ +task: sm_acl +class: !function flare.StockMovementACL \ No newline at end of file diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/sm_bigdata.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/sm_bigdata.yaml new file mode 100644 index 00000000..abdaa2da --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/sm_bigdata.yaml @@ -0,0 +1,2 @@ +task: sm_bigdata +class: !function flare.StockMovementBigData \ No newline at end of file diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/sm_cikm.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/sm_cikm.yaml new file mode 100644 index 00000000..6d52f730 --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/sm_cikm.yaml @@ -0,0 +1,2 @@ +task: sm_cikm +class: !function flare.StockMovementCIKM \ No newline at end of file diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/tatqa.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/tatqa.yaml new file mode 100644 index 00000000..8cf461e3 --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/tatqa.yaml @@ -0,0 +1,2 @@ +task: tatqa +class: !function flare.TATQA \ No newline at end of file diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/tsa.yaml b/examples/dataset_llm_workflow/extra_tasks/flare/tsa.yaml new file mode 100644 index 00000000..efd902f0 --- /dev/null +++ b/examples/dataset_llm_workflow/extra_tasks/flare/tsa.yaml @@ -0,0 +1,2 @@ +task: tsa +class: !function flare.TSA diff --git a/examples/dataset_llm_workflow/workflow.py b/examples/dataset_llm_workflow/workflow.py index 2b466dd9..fa6e2838 100644 --- a/examples/dataset_llm_workflow/workflow.py +++ b/examples/dataset_llm_workflow/workflow.py @@ -359,7 +359,7 @@ def llm_example(self, benchmark_name, rebuild=False, skip_eval=True): performance_table.insert(0, "Dataset", datasets+['Avg']) performance_table.to_csv(f"model_performance/{benchmark_name}-new.csv", index=False) else: - performance_table = pd.read_csv(f"model_performance/{benchmark_name}-new.csv") + performance_table = pd.read_csv(f"model_performance/{benchmark_name}.csv") results_table = self._anlysis_table(benchmark_name, performance_table, score_results) self._plot_radar_chart(benchmark_name, results_table[:-4]) From e89bcef1f9a0a9a36e8861ab7ac62ac3812231d2 Mon Sep 17 00:00:00 2001 From: zouxiaochuan Date: Tue, 25 Mar 2025 17:46:42 +0800 Subject: [PATCH 092/108] [MNT] do not return learnware that has semantic not in user query --- learnware/market/easy/searcher.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/learnware/market/easy/searcher.py b/learnware/market/easy/searcher.py index 6a0d579f..0e76ae37 100644 --- a/learnware/market/easy/searcher.py +++ b/learnware/market/easy/searcher.py @@ -122,9 +122,17 @@ def _match_semantic_spec_tag(self, semantic_spec1, semantic_spec2) -> bool: """ for key in semantic_spec1.keys(): v1 = semantic_spec1[key].get("Values", "") - if key not in semantic_spec2 or len(v1) == 0: + if len(v1) == 0: continue - + + if key not in semantic_spec2: + if "Others" in v1: + # v1 contains "Others" and key not in semantic_spec2 + continue + else: + # user input contains some key that is not in database + return False + v2 = semantic_spec2[key].get("Values", "") if key not in ("Name", "Description"): if len(v2) == 0: From 23f3e82aaa0bec271d2005facfc594dfa4b3e5c7 Mon Sep 17 00:00:00 2001 From: zouxiaochuan Date: Tue, 25 Mar 2025 23:43:21 +0800 
Subject: [PATCH 093/108] [MNT] add Optional type in semantic search --- learnware/market/easy/searcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learnware/market/easy/searcher.py b/learnware/market/easy/searcher.py index 0e76ae37..0b8f3e66 100644 --- a/learnware/market/easy/searcher.py +++ b/learnware/market/easy/searcher.py @@ -139,7 +139,7 @@ def _match_semantic_spec_tag(self, semantic_spec1, semantic_spec2) -> bool: # user input contains some key that is not in database return False - if semantic_spec1[key]["Type"] == "Class": + if semantic_spec1[key]["Type"] in ("Class", "Optional"): if isinstance(v2, list): v2 = v2[0] if v2 not in v1: From 06d4f599977544ed223c55ff8ffa837b0c01dbf6 Mon Sep 17 00:00:00 2001 From: zzc <1196529906@qq.com> Date: Wed, 26 Mar 2025 12:54:53 +0800 Subject: [PATCH 094/108] [MNT] modify learnware_ids in llm_workflow config --- .../dataset_llm_workflow/benchmark/config.py | 204 +++++++++--------- 1 file changed, 102 insertions(+), 102 deletions(-) diff --git a/examples/dataset_llm_workflow/benchmark/config.py b/examples/dataset_llm_workflow/benchmark/config.py index 4493a777..18b46f37 100644 --- a/examples/dataset_llm_workflow/benchmark/config.py +++ b/examples/dataset_llm_workflow/benchmark/config.py @@ -104,113 +104,113 @@ } LEARNWARE_MED_IDS = [ - "00002688", - "00002689", - "00002690", - "00002691", - "00002692", - "00002693", - "00002694", - "00002695", - "00002696", - "00002697", - "00002698", - "00002699", - "00002700", + "00002789", + "00002790", + "00002791", + "00002792", + "00002793", + "00002794", + "00002795", + "00002796", + "00002797", + "00002798", + "00002799", + "00002800", + "00002801", ] LEARNWARE_MATH_IDS = [ - "00002701", - "00002702", - "00002703", - "00002704", - "00002705", - "00002706", - "00002707", - "00002708", - "00002709", - "00002710", - "00002711", - "00002712", - "00002713", + "00002802", + "00002803", + "00002804", + "00002805", + "00002806", + "00002807", + "00002808", + "00002809", + "00002810", + "00002811", + "00002812", + "00002813", + "00002814", ] LEARNWARE_FIN_IDS = [ - "00002714", - "00002715", - "00002716", - "00002717", - "00002718", - "00002719", - "00002720", - "00002720", - "00002721", - "00002722", - "00002723", - "00002724", - "00002725", - "00002726", - "00002727", - "00002728", - "00002729", - "00002730", - "00002731", - "00002732", - "00002733", - "00002734", - "00002735", - "00002736", - "00002737", - "00002738", - "00002739", - "00002740", - "00002741", - "00002742", - "00002743", - "00002744", - "00002745", - "00002746", - "00002747", - "00002748", - "00002749", - "00002750", - "00002751", - "00002752", - "00002753", - "00002754", - "00002755", - "00002756", - "00002757", - "00002758", - "00002759", - "00002760", - "00002761", - "00002762", - "00002763", - "00002764", - "00002765", - "00002766", - "00002767", - "00002768", - "00002769", - "00002770", - "00002771", - "00002772", - "00002773", - "00002774", - "00002775", - "00002776", - "00002777", - "00002778", - "00002779", - "00002780", - "00002781", - "00002782", - "00002783", - "00002784", - "00002785", - "00002786", - "00002787", - "00002788", + "00002815", + "00002816", + "00002817", + "00002818", + "00002819", + "00002820", + "00002820", + "00002821", + "00002822", + "00002823", + "00002824", + "00002825", + "00002826", + "00002827", + "00002828", + "00002829", + "00002830", + "00002831", + "00002832", + "00002833", + "00002834", + "00002835", + "00002836", + "00002837", + "00002838", + "00002839", + "00002840", + "00002841", 
+ "00002842", + "00002843", + "00002844", + "00002845", + "00002846", + "00002847", + "00002848", + "00002849", + "00002850", + "00002851", + "00002852", + "00002853", + "00002854", + "00002855", + "00002856", + "00002857", + "00002858", + "00002859", + "00002860", + "00002861", + "00002862", + "00002863", + "00002864", + "00002865", + "00002866", + "00002867", + "00002868", + "00002869", + "00002870", + "00002871", + "00002872", + "00002873", + "00002874", + "00002875", + "00002876", + "00002877", + "00002878", + "00002879", + "00002880", + "00002881", + "00002882", + "00002883", + "00002884", + "00002885", + "00002886", + "00002887", + "00002888", + "00002889", ] \ No newline at end of file From 9f4a5319000d887e2615a0664f9f44a996b8e06e Mon Sep 17 00:00:00 2001 From: zzc <1196529906@qq.com> Date: Wed, 26 Mar 2025 15:11:26 +0800 Subject: [PATCH 095/108] [MNT] modify generative.py on loading base and adapter model from beimingwu --- learnware/specification/regular/text/generative.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/learnware/specification/regular/text/generative.py b/learnware/specification/regular/text/generative.py index 0034fb84..6b68cbb8 100644 --- a/learnware/specification/regular/text/generative.py +++ b/learnware/specification/regular/text/generative.py @@ -139,7 +139,10 @@ def _init_tokenizer_model(self, beimingwu): And, this method should not be overridden if the specification needs to be submitted to Beimingwu. """ if beimingwu: - base_model_path = os.path.expanduser("~/Meta/saved-learnwares/saved-PTM") + from ....client import LearnwareClient + + client = LearnwareClient() + base_model_path = client.get_pretrained_path("00002890") else: base_model_path = "Qwen/Qwen2.5-0.5B" @@ -152,7 +155,8 @@ def _init_tokenizer_model(self, beimingwu): ).to(self._device) if beimingwu: - adapter_path = os.path.expanduser("~/Meta/saved-learnwares/saved-adapter") + client = LearnwareClient() + adapter_path = client.get_pretrained_path("00002891") model = PeftModel.from_pretrained(model, adapter_path) for n, p in model.named_parameters(): From 3d569cc0698de0a4d7cbbb7aeb5d8e54b06a45bd Mon Sep 17 00:00:00 2001 From: zzc <1196529906@qq.com> Date: Tue, 20 May 2025 12:14:53 +0800 Subject: [PATCH 096/108] [MNT] modify workflow details --- examples/dataset_llm_workflow/README.md | 145 +++++++++++----------- examples/dataset_llm_workflow/workflow.py | 34 ++--- 2 files changed, 91 insertions(+), 88 deletions(-) diff --git a/examples/dataset_llm_workflow/README.md b/examples/dataset_llm_workflow/README.md index adb729a4..5897ece5 100644 --- a/examples/dataset_llm_workflow/README.md +++ b/examples/dataset_llm_workflow/README.md @@ -2,11 +2,14 @@ ## Introduction -This workflow refers to Section 5 of our paper "Learnware Retrieval with Parameter Vector Specification". We build three learnware dock systems of 8B-level LLMs across three domains: finance, healthcare, and mathematics. We evaluate them on public evaluation benchmarks. +This workflow refers to Section 4 of our paper [*Learnware of Language Models: Specialized Small Language Models Can Do Big*](https://arxiv.org/abs/2505.13425). We build three learnware dock systems of 8B-level LLMs across three domains: finance, healthcare, and mathematics. We evaluate them on public evaluation benchmarks. -We first train multiple models under different configurations by SFT on different datasets using LoRA. Qwen2.5-7B, Llama3.1-8B, Llama3.1-8B-Instruct are our base models. 
Then we generate specifications for each model and apply a retrieval algorithm to select the most suitable learnware based on user task requirements. The retrieved learnware is then evaluated on the corresponding task under the **Task-Level** evaluation setting using EleutherAI's [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness).
+We first train multiple models under different configurations by SFT on different datasets using LoRA. Qwen2.5-7B, Llama3.1-8B, Llama3.1-8B-Instruct are our base models. Then we generate specifications for each model and apply an identification algorithm to select the most suitable learnware based on user task requirements. The identified learnware is then evaluated on the corresponding task under the **Task-Level** evaluation setting using EleutherAI's [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness).
 
-We compare PAVE against several baselines, including the Random selection strategy, the Best-single model, base models used for fine-tuning, and well-known LLMs with over 70B parameters. Best-single refers to the model with the highest average score among the learnware candidates.
+We compare Learnware against several contenders, including:
+- Ways to utilize specialized SLM(s). These include a baseline algorithm, Random learnware selection, and two oracle-style strategies with access to the full evaluation results of all candidate models: the Best-single model and Oracle. Best-single refers to the model with the highest average score among the learnware candidates, and Oracle denotes the optimal performance achievable by choosing one candidate SLM learnware per task, i.e., selecting the best-performing model on each user task.
+- Base models used for fine-tuning.
+- Well-known large language models (LLMs) with over 70B parameters.
 
 We do not distinguish between different models fine-tuned with the same instruction dataset, so if our method selects a learnware for solving a given task, the performance is actually calculated by the average of all the models with the selected instruction dataset.
 
 
 
@@ -42,31 +45,31 @@ cp -r extra_tasks/flare ~/anaconda3/envs/{env_name}/lib/python3.11/site-packages
 
 The table below shows the performance value of different methods or language models in finance scenario. 
-| User | Qwen2.5-7B | Llama3.1-8B-Instruct | Llama3.1-8B | Qwen1.5-110B | Qwen2.5-72B | Llama3.1-70B-Instruct | Random | Best-single | PAVE | Oracle | -|:----------------------|:-------------|:-----------------------|:--------------|:---------------|:--------------|:------------------------|:---------|:--------------|:-------|:---------| -| australian | 43.17 | 44.6 | 43.17 | 43.17 | 43.17 | 47.48 | 44.45 | 42.21 | 56.83 | 56.83 | -| cra_lendingclub | 80.82 | 76.33 | 57.34 | 80.82 | 47.01 | 53.07 | 81.52 | 80.82 | 92.07 | 92.07 | -| fiqasa | 38.3 | 40.43 | 56.17 | 63.4 | 64.26 | 68.51 | 46.53 | 32.06 | 76.38 | 76.38 | -| fpb | 76.08 | 32.78 | 30.72 | 70.72 | 78.35 | 78.04 | 67.95 | 77.73 | 84.25 | 84.25 | -| german | 65.0 | 49.5 | 66.0 | 66.0 | 66.5 | 43.5 | 51.5 | 65.33 | 67.06 | 67.06 | -| headlines | 74.81 | 59.95 | 59.95 | 62.96 | 77.84 | 77.53 | 72.43 | 95.61 | 95.61 | 95.61 | -| ner | 21.75 | 0.62 | 9.01 | 17.89 | 9.36 | 9.52 | 24.99 | 23.98 | 52.79 | 52.79 | -| sm_acl | 51.1 | 51.4 | 51.34 | 49.3 | 51.56 | 49.38 | 51.42 | 50.71 | 52.82 | 53.63 | -| sm_bigdata | 55.3 | 55.57 | 52.79 | 51.02 | 50.27 | 47.76 | 53.86 | 55.52 | 52.4 | 55.88 | -| sm_cikm | 58.44 | 54.24 | 54.07 | 44.01 | 58.27 | 47.86 | 55.89 | 57.98 | 55.99 | 58.52 | -| causal20_sc | 65.14 | 88.48 | 79.45 | 83.75 | 76.17 | 87.16 | 74.71 | 88.61 | 84.17 | 88.61 | -| finarg_ecc_arc | 64.78 | 46.67 | 60.0 | 62.32 | 63.04 | 44.64 | 62.27 | 57.87 | 64.31 | 68.36 | -| finarg_ecc_auc | 48.3 | 51.81 | 49.85 | 55.01 | 61.71 | 65.02 | 52.08 | 48.68 | 58.08 | 58.08 | -| fomc | 60.48 | 29.44 | 34.68 | 58.47 | 57.66 | 66.13 | 56.05 | 61.36 | 62.7 | 62.7 | -| ma | 79.2 | 56.4 | 51.0 | 81.4 | 84.6 | 83.2 | 73.64 | 79.27 | 79.81 | 79.81 | -| mlesg | 35.67 | 32.67 | 20.0 | 34.67 | 38.67 | 42.33 | 31.99 | 38.33 | 33.42 | 38.33 | -| multifin_en | 60.99 | 31.32 | 28.39 | 65.38 | 63.55 | 68.5 | 54.96 | 58.61 | 63.46 | 63.46 | -| Avg. | 57.61 | 47.19 | 47.29 | 58.25 | 58.35 | 57.63 | 56.25 | 59.69 | 66.6 | 67.79 | -| Avg. rank | 5.94 | 7.35 | 7.82 | 5.94 | 4.71 | 5.24 | 6.47 | 5.47 | 2.88 | 1.65 | -| PAVE (win/tie/loss) | 13/0/4 | 15/0/2 | 16/0/1 | 14/0/3 | 12/0/5 | 11/0/6 | 16/0/1 | 12/1/4 | nan | 0/11/6 | -| Oracle (win/tie/loss) | 17/0/0 | 17/0/0 | 17/0/0 | 15/0/2 | 13/0/4 | 12/0/5 | 17/0/0 | 14/3/0 | 6/11/0 | nan | - -Our method, PAVE, demonstrates strong performance across financial tasks, achieving the highest average score among all methods, delivering an nearly 14\% improvement compared with the best large-scale model Qwen2.5-72B. It ranks first among learnware retrieval methods in 13 out of 17 tasks, retrieves the optimal learnware (tied with Oracle) on 11 and outperforms all contenders in 8. 
+| User | Qwen2.5-7B | Llama3.1-8B-Instruct | Llama3.1-8B | Qwen1.5-110B | Qwen2.5-72B | Llama3.1-70B-Instruct | Random | Learnware | Best-single | Oracle | +|:-------------------------|:-------------|:-----------------------|:--------------|:---------------|:--------------|:------------------------|:---------|:------------|:--------------|:---------| +| australian | 43.17 | 44.6 | 43.17 | 43.17 | 43.17 | 47.48 | 44.45 | 56.83 | 42.21 | 56.83 | +| cra_lendingclub | 80.82 | 76.33 | 57.34 | 80.82 | 47.01 | 53.07 | 81.52 | 92.07 | 80.82 | 92.07 | +| fiqasa | 38.3 | 40.43 | 56.17 | 63.4 | 64.26 | 68.51 | 46.53 | 76.38 | 32.06 | 76.38 | +| fpb | 76.08 | 32.78 | 30.72 | 70.72 | 78.35 | 78.04 | 67.95 | 84.25 | 77.73 | 84.25 | +| german | 65.0 | 49.5 | 66.0 | 66.0 | 66.5 | 43.5 | 51.5 | 67.06 | 65.33 | 67.06 | +| headlines | 74.81 | 59.95 | 59.95 | 62.96 | 77.84 | 77.53 | 72.43 | 95.61 | 95.61 | 95.61 | +| ner | 21.75 | 0.62 | 9.01 | 17.89 | 9.36 | 9.52 | 24.99 | 52.79 | 23.98 | 52.79 | +| sm_acl | 51.1 | 51.4 | 51.34 | 49.3 | 51.56 | 49.38 | 51.42 | 52.82 | 50.71 | 53.63 | +| sm_bigdata | 55.3 | 55.57 | 52.79 | 51.02 | 50.27 | 47.76 | 53.86 | 52.4 | 55.52 | 55.88 | +| sm_cikm | 58.44 | 54.24 | 54.07 | 44.01 | 58.27 | 47.86 | 55.89 | 55.99 | 57.98 | 58.52 | +| causal20_sc | 65.14 | 88.48 | 79.45 | 83.75 | 76.17 | 87.16 | 74.71 | 84.17 | 88.61 | 88.61 | +| finarg_ecc_arc | 64.78 | 46.67 | 60.0 | 62.32 | 63.04 | 44.64 | 62.27 | 64.31 | 57.87 | 68.36 | +| finarg_ecc_auc | 48.3 | 51.81 | 49.85 | 55.01 | 61.71 | 65.02 | 52.08 | 58.08 | 48.68 | 58.08 | +| fomc | 60.48 | 29.44 | 34.68 | 58.47 | 57.66 | 66.13 | 56.05 | 62.7 | 61.36 | 62.7 | +| ma | 79.2 | 56.4 | 51.0 | 81.4 | 84.6 | 83.2 | 73.64 | 79.81 | 79.27 | 79.81 | +| mlesg | 35.67 | 32.67 | 20.0 | 34.67 | 38.67 | 42.33 | 31.99 | 33.42 | 38.33 | 38.33 | +| multifin_en | 60.99 | 31.32 | 28.39 | 65.38 | 63.55 | 68.5 | 54.96 | 63.46 | 58.61 | 63.46 | +| Avg. | 57.61 | 47.19 | 47.29 | 58.25 | 58.35 | 57.63 | 56.25 | 66.6 | 59.69 | 67.79 | +| Avg. rank | 5.94 | 7.35 | 7.82 | 5.94 | 4.71 | 5.24 | 6.47 | 2.88 | 5.47 | 1.65 | +| Learnware (win/tie/loss) | 13/0/4 | 15/0/2 | 16/0/1 | 14/0/3 | 12/0/5 | 11/0/6 | 16/0/1 | nan | 12/1/4 | 0/11/6 | +| Oracle (win/tie/loss) | 17/0/0 | 17/0/0 | 17/0/0 | 15/0/2 | 13/0/4 | 12/0/5 | 17/0/0 | 6/11/0 | 14/3/0 | nan | + +Our system demonstrates strong performance across financial tasks, achieving the highest average score among all methods, delivering an nearly 14\% improvement compared with the best large-scale model Qwen2.5-72B. It ranks first strategies utilizing specialized SLMs except Oracle in 13 out of 17 tasks, identifies the optimal learnware (tied with Oracle) on 11 and outperforms all contenders in 8. These results shows that our system can match or surpass large-scale models with over 70B parameters under the Task-Level evaluation setting, while requiring only the memory for models under 8B efficiently. @@ -74,51 +77,51 @@ These results shows that our system can match or surpass large-scale models with The table below shows the performance value of different methods or language models in medical scenario. 
-| User | Qwen2.5-7B | Flan-PaLM-540B | Random | Best-single | PAVE | Oracle | -|:----------------------|:-------------|:-----------------|:---------|:--------------|:-------|:---------| -| medmcqa | 59.93 | 57.6 | 60.2 | 62.49 | 62.49 | 62.49 | -| medqa_4options | 64.18 | 67.6 | 63.74 | 64.81 | 65.59 | 65.59 | -| anatomy | 71.85 | 63.7 | 71.33 | 70.37 | 71.85 | 72.96 | -| clinical_knowledge | 77.36 | 80.4 | 78.21 | 78.49 | 78.87 | 79.25 | -| college_biology | 82.64 | 88.9 | 84.34 | 84.03 | 85.42 | 86.11 | -| college_medicine | 69.36 | 76.3 | 69.02 | 68.79 | 69.36 | 69.94 | -| medical_genetics | 87.0 | 75.0 | 86.95 | 89.0 | 87.0 | 89.0 | -| professional_medicine | 78.68 | 83.8 | 77.37 | 78.68 | 79.78 | 79.78 | -| pubmedqa | 75.2 | 79.0 | 75.67 | 76.8 | 75.8 | 76.8 | -| Avg. | 74.02 | 74.7 | 74.09 | 74.83 | 75.13 | 75.77 | -| Avg. rank | 4.44 | 2.67 | 4.89 | 3.56 | 2.56 | 1.67 | -| PAVE (win/tie/loss) | 6/3/0 | 3/0/6 | 9/0/0 | 6/1/2 | nan | 0/3/6 | -| Oracle (win/tie/loss) | 9/0/0 | 3/0/6 | 9/0/0 | 6/3/0 | 6/3/0 | nan | - -As shown, PAVE achieves the highest average score across 9 tasks, even surpassing the large-scale model Flan-PaLM-540B. This demonstrates that our system, leveraging multiple models with fewer than 8B parameters, can outperform a single large-scale model in task-specific scenarios. Among learnware retrieval methods, PAVE performs best in 7 out of 9 tasks, tied with Oracle in 6. - -Furthermore, PAVE outperforming Best-single suggests that its effectiveness comes not from a single exceptionally strong model but from its retrieval mechanism and the collective strength of all candidate models. +| User | Qwen2.5-7B | Flan-PaLM-540B | Random | Learnware | Best-single | Oracle | +|:-------------------------|:-------------|:-----------------|:---------|:------------|:--------------|:---------| +| medmcqa | 59.93 | 57.6 | 60.2 | 62.49 | 62.49 | 62.49 | +| medqa_4options | 64.18 | 67.6 | 63.74 | 65.59 | 64.81 | 65.59 | +| anatomy | 71.85 | 63.7 | 71.33 | 71.85 | 70.37 | 72.96 | +| clinical_knowledge | 77.36 | 80.4 | 78.21 | 78.87 | 78.49 | 79.25 | +| college_biology | 82.64 | 88.9 | 84.34 | 85.42 | 84.03 | 86.11 | +| college_medicine | 69.36 | 76.3 | 69.02 | 69.36 | 68.79 | 69.94 | +| medical_genetics | 87.0 | 75.0 | 86.95 | 87.0 | 89.0 | 89.0 | +| professional_medicine | 78.68 | 83.8 | 77.37 | 79.78 | 78.68 | 79.78 | +| pubmedqa | 75.2 | 79.0 | 75.67 | 75.8 | 76.8 | 76.8 | +| Avg. | 74.02 | 74.7 | 74.09 | 75.13 | 74.83 | 75.77 | +| Avg. rank | 4.44 | 2.67 | 4.89 | 2.56 | 3.56 | 1.67 | +| Learnware (win/tie/loss) | 6/3/0 | 3/0/6 | 9/0/0 | nan | 6/1/2 | 0/3/6 | +| Oracle (win/tie/loss) | 9/0/0 | 3/0/6 | 9/0/0 | 6/3/0 | 6/3/0 | nan | + +As shown, Our system achieves the highest average score across 9 tasks, even surpassing the large-scale model Flan-PaLM-540B. This demonstrates that by leveraging multiple models with fewer than 8B parameters, our system can outperform a single large-scale model in task-specific scenarios. Among SLM utilization strategies, Learnware performs best in 7 out of 9 tasks, tied with Oracle in 6. + +Furthermore, the fact that our system surpasses Best-single highlights that its effectiveness comes not from a single exceptionally strong model but from its specification design, identification mechanism and the collective strength of all candidate models. ### Math The table below shows the performance value of different methods or language models in math scenario. 
-| User | Qwen2.5-7B | Qwen1.5-110B | Random | Best-single | PAVE | Oracle | -|:------------------------------|:-------------|:---------------|:---------|:--------------|:-------|:---------| -| agieval_aqua_rat | 41.73 | 38.98 | 40.09 | 41.33 | 38.98 | 41.73 | -| agieval_gaokao_mathcloze | 16.95 | 38.14 | 11.72 | 13.14 | 17.8 | 17.8 | -| agieval_gaokao_mathqa | 49.86 | 77.78 | 50.35 | 51.0 | 51.57 | 53.42 | -| agieval_math | 19.8 | 19.3 | 20.15 | 18.5 | 20.6 | 28.4 | -| agieval_sat_math | 55.91 | 57.27 | 55.3 | 57.5 | 57.27 | 57.5 | -| cmmlu_college_mathematics | 45.71 | 47.62 | 49.36 | 48.58 | 52.38 | 52.38 | -| cmmlu_elementary_mathematics | 65.65 | 77.83 | 64.49 | 65.0 | 66.96 | 67.18 | -| cmmlu_high_school_mathematics | 61.59 | 77.44 | 62.5 | 64.32 | 60.98 | 64.63 | -| gsm8k | 84.08 | 84.91 | 80.79 | 83.92 | 84.15 | 84.15 | -| mathqa | 43.32 | 48.07 | 41.51 | 46.28 | 41.41 | 46.28 | -| mgsm_native_cot_zh | 66.4 | 68.8 | 67.64 | 68.8 | 73.6 | 73.6 | -| minerva_math | 40.16 | 47.9 | 37.4 | 41.23 | 36.48 | 45.12 | -| abstract_algebra | 54.0 | 53.0 | 53.83 | 52.0 | 56.0 | 56.0 | -| college_mathematics | 53.0 | 52.0 | 53.61 | 53.5 | 53.0 | 58.0 | -| elementary_mathematics | 72.75 | 78.84 | 73.63 | 73.02 | 75.13 | 75.13 | -| high_school_mathematics | 55.93 | 60.0 | 55.21 | 55.19 | 55.56 | 56.86 | -| Avg. | 51.68 | 57.99 | 51.1 | 52.08 | 52.62 | 54.89 | -| Avg. rank | 4.31 | 2.56 | 4.56 | 4.0 | 3.19 | 1.56 | -| PAVE (win/tie/loss) | 10/1/5 | 5/2/9 | 11/0/5 | 10/0/6 | nan | 0/6/10 | -| Oracle (win/tie/loss) | 15/1/0 | 7/0/9 | 16/0/0 | 14/2/0 | 10/6/0 | nan | - -PAVE achieves optimal retrieval performance (tied with Oracle) in 10 out of 16 tasks and even outperforms all other contenders in 5. However, the large-scale model achieves the highest average score and even beats Oracle (which denotes the optimal performance using one of our 8B-level models). This is likely due to their strong reasoning abilities that lack in smaller models, rather than a shortcoming of our method, as evidenced by the minimal difference in the "win/tie/loss" of PAVE and Oracle on Qwen1.5-110B. +| User | Qwen2.5-7B | Qwen1.5-110B | Random | Learnware | Best-single | Oracle | +|:------------------------------|:-------------|:---------------|:---------|:------------|:--------------|:---------| +| agieval_aqua_rat | 41.73 | 38.98 | 40.09 | 38.98 | 41.33 | 41.73 | +| agieval_gaokao_mathcloze | 16.95 | 38.14 | 11.72 | 17.8 | 13.14 | 17.8 | +| agieval_gaokao_mathqa | 49.86 | 77.78 | 50.35 | 51.57 | 51.0 | 53.42 | +| agieval_math | 19.8 | 19.3 | 20.15 | 20.6 | 18.5 | 28.4 | +| agieval_sat_math | 55.91 | 57.27 | 55.3 | 57.27 | 57.5 | 57.5 | +| cmmlu_college_mathematics | 45.71 | 47.62 | 49.36 | 52.38 | 48.58 | 52.38 | +| cmmlu_elementary_mathematics | 65.65 | 77.83 | 64.49 | 66.96 | 65.0 | 67.18 | +| cmmlu_high_school_mathematics | 61.59 | 77.44 | 62.5 | 60.98 | 64.32 | 64.63 | +| gsm8k | 84.08 | 84.91 | 80.79 | 84.15 | 83.92 | 84.15 | +| mathqa | 43.32 | 48.07 | 41.51 | 41.41 | 46.28 | 46.28 | +| mgsm_native_cot_zh | 66.4 | 68.8 | 67.64 | 73.6 | 68.8 | 73.6 | +| minerva_math | 40.16 | 47.9 | 37.4 | 36.48 | 41.23 | 45.12 | +| abstract_algebra | 54.0 | 53.0 | 53.83 | 56.0 | 52.0 | 56.0 | +| college_mathematics | 53.0 | 52.0 | 53.61 | 53.0 | 53.5 | 58.0 | +| elementary_mathematics | 72.75 | 78.84 | 73.63 | 75.13 | 73.02 | 75.13 | +| high_school_mathematics | 55.93 | 60.0 | 55.21 | 55.56 | 55.19 | 56.86 | +| Avg. | 51.68 | 57.99 | 51.1 | 52.62 | 52.08 | 54.89 | +| Avg. 
rank | 4.31 | 2.56 | 4.56 | 3.19 | 4.0 | 1.56 | +| Learnware (win/tie/loss) | 10/1/5 | 5/2/9 | 11/0/5 | nan | 10/0/6 | 0/6/10 | +| Oracle (win/tie/loss) | 15/1/0 | 7/0/9 | 16/0/0 | 10/6/0 | 14/2/0 | nan | + +Our system achieves optimal identification performance (tied with Oracle) in 10 out of 16 tasks and even outperforms all other contenders in 5. However, the large-scale model achieves the highest average score and even beats Oracle (which denotes the optimal performance using one of our 8B-level models). This is likely due to their strong mathematical reasoning abilities that lack in smaller models, rather than a shortcoming of our method, as evidenced by the minimal difference in the "win/tie/loss" of Learnware and Oracle on Qwen1.5-110B. diff --git a/examples/dataset_llm_workflow/workflow.py b/examples/dataset_llm_workflow/workflow.py index fa6e2838..33ba5e39 100644 --- a/examples/dataset_llm_workflow/workflow.py +++ b/examples/dataset_llm_workflow/workflow.py @@ -30,9 +30,9 @@ def _plot_radar_chart(self, benchmark_name, results_table): labels = list(results_table.index) if benchmark_name == "finance": column_split = [ - ["PAVE", "Qwen2.5-7B", "Llama3.1-8B-Instruct", "Llama3.1-8B"], - ["PAVE", "Qwen1.5-110B", "Qwen2.5-72B", "Llama3.1-70B-Instruct"], - ["PAVE", "Random", "Best-single", "Oracle"] + ["Learnware", "Qwen2.5-7B", "Llama3.1-8B-Instruct", "Llama3.1-8B"], + ["Learnware", "Qwen1.5-110B", "Qwen2.5-72B", "Llama3.1-70B-Instruct"], + ["Learnware", "Random", "Best-single", "Oracle"] ] YTICKS = [0.2, 0.4, 0.6, 0.8, 1.0] ylim = (0, 1.15) @@ -44,18 +44,18 @@ def _plot_radar_chart(self, benchmark_name, results_table): ] elif benchmark_name == "math": column_split = [ - ["PAVE", "Qwen2.5-7B"], - ["PAVE", "Qwen1.5-110B"], - ["PAVE", "Random", "Best-single", "Oracle"] + ["Learnware", "Qwen2.5-7B"], + ["Learnware", "Qwen1.5-110B"], + ["Learnware", "Random", "Best-single", "Oracle"] ] YTICKS = [0.4, 0.6, 0.8, 1.0] ylim = (0.3, 1.3) x_label_fontsize = 5 elif benchmark_name == "medical": column_split = [ - ["PAVE", "Qwen2.5-7B"], - ["PAVE", "Flan-PaLM-540B"], - ["PAVE", "Random", "Best-single", "Oracle"] + ["Learnware", "Qwen2.5-7B"], + ["Learnware", "Flan-PaLM-540B"], + ["Learnware", "Random", "Best-single", "Oracle"] ] YTICKS = [0.8, 0.9, 1.0] ylim = (0.75, 1.1) @@ -69,9 +69,9 @@ def _plot_radar_chart(self, benchmark_name, results_table): fig, axes = plt.subplots(1, 3, figsize=(16, 5), subplot_kw=dict(polar=True)) model_names = [ - "PAVE vs Base Model", - "PAVE vs Large-scale Model", - "Retrieve Learnware" + "Learnware vs Base Model", + "Learnware vs Large-scale Model", + "Specialized SLMs" ] colors = [ @@ -135,14 +135,14 @@ def _anlysis_table(self, benchmark_name, table, score_results): oracle_value = (adaptation_info[["User", "value"]] .groupby(['User']).max()).rename(columns={"value": "Oracle"}) pave_value = (adaptation_info[adaptation_info["Rank-PAVE"] < 1][["User", "value"]] - .groupby(['User']).mean()).rename(columns={"value": "PAVE"}) + .groupby(['User']).mean()).rename(columns={"value": "Learnware"}) # Best-single perf_pivot = perf_merged.pivot(index="User", columns="Learnware", values="value") best_column = perf_pivot.mean().idxmax() best_single = perf_pivot[[best_column]].rename(columns={best_column: 'Best-single'}) - adaptation_table = pd.concat([random_value, best_single, pave_value, oracle_value], axis=1) + adaptation_table = pd.concat([random_value, pave_value, best_single, oracle_value], axis=1) # join performance_extra adaptation_table = 
performance_extra.join(adaptation_table) @@ -152,11 +152,11 @@ def _anlysis_table(self, benchmark_name, table, score_results): avg_rank = ranks.mean() # PAVE win/tie/loss - pave_scores = adaptation_table["PAVE"] + pave_scores = adaptation_table["Learnware"] win_tie_loss = {} for col in adaptation_table.columns: - if col == "PAVE": + if col == "Learnware": continue win = (pave_scores > adaptation_table[col]).sum() tie = (pave_scores == adaptation_table[col]).sum() @@ -178,7 +178,7 @@ def _anlysis_table(self, benchmark_name, table, score_results): adaptation_table.loc['Avg.'] = adaptation_table.mean() adaptation_table.loc["Avg. rank"] = avg_rank adaptation_table = adaptation_table.round(2) - adaptation_table.loc["PAVE (win/tie/loss)"] = win_tie_loss + adaptation_table.loc["Learnware (win/tie/loss)"] = win_tie_loss adaptation_table.loc["Oracle (win/tie/loss)"] = win_tie_loss_o print(adaptation_table.to_markdown()) From 75e09c60b9368359b6ffc2e6212dd2a85e686e3d Mon Sep 17 00:00:00 2001 From: Asymptotez <201220101@smail.nju.edu.cn> Date: Tue, 20 May 2025 20:14:55 +0800 Subject: [PATCH 097/108] [MNT] update version to 0.4.0 and enhance changelog --- CHANGES.rst | 11 ++++++++--- learnware/__init__.py | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index ae692d3c..27b9f17e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,7 +1,12 @@ Changelog ========= -Here you can see the full list of changes between ``learnware`` release. +Here you can see the full list of changes between ``learnware`` releases. -Version 0.3.2 +Version 0.4.0 (2024-05-20) --------------- -This is the first public release of ``learnware`` package. +* Added support for 7B level language model learnwares. +* Added two new specifications, specifically designed for language model learnwares. + +Version 0.3.2 (2024-01-24) +--------------- +* First public release of ``learnware`` package. 
\ No newline at end of file diff --git a/learnware/__init__.py b/learnware/__init__.py index 97e81afd..b2689385 100644 --- a/learnware/__init__.py +++ b/learnware/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.3.2.99" +__version__ = "0.4.0" import json import os From be4d7d38e1814b0e14e328bebcebdb6c8191e414 Mon Sep 17 00:00:00 2001 From: Asymptotez <201220101@smail.nju.edu.cn> Date: Sat, 24 May 2025 23:05:36 +0800 Subject: [PATCH 098/108] [FIX] update datasets version to 2.16.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a885663d..27dfcb40 100644 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ def get_version(rel_path: str) -> str: "qpsolvers[clarabel]>=4.0.1", "geatpy>=2.7.0;python_version<'3.11'", "trl>=0.11.4", - "datasets>=3.1.0", + "datasets>=2.16.0", "peft>=0.13.2", "lm_eval>=0.4.7" ] From 5a7e4a73220b73be3da0c1a8702a76361a0fad78 Mon Sep 17 00:00:00 2001 From: zzc <1196529906@qq.com> Date: Sun, 25 May 2025 00:23:58 +0800 Subject: [PATCH 099/108] [DOC] update llm-related readme --- README.md | 45 + README_zh.md | 45 + docs/_static/img/llm-finance.svg | 3650 +++++++++++++++++++++++ docs/_static/img/llm-math.svg | 3643 ++++++++++++++++++++++ docs/_static/img/llm-medical.svg | 2721 +++++++++++++++++ examples/dataset_llm_workflow/README.md | 32 +- 6 files changed, 10132 insertions(+), 4 deletions(-) create mode 100644 docs/_static/img/llm-finance.svg create mode 100644 docs/_static/img/llm-math.svg create mode 100644 docs/_static/img/llm-medical.svg diff --git a/README.md b/README.md index 0116a3c3..7793588f 100644 --- a/README.md +++ b/README.md @@ -392,6 +392,51 @@ The results are depicted in the following table and figure. Similarly, even when
+# LLM Experimental Results (New) + +This section refers to Section 4 of our paper [*Learnware of Language Models: Specialized Small Language Models Can Do Big*](https://arxiv.org/abs/2505.13425). We simulate a learnware system comprising approximately 100 learnwares of specialized SLMs with 8B parameters, fine-tuned across finance, healthcare, and mathematics domains. + +Experimental results demonstrate promising performance: by selecting one suitable learnware for each task-specific inference, the system outperforms the base SLMs on all benchmarks. Compared to LLMs, the system outperforms Qwen1.5-110B, Qwen2.5-72B, and Llama3.1-70B-Instruct by at least 14% in finance domain tasks. Additionally, it surpasses Flan-PaLM-540B (ranked 7th on the [Open Medical LLM Leaderboard](https://huggingface.co/spaces/openlifescienceai/open_medical_llm_leaderboard)) in medical domain tasks. + +The figure and table below show the performance value in finance scenario. + +
+ +
+ +
+ +| User | Qwen2.5-7B | Llama3.1-8B-Instruct | Llama3.1-8B | Qwen1.5-110B | Qwen2.5-72B | Llama3.1-70B-Instruct | Random | Learnware | Best-single | Oracle | +|:-------------------------|:-------------|:-----------------------|:--------------|:---------------|:--------------|:------------------------|:---------|:------------|:--------------|:---------| +| australian | 43.17 | 44.6 | 43.17 | 43.17 | 43.17 | 47.48 | 44.45 | 56.83 | 42.21 | 56.83 | +| cra_lendingclub | 80.82 | 76.33 | 57.34 | 80.82 | 47.01 | 53.07 | 81.52 | 92.07 | 80.82 | 92.07 | +| fiqasa | 38.3 | 40.43 | 56.17 | 63.4 | 64.26 | 68.51 | 46.53 | 76.38 | 32.06 | 76.38 | +| fpb | 76.08 | 32.78 | 30.72 | 70.72 | 78.35 | 78.04 | 67.95 | 84.25 | 77.73 | 84.25 | +| german | 65.0 | 49.5 | 66.0 | 66.0 | 66.5 | 43.5 | 51.5 | 67.06 | 65.33 | 67.06 | +| headlines | 74.81 | 59.95 | 59.95 | 62.96 | 77.84 | 77.53 | 72.43 | 95.61 | 95.61 | 95.61 | +| ner | 21.75 | 0.62 | 9.01 | 17.89 | 9.36 | 9.52 | 24.99 | 52.79 | 23.98 | 52.79 | +| sm_acl | 51.1 | 51.4 | 51.34 | 49.3 | 51.56 | 49.38 | 51.42 | 52.82 | 50.71 | 53.63 | +| sm_bigdata | 55.3 | 55.57 | 52.79 | 51.02 | 50.27 | 47.76 | 53.86 | 52.4 | 55.52 | 55.88 | +| sm_cikm | 58.44 | 54.24 | 54.07 | 44.01 | 58.27 | 47.86 | 55.89 | 55.99 | 57.98 | 58.52 | +| causal20_sc | 65.14 | 88.48 | 79.45 | 83.75 | 76.17 | 87.16 | 74.71 | 84.17 | 88.61 | 88.61 | +| finarg_ecc_arc | 64.78 | 46.67 | 60.0 | 62.32 | 63.04 | 44.64 | 62.27 | 64.31 | 57.87 | 68.36 | +| finarg_ecc_auc | 48.3 | 51.81 | 49.85 | 55.01 | 61.71 | 65.02 | 52.08 | 58.08 | 48.68 | 58.08 | +| fomc | 60.48 | 29.44 | 34.68 | 58.47 | 57.66 | 66.13 | 56.05 | 62.7 | 61.36 | 62.7 | +| ma | 79.2 | 56.4 | 51.0 | 81.4 | 84.6 | 83.2 | 73.64 | 79.81 | 79.27 | 79.81 | +| mlesg | 35.67 | 32.67 | 20.0 | 34.67 | 38.67 | 42.33 | 31.99 | 33.42 | 38.33 | 38.33 | +| multifin_en | 60.99 | 31.32 | 28.39 | 65.38 | 63.55 | 68.5 | 54.96 | 63.46 | 58.61 | 63.46 | +| Avg. | 57.61 | 47.19 | 47.29 | 58.25 | 58.35 | 57.63 | 56.25 | 66.6 | 59.69 | 67.79 | +| Avg. rank | 5.94 | 7.35 | 7.82 | 5.94 | 4.71 | 5.24 | 6.47 | 2.88 | 5.47 | 1.65 | +| Learnware (win/tie/loss) | 13/0/4 | 15/0/2 | 16/0/1 | 14/0/3 | 12/0/5 | 11/0/6 | 16/0/1 | nan | 12/1/4 | 0/11/6 | +| Oracle (win/tie/loss) | 17/0/0 | 17/0/0 | 17/0/0 | 15/0/2 | 13/0/4 | 12/0/5 | 17/0/0 | 6/11/0 | 14/3/0 | nan | + +
+
+Our system demonstrates strong performance across financial tasks, achieving the highest average score among all methods and delivering a nearly 14\% improvement over the best large-scale model, Qwen2.5-72B. Among the strategies that utilize specialized SLMs, it ranks first (excluding Oracle) in 13 out of 17 tasks, identifies the optimal learnware (tied with Oracle) on 11, and outperforms all contenders on 8.
+
+These results show that our system can match or surpass large-scale models with over 70B parameters under the Task-Level evaluation setting, while requiring only the memory footprint of models under 8B.
+
+**For more scenarios (medical and math) and details, please see [here](./examples/dataset_llm_workflow/README.md).**
 
 # Citation
 
diff --git a/README_zh.md b/README_zh.md
index 4a363725..522a686a 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -398,6 +398,51 @@ feature_augment_predict_y = reuse_feature_augment.predict(user_data=test_x)
 
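The market-construction flow summarized above can be sketched with the package's own market API. The snippet below is a minimal illustration based on the helper calls that appear later in this patch series (`instantiate_learnware_market`, `generate_semantic_spec`, `add_learnware`); the market id, learnware name, zip path, and metadata values are placeholders rather than the exact values used in the experiments.

```python
from learnware.market import instantiate_learnware_market
from learnware.specification import generate_semantic_spec

# Instantiate (or rebuild) a market that uses the LLM-specific organizer and searcher.
llm_market = instantiate_learnware_market(market_id="llm_finance", name="llm", rebuild=True)

# Describe one fine-tuned LoRA adapter with a semantic specification (placeholder values).
semantic_spec = generate_semantic_spec(
    name="german-1",
    description="LoRA adapter fine-tuned using SFT on the german dataset.",
    data_type="Text",
    model_type="PEFT Model",
    task_type="Text Generation",
    library_type="PyTorch",
    scenarios=["Financial"],
    license="Apache-2.0",
    input_description=None,
    output_description=None,
)

# The zip archive bundles the adapter weights together with the learnware's
# statistical specifications; adding it grows the simulated market by one entry.
llm_market.add_learnware("learnware_pool/finance/zips/german-1.zip", semantic_spec)
print("Market size after adding learnware:", len(llm_market))
```

Repeating the `add_learnware` step for each fine-tuned adapter yields the roughly 100-learnware market evaluated above; at query time, one learnware is identified per user task against these specifications.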
+# LLM 实验结果(新增) + +本节对应于我们的论文 [*Learnware of Language Models: Specialized Small Language Models Can Do Big*](https://arxiv.org/abs/2505.13425) 的第 4 部分。我们模拟建立了一个含有约 100 个 8B 级别专用 SLM 学件的学件基座系统,涵盖金融、医疗和数学三个领域。 + +实验结果展现了我们系统的良好性能:通过为每个专用领域任务选择一个合适的学件,该系统在所有场景的基准测试中均优于基座 SLM 以及基线算法;与 70B 以上的大参数规模语言模型相比,该系统在大幅减少显存占用的情况下,在金融领域中的性能表现至少比 Qwen1.5-110B、Qwen2.5-72B 和 Llama3.1-70B-Instruct 高出 14%。此外,在医疗领域中,它超越了 Flan-PaLM-540B(在 [Open Medical LLM Leaderboard](https://huggingface.co/spaces/openlifescienceai/open_medical_llm_leaderboard) 上排名第七)。 + +下图和表格展示了不同方法或模型在金融评估场景上的性能分数: + +
+ +
+ +
+ +| User | Qwen2.5-7B | Llama3.1-8B-Instruct | Llama3.1-8B | Qwen1.5-110B | Qwen2.5-72B | Llama3.1-70B-Instruct | Random | Learnware | Best-single | Oracle | +|:-------------------------|:-------------|:-----------------------|:--------------|:---------------|:--------------|:------------------------|:---------|:------------|:--------------|:---------| +| australian | 43.17 | 44.6 | 43.17 | 43.17 | 43.17 | 47.48 | 44.45 | 56.83 | 42.21 | 56.83 | +| cra_lendingclub | 80.82 | 76.33 | 57.34 | 80.82 | 47.01 | 53.07 | 81.52 | 92.07 | 80.82 | 92.07 | +| fiqasa | 38.3 | 40.43 | 56.17 | 63.4 | 64.26 | 68.51 | 46.53 | 76.38 | 32.06 | 76.38 | +| fpb | 76.08 | 32.78 | 30.72 | 70.72 | 78.35 | 78.04 | 67.95 | 84.25 | 77.73 | 84.25 | +| german | 65.0 | 49.5 | 66.0 | 66.0 | 66.5 | 43.5 | 51.5 | 67.06 | 65.33 | 67.06 | +| headlines | 74.81 | 59.95 | 59.95 | 62.96 | 77.84 | 77.53 | 72.43 | 95.61 | 95.61 | 95.61 | +| ner | 21.75 | 0.62 | 9.01 | 17.89 | 9.36 | 9.52 | 24.99 | 52.79 | 23.98 | 52.79 | +| sm_acl | 51.1 | 51.4 | 51.34 | 49.3 | 51.56 | 49.38 | 51.42 | 52.82 | 50.71 | 53.63 | +| sm_bigdata | 55.3 | 55.57 | 52.79 | 51.02 | 50.27 | 47.76 | 53.86 | 52.4 | 55.52 | 55.88 | +| sm_cikm | 58.44 | 54.24 | 54.07 | 44.01 | 58.27 | 47.86 | 55.89 | 55.99 | 57.98 | 58.52 | +| causal20_sc | 65.14 | 88.48 | 79.45 | 83.75 | 76.17 | 87.16 | 74.71 | 84.17 | 88.61 | 88.61 | +| finarg_ecc_arc | 64.78 | 46.67 | 60.0 | 62.32 | 63.04 | 44.64 | 62.27 | 64.31 | 57.87 | 68.36 | +| finarg_ecc_auc | 48.3 | 51.81 | 49.85 | 55.01 | 61.71 | 65.02 | 52.08 | 58.08 | 48.68 | 58.08 | +| fomc | 60.48 | 29.44 | 34.68 | 58.47 | 57.66 | 66.13 | 56.05 | 62.7 | 61.36 | 62.7 | +| ma | 79.2 | 56.4 | 51.0 | 81.4 | 84.6 | 83.2 | 73.64 | 79.81 | 79.27 | 79.81 | +| mlesg | 35.67 | 32.67 | 20.0 | 34.67 | 38.67 | 42.33 | 31.99 | 33.42 | 38.33 | 38.33 | +| multifin_en | 60.99 | 31.32 | 28.39 | 65.38 | 63.55 | 68.5 | 54.96 | 63.46 | 58.61 | 63.46 | +| Avg. | 57.61 | 47.19 | 47.29 | 58.25 | 58.35 | 57.63 | 56.25 | 66.6 | 59.69 | 67.79 | +| Avg. rank | 5.94 | 7.35 | 7.82 | 5.94 | 4.71 | 5.24 | 6.47 | 2.88 | 5.47 | 1.65 | +| Learnware (win/tie/loss) | 13/0/4 | 15/0/2 | 16/0/1 | 14/0/3 | 12/0/5 | 11/0/6 | 16/0/1 | nan | 12/1/4 | 0/11/6 | +| Oracle (win/tie/loss) | 17/0/0 | 17/0/0 | 17/0/0 | 15/0/2 | 13/0/4 | 12/0/5 | 17/0/0 | 6/11/0 | 14/3/0 | nan | + +
+
+我们的系统在金融任务中表现出色,在所有方法中取得了最高的平均得分,比表现最好的大参数规模模型 Qwen2.5-72B 性能提高了14\%。在 17 个任务中,有 13 个任务的得分高于除 Oracle 外的专用 SLM 模型选择方法,在11个任务上查搜到了最优学件(性能表现与Oracle一致),在 8 个任务上战胜了所有其他方法或模型。
+
+上述结果表明,在任务级评估的实验设定下,仅查搜使用参数规模在 8B 级别的小型语言模型,学件基座系统的整体表现可以媲美甚至超越参数规模在 70B 以上的大模型,并大幅降低模型推理时的显存占用。
+
+**更多场景(医学和数学)上的实验结果和详细信息,请参阅[此处](./examples/dataset_llm_workflow/README.md)。**
 
 # 引用
 
diff --git a/docs/_static/img/llm-finance.svg b/docs/_static/img/llm-finance.svg
new file mode 100644
index 00000000..53c53f28
--- /dev/null
+++ b/docs/_static/img/llm-finance.svg
@@ -0,0 +1,3650 @@
+[Matplotlib v3.9.2 generated SVG radar chart for the finance scenario; 3650 lines of SVG markup omitted]
diff --git a/docs/_static/img/llm-math.svg b/docs/_static/img/llm-math.svg
new file mode 100644
index 00000000..17e18311
--- /dev/null
+++ b/docs/_static/img/llm-math.svg
@@ -0,0 +1,3643 @@
+[Matplotlib v3.9.2 generated SVG radar chart for the math scenario; 3643 lines of SVG markup omitted]
diff --git a/docs/_static/img/llm-medical.svg b/docs/_static/img/llm-medical.svg
new file mode 100644
index 00000000..44a274c8
--- /dev/null
+++ b/docs/_static/img/llm-medical.svg
@@ -0,0 +1,2721 @@
+[Matplotlib v3.9.2 generated SVG radar chart for the medical scenario; 2721 lines of SVG markup omitted]
diff --git a/examples/dataset_llm_workflow/README.md b/examples/dataset_llm_workflow/README.md
index 5897ece5..dd86e694 100644
--- a/examples/dataset_llm_workflow/README.md
+++ b/examples/dataset_llm_workflow/README.md
@@ -2,7 +2,7 @@
 ## Introduction
 
-This workflow refers to Section 4 of our paper [*Learnware of Language Models: Specialized Small Language Models Can Do Big*](https://arxiv.org/abs/2505.13425). We build three learnware dock systems of 8B-level LLMs across three domains: finance, healthcare, and mathematics. We evaluate them on public evaluation benchmarks.
+This workflow refers to Section 4 of our paper [*Learnware of Language Models: Specialized Small Language Models Can Do Big*](https://arxiv.org/abs/2505.13425). We simulate a learnware system comprising approximately 100 learnwares of specialized SLMs with 8B parameters, fine-tuned across finance, healthcare, and mathematics domains.
 
 We first train multiple models under different configurations by SFT on different datasets using LoRA. Qwen2.5-7B, Llama3.1-8B, and Llama3.1-8B-Instruct are our base models. Then we generate specifications for each model and apply an identification algorithm to select the most suitable learnware based on user task requirements. The identified learnware is then evaluated on the corresponding task under the **Task-Level** evaluation setting using EleutherAI's [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness).
 
@@ -43,7 +43,13 @@ cp -r extra_tasks/flare ~/anaconda3/envs/{env_name}/lib/python3.11/site-packages
 
 ### Finance
 
-The table below shows the performance value of different methods or language models in finance scenario.
+The figure and table below show the performance value of different methods or language models in the finance scenario.
+
+<div align="center">
+ +
+ +
| User | Qwen2.5-7B | Llama3.1-8B-Instruct | Llama3.1-8B | Qwen1.5-110B | Qwen2.5-72B | Llama3.1-70B-Instruct | Random | Learnware | Best-single | Oracle | |:-------------------------|:-------------|:-----------------------|:--------------|:---------------|:--------------|:------------------------|:---------|:------------|:--------------|:---------| @@ -69,13 +75,21 @@ The table below shows the performance value of different methods or language mod | Learnware (win/tie/loss) | 13/0/4 | 15/0/2 | 16/0/1 | 14/0/3 | 12/0/5 | 11/0/6 | 16/0/1 | nan | 12/1/4 | 0/11/6 | | Oracle (win/tie/loss) | 17/0/0 | 17/0/0 | 17/0/0 | 15/0/2 | 13/0/4 | 12/0/5 | 17/0/0 | 6/11/0 | 14/3/0 | nan | +
+
 
 Our system demonstrates strong performance across financial tasks, achieving the highest average score among all methods and delivering a nearly 14\% improvement over the best large-scale model, Qwen2.5-72B. Among the strategies that utilize specialized SLMs, it ranks first (excluding Oracle) in 13 out of 17 tasks, identifies the optimal learnware (tied with Oracle) on 11, and outperforms all contenders on 8.
 
 These results show that our system can match or surpass large-scale models with over 70B parameters under the Task-Level evaluation setting, while requiring only the memory footprint of models under 8B.
 
 ### Medical
 
-The table below shows the performance value of different methods or language models in medical scenario.
+The figure and table below show the performance value of different methods or language models in the medical scenario.
+
+<div align="center">
+ +
+ +
| User | Qwen2.5-7B | Flan-PaLM-540B | Random | Learnware | Best-single | Oracle | |:-------------------------|:-------------|:-----------------|:---------|:------------|:--------------|:---------| @@ -93,13 +107,21 @@ The table below shows the performance value of different methods or language mod | Learnware (win/tie/loss) | 6/3/0 | 3/0/6 | 9/0/0 | nan | 6/1/2 | 0/3/6 | | Oracle (win/tie/loss) | 9/0/0 | 3/0/6 | 9/0/0 | 6/3/0 | 6/3/0 | nan | +
+
 
 As shown, our system achieves the highest average score across 9 tasks, even surpassing the large-scale model Flan-PaLM-540B. This demonstrates that by leveraging multiple models with fewer than 8B parameters, our system can outperform a single large-scale model in task-specific scenarios. Among SLM utilization strategies, Learnware performs best in 7 out of 9 tasks, tied with Oracle in 6.
 
 Furthermore, the fact that our system surpasses Best-single highlights that its effectiveness comes not from a single exceptionally strong model but from its specification design, its identification mechanism, and the collective strength of all candidate models.
 
 ### Math
 
-The table below shows the performance value of different methods or language models in math scenario.
+The figure and table below show the performance value of different methods or language models in the math scenario.
+
+<div align="center">
+ +
+ +
| User | Qwen2.5-7B | Qwen1.5-110B | Random | Learnware | Best-single | Oracle | |:------------------------------|:-------------|:---------------|:---------|:------------|:--------------|:---------| @@ -124,4 +146,6 @@ The table below shows the performance value of different methods or language mod | Learnware (win/tie/loss) | 10/1/5 | 5/2/9 | 11/0/5 | nan | 10/0/6 | 0/6/10 | | Oracle (win/tie/loss) | 15/1/0 | 7/0/9 | 16/0/0 | 10/6/0 | 14/2/0 | nan | +
+ Our system achieves optimal identification performance (tied with Oracle) in 10 out of 16 tasks and even outperforms all other contenders in 5. However, the large-scale model achieves the highest average score and even beats Oracle (which denotes the optimal performance using one of our 8B-level models). This is likely due to their strong mathematical reasoning abilities that lack in smaller models, rather than a shortcoming of our method, as evidenced by the minimal difference in the "win/tie/loss" of Learnware and Oracle on Qwen1.5-110B. From 9f60651ebbab12a7797304a973bb8727276b621c Mon Sep 17 00:00:00 2001 From: Asymptotez <201220101@smail.nju.edu.cn> Date: Sun, 25 May 2025 13:12:54 +0800 Subject: [PATCH 100/108] [MNT] update version to 0.4.0.post1 and add changelog entry for bugfix release --- CHANGES.rst | 6 +++++- learnware/__init__.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 27b9f17e..afd27c87 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,7 +2,11 @@ Changelog ========= Here you can see the full list of changes between ``learnware`` releases. -Version 0.4.0 (2024-05-20) +Version 0.4.0.post1 (2025-05-25) +--------------- +* Bugfix release. + +Version 0.4.0 (2025-05-20) --------------- * Added support for 7B level language model learnwares. * Added two new specifications, specifically designed for language model learnwares. diff --git a/learnware/__init__.py b/learnware/__init__.py index b2689385..85ae6577 100644 --- a/learnware/__init__.py +++ b/learnware/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.4.0" +__version__ = "0.4.0.post1" import json import os From 233c185174af605496894bc4e6666e8640301946 Mon Sep 17 00:00:00 2001 From: zzc <1196529906@qq.com> Date: Sun, 25 May 2025 16:41:47 +0800 Subject: [PATCH 101/108] [MNT] fix the code style --- .../benchmark/__init__.py | 2 +- .../dataset_llm_workflow/benchmark/base.py | 33 ++- .../dataset_llm_workflow/benchmark/config.py | 21 +- .../dataset_llm_workflow/benchmark/utils.py | 109 +++---- examples/dataset_llm_workflow/build_market.py | 199 ------------- examples/dataset_llm_workflow/eval_config.py | 6 +- .../extra_tasks/flare/flare.py | 274 ++++++++++++------ examples/dataset_llm_workflow/workflow.py | 174 ++++++----- examples/dataset_table_workflow/base.py | 16 +- learnware/client/learnware_client.py | 6 +- learnware/config.py | 2 +- learnware/learnware/__init__.py | 17 +- learnware/market/easy/checker.py | 10 +- learnware/market/easy/searcher.py | 4 +- learnware/market/llm/organizer.py | 4 +- learnware/market/llm/searcher.py | 18 +- learnware/model/base.py | 2 +- learnware/specification/module.py | 19 +- learnware/specification/regular/__init__.py | 2 +- learnware/specification/regular/base.py | 13 +- .../specification/regular/text/__init__.py | 4 +- .../specification/regular/text/generative.py | 144 +++++---- .../llm_general_capability_spec/__init__.py | 6 +- .../llm_general_capability_spec/config.py | 57 ++-- .../llm_general_capability_spec/spec.py | 22 +- learnware/tests/benchmarks/__init__.py | 16 +- .../tests/benchmarks/llm_process_funcs.py | 44 +-- setup.py | 2 +- tests/test_specification/test_general_spec.py | 7 +- .../test_text_generative.py | 49 ++-- .../text_generative_utils.py | 8 +- 31 files changed, 590 insertions(+), 700 deletions(-) delete mode 100644 examples/dataset_llm_workflow/build_market.py diff --git a/examples/dataset_llm_workflow/benchmark/__init__.py b/examples/dataset_llm_workflow/benchmark/__init__.py index eb171900..2650b0f2 100644 --- 
a/examples/dataset_llm_workflow/benchmark/__init__.py +++ b/examples/dataset_llm_workflow/benchmark/__init__.py @@ -1 +1 @@ -from .base import Benchmark \ No newline at end of file +from .base import Benchmark diff --git a/examples/dataset_llm_workflow/benchmark/base.py b/examples/dataset_llm_workflow/benchmark/base.py index 72813b2b..d6a8f8ed 100644 --- a/examples/dataset_llm_workflow/benchmark/base.py +++ b/examples/dataset_llm_workflow/benchmark/base.py @@ -1,13 +1,24 @@ -from .config import LEARNWARE_MATH, LEARNWARE_MED, USER_MED, USER_MATH, LEARNWARE_FIN, USER_FIN, LEARNWARE_MED_IDS, LEARNWARE_MATH_IDS, LEARNWARE_FIN_IDS +from .config import ( + LEARNWARE_MATH, + LEARNWARE_MED, + USER_MED, + USER_MATH, + LEARNWARE_FIN, + USER_FIN, + LEARNWARE_MED_IDS, + LEARNWARE_MATH_IDS, + LEARNWARE_FIN_IDS, +) from .utils import prepare_train_data, prepare_test_data from datasets import Dataset from typing import List, Tuple + class Benchmark: def __init__(self, name: str): self.name = name self.set_datasets(name) - + def get_benchmark_name(self): return self.name @@ -26,36 +37,36 @@ def set_datasets(self, name: str): self.user_dict = USER_FIN else: raise NotImplementedError("other benchmarks are not implemented") - + def get_learnware_ids(self) -> List[str]: return self.learnware_ids - + def get_learnware_data(self, dataset_name) -> List[str]: train_dataset, val_dataset = prepare_train_data(self.learnware_dict[dataset_name]) train_data, val_data = train_dataset["text"], val_dataset["text"] return train_data, val_data - + def get_learnware_dataset(self, dataset_name) -> Tuple[Dataset, Dataset]: train_dataset, val_dataset = prepare_train_data(self.learnware_dict[dataset_name]) return train_dataset, val_dataset - + def get_user_data(self, dataset_name) -> List[str]: test_dataset = prepare_test_data(self.user_dict[dataset_name]) test_data = test_dataset["text"] return test_data - + def get_user_dataset(self, dataset_name) -> Dataset: test_dataset = prepare_test_data(self.user_dict[dataset_name]) return test_dataset - + def get_learnwares(self): return self.learnware_dict - + def get_users(self): return self.user_dict - + def get_learnware_names(self) -> List[str]: return list(self.learnware_dict.keys()) - + def get_user_names(self) -> List[str]: return list(self.user_dict.keys()) diff --git a/examples/dataset_llm_workflow/benchmark/config.py b/examples/dataset_llm_workflow/benchmark/config.py index 18b46f37..3f4aa0b1 100644 --- a/examples/dataset_llm_workflow/benchmark/config.py +++ b/examples/dataset_llm_workflow/benchmark/config.py @@ -1,22 +1,19 @@ LEARNWARE_MATH = { "MWP-Instruct": "Macropodus/MWP-Instruct", - # "goat": "tiedong/goat", "school_math_0.25M": "BelleGroup/school_math_0.25M", "MathInstruct": "TIGER-Lab/MathInstruct", "MetaMathQA": "meta-math/MetaMathQA", "orca-math-word-problems-200k": "microsoft/orca-math-word-problems-200k", "Arithmo-Data": "akjindal53244/Arithmo-Data", - # "MATH-K-100-train": "Dahoas/MATH-K-100-train", "MATH_train": "ScalableMath/MATH_train-cleaned_processed", "MetaMath-GSM240K": "fxmeng/MetaMath-GSM240K", "GSM8K_zh": "meta-math/GSM8K_zh", } LEARNWARE_MED = { - "AlpaCare": "lavita/AlpaCare-MedInstruct-52k", + "AlpaCare": "lavita/AlpaCare-MedInstruct-52k", "ChatDoctor": "lavita/ChatDoctor-HealthCareMagic-100k", "medalpaca_cleaned": 
"medalpaca/medical_meadow_wikidoc,medalpaca/medical_meadow_medical_flashcards,medalpaca/medical_meadow_wikidoc_patient_information,medalpaca/medical_meadow_pubmed_causal,medalpaca/medical_meadow_mediqa,medalpaca/medical_meadow_health_advice", - # "medical_flashcards": "medalpaca/medical_meadow_medical_flashcards", "medqa_train": "medalpaca/medical_meadow_medqa", "pubmed_causal": "medalpaca/medical_meadow_pubmed_causal", "medmcqa_train": "chenhaodev/medmcqa_instruct", @@ -37,12 +34,6 @@ "sm_acl": "ChanceFocus/flare-sm-acl", "sm_bigdata": "TheFinAI/en-forecasting-bigdata", "sm_cikm": "ChanceFocus/flare-sm-cikm", - # "convfinqa": "ChanceFocus/flare-convfinqa", - # "finqa": "ChanceFocus/flare-finqa", - # "cra_ccf": "ChanceFocus/cra-ccf", - # "cra_ccfraud": "ChanceFocus/cra-ccfraud", - # "cra_polish": "ChanceFocus/cra-polish", - # "cra_taiwan": "TheFinAI/cra-taiwan", } USER_MED = { @@ -69,13 +60,6 @@ "gsm8k": "gsm8k,main", "mathqa": "allenai/math_qa", "mgsm_native_cot_zh": "juletxara/mgsm,zh", - # "minerva_math_algebra": "EleutherAI/hendrycks_math,algebra", - # "minerva_math_counting_and_prob": "EleutherAI/hendrycks_math,counting_and_probability", - # "minerva_math_geometry": "EleutherAI/hendrycks_math,geometry", - # "minerva_math_intermediate_algebra": "EleutherAI/hendrycks_math,intermediate_algebra", - # "minerva_math_num_theory": "EleutherAI/hendrycks_math,number_theory", - # "minerva_math_prealgebra": "EleutherAI/hendrycks_math,prealgebra", - # "minerva_math_precalculus": "EleutherAI/hendrycks_math,precalculus", "minerva_math": "lighteval/MATH,all", "abstract_algebra": "hails/mmlu_no_train,abstract_algebra", "college_mathematics": "hails/mmlu_no_train,college_mathematics", @@ -143,7 +127,6 @@ "00002818", "00002819", "00002820", - "00002820", "00002821", "00002822", "00002823", @@ -213,4 +196,4 @@ "00002887", "00002888", "00002889", -] \ No newline at end of file +] diff --git a/examples/dataset_llm_workflow/benchmark/utils.py b/examples/dataset_llm_workflow/benchmark/utils.py index 669c8d86..a8be1406 100644 --- a/examples/dataset_llm_workflow/benchmark/utils.py +++ b/examples/dataset_llm_workflow/benchmark/utils.py @@ -1,6 +1,6 @@ import re import random -from datasets import load_dataset, concatenate_datasets +from datasets import load_dataset, concatenate_datasets from typing import List from .config import LEARNWARE_FIN, LEARNWARE_MATH, LEARNWARE_MED, USER_FIN @@ -140,7 +140,7 @@ def preprocess_finance(docs) -> str: outputs = docs["answer"] texts = [] for instruction, output in zip(instructions, outputs): - instruction.rstrip(' Answer:') + instruction.rstrip(" Answer:") text = alpaca_no_input_prompt.format(instruction, output) texts.append(text) return texts @@ -240,29 +240,18 @@ def preprocess_medqa_val(docs): def preprocess_mmlu(doc) -> str: question = doc["question"].strip() choices = doc["choices"] - return "{}\nA. {}\nB. {}\nC. {}\nD. {}\nAnswer:".format( - question, - choices[0], - choices[1], - choices[2], - choices[3] - ) + return "{}\nA. {}\nB. {}\nC. {}\nD. {}\nAnswer:".format(question, choices[0], choices[1], choices[2], choices[3]) def preprocess_mmlu_val(docs): questions = docs["question"] choices = docs["choices"] - answers = docs["answer"] + answers = docs["answer"] texts = [] for question, options, answer in zip(questions, choices, answers): texts.append( "{}\nA. {}\nB. {}\nC. {}\nD. 
{}\nAnswer: {}".format( - question.strip(), - options[0], - options[1], - options[2], - options[3], - ["A", "B", "C", "D"][answer] + question.strip(), options[0], options[1], options[2], options[3], ["A", "B", "C", "D"][answer] ) ) return texts @@ -293,13 +282,7 @@ def preprocess_agieval(doc) -> str: def preprocess_cmmlu(doc) -> str: question = doc["Question"].strip() - return "{}\nA. {}\nB. {}\nC. {}\nD. {}\n答案:".format( - question, - doc["A"], - doc["B"], - doc["C"], - doc["D"] - ) + return "{}\nA. {}\nB. {}\nC. {}\nD. {}\n答案:".format(question, doc["A"], doc["B"], doc["C"], doc["D"]) def preprocess_cmmlu_val(docs): @@ -308,23 +291,19 @@ def preprocess_cmmlu_val(docs): bs = docs["B"] cs = docs["C"] ds = docs["D"] - answers = docs["Answer"] + answers = docs["Answer"] texts = [] for question, a, b, c, d, answer in zip(questions, as_, bs, cs, ds, answers): - texts.append("{}\nA. {}\nB. {}\nC. {}\nD. {}\n答案:{}".format( - question.strip(), a, b, c, d, answer - )) + texts.append("{}\nA. {}\nB. {}\nC. {}\nD. {}\n答案:{}".format(question.strip(), a, b, c, d, answer)) return texts def preprocess_mathqa(doc) -> str: - return "Question: {}\nAnswer:".format( - doc["Problem"] - ) + return "Question: {}\nAnswer:".format(doc["Problem"]) def preprocess_mgsm(doc) -> str: - return "问题: "+doc["question"]+"\n逐步解答:" + return "问题: " + doc["question"] + "\n逐步解答:" def preprocess_gsm8k(doc) -> str: @@ -337,15 +316,17 @@ def preprocess_mathqa_val(docs): options = docs["options"] texts = [] for problem, correct, option in zip(problems, corrects, options): - choices = [ - c[4:].rstrip(" ,") - for c in re.findall(r"[abcd] \) .*?, |e \) .*?$", option) - ] - + choices = [c[4:].rstrip(" ,") for c in re.findall(r"[abcd] \) .*?, |e \) .*?$", option)] + # answer = ['a', 'b', 'c', 'd', 'e'].index(correct) - texts.append("Question: {}\na. {}\nb. {}\nc. {}\nd. {}\ne. {}\nAnswer: {}".format(problem, choices[0], choices[1], choices[2], choices[3], choices[4], correct)) + texts.append( + "Question: {}\na. {}\nb. {}\nc. {}\nd. {}\ne. 
{}\nAnswer: {}".format( + problem, choices[0], choices[1], choices[2], choices[3], choices[4], correct + ) + ) return texts + def preprocess_mgsm_val(docs): questions = docs["question"] answers = docs["answer"] @@ -459,32 +440,34 @@ def preprocess_finance_test(doc) -> str: def prepare_train_data(dataset_name_str): if dataset_name_str in list(PROCESS_FUNC_WITH_LABEL.keys()): dataset = load_dataset(dataset_name_str, split="train") - if dataset_name_str == "meta-math/GSM8K_zh": - dataset = dataset.filter(lambda x: x['split']=='train') - dataset = dataset.map(lambda x: {"text": PROCESS_FUNC_WITH_LABEL[dataset_name_str](x)}, batched = True) + if dataset_name_str == "meta-math/GSM8K_zh": + dataset = dataset.filter(lambda x: x["split"] == "train") + dataset = dataset.map(lambda x: {"text": PROCESS_FUNC_WITH_LABEL[dataset_name_str](x)}, batched=True) split_dataset = dataset.train_test_split(test_size=0.1) - train_dataset = split_dataset['train'] - val_dataset = split_dataset['test'] + train_dataset = split_dataset["train"] + val_dataset = split_dataset["test"] elif dataset_name_str in list(LEARNWARE_FIN.values()): - train_dataset = load_dataset(dataset_name_str, split="train") + train_dataset = load_dataset(dataset_name_str, split="train") if "cra" not in dataset_name_str: - val_dataset = load_dataset(dataset_name_str, split="valid") + val_dataset = load_dataset(dataset_name_str, split="valid") else: - val_dataset = load_dataset(dataset_name_str, split="validation") - train_dataset = train_dataset.map(lambda x: {"text": preprocess_finance(x)}, batched = True) - val_dataset = val_dataset.map(lambda x: {"text": preprocess_finance(x)}, batched = True) + val_dataset = load_dataset(dataset_name_str, split="validation") + train_dataset = train_dataset.map(lambda x: {"text": preprocess_finance(x)}, batched=True) + val_dataset = val_dataset.map(lambda x: {"text": preprocess_finance(x)}, batched=True) else: - dataset_list = dataset_name_str.split(',') + dataset_list = dataset_name_str.split(",") train_datasets = [] for dataset_name in dataset_list: - dataset = load_dataset(dataset_name, split="train") - dataset = dataset.remove_columns([col for col in dataset.column_names if col not in ['instruction', 'input', 'output']]) + dataset = load_dataset(dataset_name, split="train") + dataset = dataset.remove_columns( + [col for col in dataset.column_names if col not in ["instruction", "input", "output"]] + ) train_datasets.append(dataset) combined_dataset = concatenate_datasets(train_datasets) - combined_dataset = combined_dataset.map(lambda x: {"text": preprocess_alpaca(x)}, batched = True) + combined_dataset = combined_dataset.map(lambda x: {"text": preprocess_alpaca(x)}, batched=True) split_dataset = combined_dataset.train_test_split(test_size=0.1) - train_dataset = split_dataset['train'] - val_dataset = split_dataset['test'] + train_dataset = split_dataset["train"] + val_dataset = split_dataset["test"] return train_dataset, val_dataset @@ -496,30 +479,20 @@ def prepare_test_data(dataset_name_str): subset_name = temp_list[1] dataset_name = temp_list[0] if subset_name: - test_dataset = load_dataset(dataset_name, subset_name, split="test") + test_dataset = load_dataset(dataset_name, subset_name, split="test") else: - test_dataset = load_dataset(dataset_name, split="test") - + test_dataset = load_dataset(dataset_name, split="test") + if dataset_name == "gsm8k": rnd = random.Random(1234) train_dataset = load_dataset(dataset_name, "main", split="train") train_dataset = train_dataset.map(lambda x: {"text": 
preprocess_gsm8k_val(x)}, batched=True) train_docs = train_dataset["text"] fewshot_examples = rnd.sample(train_docs, 5) - fewshot_context = ( - "\n\n".join(fewshot_examples) + "\n\n" - ) + fewshot_context = "\n\n".join(fewshot_examples) + "\n\n" test_dataset = test_dataset.map(lambda x: {"text": fewshot_context + preprocess_gsm8k(x)}) elif dataset_name == "lighteval/MATH": - fewshot_context = ( - "\n\n".join( - [ - math_fewshot_prompt(example) - for example in math_fewshot_samples() - ] - ) - + "\n\n" - ) + fewshot_context = "\n\n".join([math_fewshot_prompt(example) for example in math_fewshot_samples()]) + "\n\n" test_dataset = test_dataset.map(lambda x: {"text": fewshot_context + preprocess_math(x)}) elif dataset_name in list(USER_FIN.values()): test_dataset = test_dataset.map(lambda x: {"text": preprocess_finance_test(x)}) diff --git a/examples/dataset_llm_workflow/build_market.py b/examples/dataset_llm_workflow/build_market.py deleted file mode 100644 index 713affc2..00000000 --- a/examples/dataset_llm_workflow/build_market.py +++ /dev/null @@ -1,199 +0,0 @@ -import shutil -from learnware.market import instantiate_learnware_market -from learnware.specification import generate_semantic_spec -from learnware.specification.module import generate_generative_model_spec, generate_rkme_text_spec - -from benchmark import Benchmark -from benchmark.config import LEARNWARE_FIN, LEARNWARE_MED, LEARNWARE_MATH - -import os -import zipfile -import torch - - -def prepare_learnware(benchmark_name, name): - dataset_name = name - default_path = "learnware_pool/default/" - - if dataset_name == "fiqasa": - base_model = "Meta-Llama-3.1-8B" - elif dataset_name == "australian": - base_model = "Meta-Llama-3.1-8B-Instruct" - else: - base_model = "Qwen2.5-7B" - - model_folder = f"models/{base_model}/{dataset_name}" - versions = sorted(os.listdir(model_folder)) - - for i, version in enumerate(versions): - folder_path = f"learnware_pool/{benchmark_name}/learnwares/{dataset_name}-{i+1}" - os.makedirs(folder_path, exist_ok=True) - copy_adapter(folder_path, version, model_folder) - update_from_default(folder_path, os.path.join(default_path, base_model)) - build_specification_from_cache(folder_path, dataset_name) - zip_dir = f"learnware_pool/{benchmark_name}/zips" - os.makedirs(zip_dir, exist_ok=True) - zip_path = os.path.join(zip_dir, f"{dataset_name}-{i+1}.zip") - compress_folder_to_zip(folder_path, zip_path) - - -def add_learnware_to_market(benchmark_name, name, market): - dataset_name = name - default_path = "learnware_pool/default/" - benchmark2scenario = { - "medical": "Health", - "finance": "Financial", - "math": "Others" - } - - if dataset_name == "fiqasa": - base_model = "Meta-Llama-3.1-8B" - base_model_path = "NousResearch/Meta-Llama-3.1-8B" - license = "Others" - elif dataset_name == "australian": - base_model = "Meta-Llama-3.1-8B-Instruct" - base_model_path = "NousResearch/Meta-Llama-3.1-8B-Instruct" - license = "Others" - else: - base_model = "Qwen2.5-7B" - base_model_path = "Qwen/Qwen2.5-7B" - license = "Apache-2.0" - - model_folder = f"models/{base_model}/{dataset_name}" - versions = sorted(os.listdir(model_folder)) - - for i, version in enumerate(versions): - folder_path = f"learnware_pool/{benchmark_name}/learnwares/{dataset_name}-{i+1}" - os.makedirs(folder_path, exist_ok=True) - copy_adapter(folder_path, version, model_folder) - update_from_default(folder_path, os.path.join(default_path, base_model)) - build_specification_from_cache(folder_path, dataset_name) - zip_dir = 
f"learnware_pool/{benchmark_name}/zips" - os.makedirs(zip_dir, exist_ok=True) - zip_path = os.path.join(zip_dir, f"{dataset_name}-{i+1}.zip") - compress_folder_to_zip(folder_path, zip_path) - - semantic_spec = generate_semantic_spec( - name=f"{dataset_name}-{i+1}", - description=f"LoRA adapter fine-tuned using SFT on the {dataset_name} dataset. Hugging Face path of its base model: {base_model_path}", - data_type="Text", - model_type="PEFT Model", - task_type="Text Generation", - library_type="PyTorch", - scenarios=[benchmark2scenario[benchmark_name]], - license=license, - input_description=None, - output_description=None, - ) - # semantic_spec = generate_semantic_spec( - # name=name, - # description=name, - # data_type="Text", - # model_type="Base Model", - # task_type="Text Generation", - # library_type="PyTorch", - # scenarios=["Others"], - # license="Others", - # input_description=None, - # output_description=None, - # ) - market.add_learnware(zip_path, semantic_spec) - - -def update_from_default(folder_path, default_path): - for item in os.listdir(default_path): - src_item = os.path.join(default_path, item) - dest_item = os.path.join(folder_path, item) - - if not os.path.exists(dest_item): - print(f"Copy default files to {dest_item}") - if os.path.isdir(src_item): - shutil.copytree(src_item, dest_item) - else: - shutil.copy2(src_item, dest_item) - - -def copy_adapter(folder_path, version, model_folder): - if not os.path.exists(os.path.join(folder_path, "adapter")): - print(f"Copy adapter files from {model_folder}/{version} to {folder_path}") - os.makedirs(folder_path, exist_ok=True) - shutil.copytree( - os.path.join(model_folder, version, "adapter"), - os.path.join(folder_path, "adapter")) - - -def compress_folder_to_zip(folder_path, zip_file_path): - """ - 将指定文件夹压缩为 ZIP 文件。 - - :param folder_path: 要压缩的文件夹路径 - :param zip_file_path: 生成的 ZIP 文件路径 - """ - if not os.path.exists(zip_file_path): - print(f"Compress folder to zip_path {zip_file_path}") - with zipfile.ZipFile(zip_file_path, 'w', zipfile.ZIP_DEFLATED) as zipf: - for root, dirs, files in os.walk(folder_path): - for file in files: - file_path = os.path.join(root, file) - # 将文件添加到 ZIP 中,并保留相对路径 - arcname = os.path.relpath(file_path, folder_path) - zipf.write(file_path, arcname) - - -def build_specification_from_cache(folder_path, dataset_name): - rkme_path = os.path.join(folder_path, "rkme.json") - generative_path = os.path.join(folder_path, "generative.pth") - - if not os.path.exists(rkme_path): - print(f"Build RKME from cache to {rkme_path}") - if dataset_name in LEARNWARE_FIN: - src_path = f"/home/zhaozc/text_learnware/llama3-finetune/storage/rkmes/finance/reduced_set_size_100/gamma_0.1/learnware/{dataset_name}.json" - shutil.copy2(src_path, rkme_path) - elif dataset_name in LEARNWARE_MED: - src_path = f"/home/zhaozc/text_learnware/llama3-finetune/storage/rkmes/medical/reduced_set_size_100/gamma_0.1/learnware/{dataset_name}.json" - shutil.copy2(src_path, rkme_path) - elif dataset_name in LEARNWARE_MATH: - src_path = f"/home/zhaozc/text_learnware/llama3-finetune/storage/rkmes/math/reduced_set_size_100/gamma_0.1/learnware/{dataset_name}.json" - shutil.copy2(src_path, rkme_path) - - if not os.path.exists(generative_path): - print(f"Build PAVE from cache to {generative_path}") - if dataset_name in LEARNWARE_FIN: - finetuned_checkpoint = torch.load(f"/home/shihy/drive/LLM-finance-GridSearch-qwen/condidate-{1}/learnware-{dataset_name}/finetuned.pt", weights_only=False) - elif dataset_name in LEARNWARE_MED: - finetuned_checkpoint = 
torch.load(f"/home/shihy/drive/LLM-med-GridSearch-qwen-backup/condidate-{0}/{dataset_name}/finetuned.pt", weights_only=False) - elif dataset_name in LEARNWARE_MATH: - finetuned_checkpoint = torch.load(f"/home/shihy/drive/LLM-math-GridSearch-qwen/condidate-{0}/{dataset_name}/finetuned.pt", weights_only=False) - else: - raise NotImplementedError("Invalid dataset_name") - - finetuned_state_dict = finetuned_checkpoint["state_dict"]["model"] - task_vector = torch.concatenate([ - p.reshape(-1) for n, p in finetuned_state_dict.items() - ]) - torch.save({ - "type": "GenerativeModelSpecification", - "task_vector": task_vector.detach().cpu() - }, generative_path) - - - -def build_market(benchmark_name, rebuild=True): - llm_market = instantiate_learnware_market(market_id=f"llm_{benchmark_name}", name="llm", rebuild=rebuild) - benchmark = Benchmark(benchmark_name) - learnware_names = benchmark.get_learnware_names() - print("Leanrware Names:", ", ".join(learnware_names)) - for name in learnware_names: - title = "="*20 + name + "="*20 - print(title) - # train_dataset, _ = benchmark.get_learnware_dataset(name) - add_learnware_to_market(benchmark_name, name, llm_market) - # prepare_learnware(benchmark_name, name) - print("Market size after adding learnware:", len(llm_market)) - print("=" * len(title)) - - -if __name__ == "__main__": - build_market("medical") - build_market("math") - build_market("finance") \ No newline at end of file diff --git a/examples/dataset_llm_workflow/eval_config.py b/examples/dataset_llm_workflow/eval_config.py index 9477db30..e79de6bf 100644 --- a/examples/dataset_llm_workflow/eval_config.py +++ b/examples/dataset_llm_workflow/eval_config.py @@ -180,8 +180,4 @@ ), ] -CONFIG = { - "medical": medical_eval_configs, - "math": math_eval_configs, - "finance": finance_eval_configs -} \ No newline at end of file +CONFIG = {"medical": medical_eval_configs, "math": math_eval_configs, "finance": finance_eval_configs} diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/flare.py b/examples/dataset_llm_workflow/extra_tasks/flare/flare.py index e6fe0248..2a16b6d2 100644 --- a/examples/dataset_llm_workflow/extra_tasks/flare/flare.py +++ b/examples/dataset_llm_workflow/extra_tasks/flare/flare.py @@ -1,6 +1,7 @@ """ FLARE """ + from lm_eval.api.instance import Instance import numpy as np from seqeval.metrics import f1_score as entity_score @@ -10,14 +11,16 @@ from lm_eval.api.task import ConfigurableTask import os + def mean(arr): return sum(arr) / len(arr) + def process_text(entity_string, text): # Initialize entity_list = [(", ".join(val.split(", ")[:-1]), val.split(", ")[-1]) for val in entity_string.split("\n")] text_words = text.split() - labels = ['O'] * len(text_words) + labels = ["O"] * len(text_words) # text_lower = text.lower() text_lower = text @@ -36,7 +39,8 @@ def process_text(entity_string, text): start = 0 while True: start = text_lower.find(entity_lower, start) - if not entity or start == -1: break # No more occurrence + if not entity or start == -1: + break # No more occurrence end = start + len(entity) - 1 # Find the words included in this occurrence @@ -45,9 +49,9 @@ def process_text(entity_string, text): end_word = next(i for i, ind in enumerate(word_indices) if ind > end) # Label the words - labels[start_word] = 'B-' + entity_type - for i in range(start_word+1, end_word): - labels[i] = 'I-' + entity_type + labels[start_word] = "B-" + entity_type + for i in range(start_word + 1, end_word): + labels[i] = "I-" + entity_type # Move to the next character after the 
occurrence except Exception: @@ -284,7 +288,7 @@ def construct_requests(self, doc, ctx, **kwargs): **kwargs, ) ] - + def process_result(self, pred, gold, tokens): format_pred = ["O"] * len(gold) for index, pre in enumerate(pred.split("\n")[: len(tokens)]): @@ -299,10 +303,7 @@ def process_result(self, pred, gold, tokens): def entity_f1(self, items): golds, preds, tokens = zip(*items) - list_preds = [ - self.process_result(pred, gold, token) - for pred, gold, token in zip(preds, golds, tokens) - ] + list_preds = [self.process_result(pred, gold, token) for pred, gold, token in zip(preds, golds, tokens)] f1 = entity_score(golds, list_preds) return f1 @@ -320,10 +321,7 @@ def process_label_result(self, pred, gold, tokens): def label_f1(self, items): golds, preds, tokens = zip(*items) - list_preds = [ - self.process_label_result(pred, gold, token) - for pred, gold, token in zip(preds, golds, tokens) - ] + list_preds = [self.process_label_result(pred, gold, token) for pred, gold, token in zip(preds, golds, tokens)] list_preds = [item for sublist in list_preds for item in sublist] golds = [self.LMAP[item] for sublist in golds for item in sublist] f1 = f1_score(golds, list_preds, average="weighted") @@ -551,11 +549,7 @@ def get_sum(self, labels, texts): for label, text in zip(labels, texts): text = text.split("\n") new_text = "\n".join( - [ - text[index] - for index in range(len(text)) - if index < len(label) and label[index] == 1 - ] + [text[index] for index in range(len(text)) if index < len(label) and label[index] == 1] ) summ.append(new_text) return summ @@ -943,9 +937,7 @@ def process_results(self, doc, results): for choice in doc["choices"]: if self.LOWER_CASE: choice = choice.lower() - if choice in ini_result or any( - [val in ini_result for val in self.CHOICE_DICT[choice]] - ): + if choice in ini_result or any([val in ini_result for val in self.CHOICE_DICT[choice]]): result = choice break if result is None: @@ -1054,7 +1046,7 @@ class FOMC(Classification): def has_training_docs(self): return False - + def has_validation_docs(self): return False @@ -1133,7 +1125,7 @@ def test_docs(self): return self.dataset["test"] def doc_to_text(self, doc): - # TODO: Format the query prompt portion of the document example. + # TODO: Format the query prompt portion of the document example. return doc["query"] def doc_to_target(self, doc): @@ -1141,7 +1133,7 @@ def doc_to_target(self, doc): def process_results(self, doc, results): pred = results[0].split("\n")[0] - pred = re.findall(r'[0-9]+(?:\.[0-9]+)?', pred) + pred = re.findall(r"[0-9]+(?:\.[0-9]+)?", pred) missing = 0 if not pred: pred = -100.0 @@ -1149,17 +1141,14 @@ def process_results(self, doc, results): else: pred = pred[0] pred = float(pred) - return { - "rmse": (doc["answer"], pred), - "missing": missing - } + return {"rmse": (doc["answer"], pred), "missing": missing} def higher_is_better(self): return { "rmse": False, } - def construct_requests(self, doc, ctx,**kwargs): + def construct_requests(self, doc, ctx, **kwargs): """ Uses RequestFactory to construct Requests and returns an iterable of Requests which will be sent to the LM. 
@@ -1200,8 +1189,7 @@ def aggregation(self): return { "rmse": self.rmse, "missing": mean, - } - + } class CFA(Classification): @@ -1210,7 +1198,7 @@ class CFA(Classification): def has_training_docs(self): return False - + def has_validation_docs(self): return False @@ -1220,7 +1208,7 @@ class FINARGECCARC(Classification): def has_training_docs(self): return False - + def has_validation_docs(self): return False @@ -1230,7 +1218,7 @@ class FINARGECCAUC(Classification): def has_training_docs(self): return False - + def has_validation_docs(self): return False @@ -1244,64 +1232,205 @@ class MLESG(Classification): def has_training_docs(self): return False - + def has_validation_docs(self): return False class FSRL(SequentialLabeling): DATASET_PATH = "chancefocus/flare-fsrl" - LMAP = {key: index for index, key in enumerate(['O', 'I-QUANT', 'B-QUANT', 'I-TIME', 'B-TIME', 'I-MANNER', 'B-MANNER', 'I-THEME', 'B-THEME', 'I-VALUE', 'B-VALUE', 'I-WHOLE', 'B-WHOLE', 'I-LOCATION', 'B-LOCATION', 'I-AGENT', 'B-AGENT', 'I-CAUSE', 'B-CAUSE', 'I-SOURCE', 'B-SOURCE', 'I-REF_TIME', 'B-REF_TIME', 'I-CONDITION', 'B-CONDITION'])} + LMAP = { + key: index + for index, key in enumerate( + [ + "O", + "I-QUANT", + "B-QUANT", + "I-TIME", + "B-TIME", + "I-MANNER", + "B-MANNER", + "I-THEME", + "B-THEME", + "I-VALUE", + "B-VALUE", + "I-WHOLE", + "B-WHOLE", + "I-LOCATION", + "B-LOCATION", + "I-AGENT", + "B-AGENT", + "I-CAUSE", + "B-CAUSE", + "I-SOURCE", + "B-SOURCE", + "I-REF_TIME", + "B-REF_TIME", + "I-CONDITION", + "B-CONDITION", + ] + ) + } + class CFA(Classification): DATASET_PATH = "chancefocus/flare-cfa" def has_training_docs(self): return False - + def has_validation_docs(self): return False + # class FinargECCAUC(Classification): # DATASET_PATH = "chancefocus/flare-finarg-ecc-auc" # class FinargECCARC(Classification): # DATASET_PATH = "chancefocus/flare-finarg-ecc-arc" + class CD(SequentialLabeling): DATASET_PATH = "chancefocus/flare-cd" - LMAP = {key: index for index, key in enumerate(['O', 'I-CAUSE', 'B-CAUSE', 'I-EFFECT', 'B-EFFECT'])} + LMAP = {key: index for index, key in enumerate(["O", "I-CAUSE", "B-CAUSE", "I-EFFECT", "B-EFFECT"])} + class MultiFinEN(Classification): DATASET_PATH = "chancefocus/flare-multifin-en" def has_training_docs(self): return False - + def has_validation_docs(self): return False + class MA(Classification): DATASET_PATH = "chancefocus/flare-ma" def has_training_docs(self): return False - + def has_validation_docs(self): return False + class Causal20SC(Classification): DATASET_PATH = "chancefocus/flare-causal20-sc" def has_training_docs(self): return False - + def has_validation_docs(self): return False + class FNXL(SequentialLabeling): DATASET_PATH = "chancefocus/flare-fnxl" - LMAP = {'B-BusinessCombinationContingentConsiderationArrangementsRangeOfOutcomesValueHigh': 140, 'B-VariableInterestEntityOwnershipPercentage': 646, 'B-GainLossOnDispositionOfAssets1': 119, 'B-IndefiniteLivedIntangibleAssetsExcludingGoodwill': 46, 'B-MarketingAndAdvertisingExpense': 269, 'B-ReportingUnitPercentageOfFairValueInExcessOfCarryingAmount': 142, 'B-CapitalizedComputerSoftwareNet': 91, 'B-BusinessCombinationConsiderationTransferredEquityInterestsIssuedAndIssuable': 183, 'B-LitigationSettlementExpense': 115, 'B-DefinedBenefitPlanExpectedAmortizationOfGainLossNextFiscalYear': 639, 'B-DeferredCompensationArrangementWithIndividualCompensationExpense': 15, 'B-ReclassificationFromAociCurrentPeriodTax': 152, 'B-OtherComprehensiveIncomeLossBeforeReclassificationsTax': 694, 
'B-PreferredStockDividendsPerShareDeclared': 236, 'B-CapitalExpendituresIncurredButNotYetPaid': 344, 'B-DeferredCompensationArrangementWithIndividualContributionsByEmployer': 560, 'B-SeveranceCosts1': 311, 'B-InterestExpense': 784, 'B-SaleOfStockConsiderationReceivedOnTransaction': 76, 'B-LineOfCreditFacilityInterestRateAtPeriodEnd': 822, 'B-SharesIssuedPricePerShare': 137, 'B-EquityMethodInvestmentDifferenceBetweenCarryingAmountAndUnderlyingEquity': 63, 'B-EquitySecuritiesFvNi': 30, 'B-RightOfUseAssetObtainedInExchangeForOperatingLeaseLiability': 118, 'B-DefinedBenefitPlanFundedStatusOfPlan': 547, 'B-SharebasedCompensationArrangementBySharebasedPaymentAwardPurchasePriceOfCommonStockPercent': 323, 'B-TaxCutsAndJobsActOf2017IncomeTaxExpenseBenefit': 256, 'B-LongtermDebtWeightedAverageInterestRate': 364, 'B-ImpairmentOfIntangibleAssetsFinitelived': 71, 'B-ProceedsFromLinesOfCredit': 496, 'B-LongTermPurchaseCommitmentAmount': 701, 'B-DebtInstrumentFairValue': 335, 'B-RestructuringAndRelatedCostCostIncurredToDate1': 52, 'B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsVestedInPeriod': 581, 'B-FiniteLivedIntangibleAssetsAccumulatedAmortization': 143, 'B-StockRepurchasedAndRetiredDuringPeriodValue': 330, 'B-BusinessCombinationProFormaInformationRevenueOfAcquireeSinceAcquisitionDateActual': 77, 'B-ClassOfWarrantOrRightExercisePriceOfWarrantsOrRights1': 361, 'B-BusinessAcquisitionPurchasePriceAllocationGoodwillExpectedTaxDeductibleAmount': 550, 'B-OperatingLossCarryforwardsValuationAllowance': 173, 'B-BusinessAcquisitionEquityInterestsIssuedOrIssuableNumberOfSharesIssued': 32, 'B-DefinedContributionPlanMaximumAnnualContributionsPerEmployeePercent': 45, 'B-ContractWithCustomerLiabilityCurrent': 2, 'B-IncomeLossFromContinuingOperationsBeforeIncomeTaxesForeign': 474, 'B-FiniteLivedIntangibleAssetsAmortizationExpenseYearThree': 1306, 'B-DefinedBenefitPlanUltimateHealthCareCostTrendRate1': 62, 'B-DefinedBenefitPlanRecognizedNetGainLossDueToSettlements1': 317, 'B-UnrecognizedTaxBenefitsInterestOnIncomeTaxesExpense': 448, 'B-ForeignCurrencyTransactionGainLossRealized': 132, 'B-DeferredTaxAssetsOperatingLossCarryforwardsSubjectToExpiration': 262, 'B-RetainedEarningsAccumulatedDeficit': 174, 'B-ProceedsFromIssuanceOfCommonStock': 209, 'B-EmployeeServiceShareBasedCompensationAllocationOfRecognizedPeriodCostsCapitalizedAmount': 29, 'B-OtherComprehensiveIncomeLossPensionAndOtherPostretirementBenefitPlansTax': 284, 'B-InventoryWriteDown': 465, 'B-RestructuringReserve': 234, 'B-LitigationSettlementAmountAwardedToOtherParty': 42, 'B-DerivativeGainLossOnDerivativeNet': 87, 'B-SharebasedCompensationArrangementBySharebasedPaymentAwardEquityInstrumentsOtherThanOptionsAggregateIntrinsicValueVested': 241, 'B-DerivativeFixedInterestRate': 589, 'B-CashAndCashEquivalentsAtCarryingValue': 257, 'B-ContractWithCustomerAssetNet': 245, 'B-RestructuringAndRelatedCostExpectedCost1': 107, 'B-IncomeTaxHolidayAggregateDollarAmount': 347, 'B-OperatingLeaseCost': 248, 'B-AllowanceForDoubtfulAccountsReceivable': 146, 'B-RepaymentsOfDebt': 416, 'B-InterestPaid': 110, 'B-DeferredFinanceCostsNet': 28, 'B-IncomeTaxExaminationPenaltiesAndInterestAccrued': 271, 'B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsNonvestedNumber': 92, 'B-CapitalizedContractCostNet': 155, 'B-CumulativeEffectOfNewAccountingPrincipleInPeriodOfAdoption': 17, 'B-IncomeTaxesPaid': 495, 'B-EquityMethodInvestmentOtherThanTemporaryImpairment': 22, 'B-InterestPaidNet': 225, 
'B-EquitySecuritiesWithoutReadilyDeterminableFairValueAmount': 175, 'B-ImpairmentOfLongLivedAssetsHeldForUse': 313, 'B-GoodwillAcquiredDuringPeriod': 156, 'B-DecreaseInUnrecognizedTaxBenefitsIsReasonablyPossible': 363, 'B-RestructuringAndRelatedCostIncurredCost': 75, 'B-StockRepurchasedDuringPeriodValue': 254, 'B-IncomeTaxExaminationPenaltiesAndInterestExpense': 525, 'B-ImpairmentOfIntangibleAssetsIndefinitelivedExcludingGoodwill': 55, 'B-PreferredStockLiquidationPreference': 157, 'B-ImpairmentOfIntangibleAssetsExcludingGoodwill': 158, 'B-IncomeTaxesPaidNet': 456, 'B-DefinedContributionPlanEmployerMatchingContributionPercent': 332, 'B-CostOfGoodsAndServicesSold': 274, 'B-DepreciationDepletionAndAmortization': 338, 'B-InterestExpenseDebt': 191, 'B-LineOfCreditFacilityUnusedCapacityCommitmentFeePercentage': 442, 'B-DisposalGroupIncludingDiscontinuedOperationConsideration': 6, 'B-UnrecognizedTaxBenefitsInterestOnIncomeTaxesAccrued': 14, 'B-SaleOfStockPricePerShare': 278, 'B-DefinedContributionPlanEmployerMatchingContributionPercentOfMatch': 267, 'B-FinitelivedIntangibleAssetsAcquired1': 202, 'B-PaymentsForRepurchaseOfCommonStock': 486, 'B-BusinessCombinationContingentConsiderationLiability': 103, 'B-RelatedPartyTransactionAmountsOfTransaction': 180, 'O': 0} + LMAP = { + "B-BusinessCombinationContingentConsiderationArrangementsRangeOfOutcomesValueHigh": 140, + "B-VariableInterestEntityOwnershipPercentage": 646, + "B-GainLossOnDispositionOfAssets1": 119, + "B-IndefiniteLivedIntangibleAssetsExcludingGoodwill": 46, + "B-MarketingAndAdvertisingExpense": 269, + "B-ReportingUnitPercentageOfFairValueInExcessOfCarryingAmount": 142, + "B-CapitalizedComputerSoftwareNet": 91, + "B-BusinessCombinationConsiderationTransferredEquityInterestsIssuedAndIssuable": 183, + "B-LitigationSettlementExpense": 115, + "B-DefinedBenefitPlanExpectedAmortizationOfGainLossNextFiscalYear": 639, + "B-DeferredCompensationArrangementWithIndividualCompensationExpense": 15, + "B-ReclassificationFromAociCurrentPeriodTax": 152, + "B-OtherComprehensiveIncomeLossBeforeReclassificationsTax": 694, + "B-PreferredStockDividendsPerShareDeclared": 236, + "B-CapitalExpendituresIncurredButNotYetPaid": 344, + "B-DeferredCompensationArrangementWithIndividualContributionsByEmployer": 560, + "B-SeveranceCosts1": 311, + "B-InterestExpense": 784, + "B-SaleOfStockConsiderationReceivedOnTransaction": 76, + "B-LineOfCreditFacilityInterestRateAtPeriodEnd": 822, + "B-SharesIssuedPricePerShare": 137, + "B-EquityMethodInvestmentDifferenceBetweenCarryingAmountAndUnderlyingEquity": 63, + "B-EquitySecuritiesFvNi": 30, + "B-RightOfUseAssetObtainedInExchangeForOperatingLeaseLiability": 118, + "B-DefinedBenefitPlanFundedStatusOfPlan": 547, + "B-SharebasedCompensationArrangementBySharebasedPaymentAwardPurchasePriceOfCommonStockPercent": 323, + "B-TaxCutsAndJobsActOf2017IncomeTaxExpenseBenefit": 256, + "B-LongtermDebtWeightedAverageInterestRate": 364, + "B-ImpairmentOfIntangibleAssetsFinitelived": 71, + "B-ProceedsFromLinesOfCredit": 496, + "B-LongTermPurchaseCommitmentAmount": 701, + "B-DebtInstrumentFairValue": 335, + "B-RestructuringAndRelatedCostCostIncurredToDate1": 52, + "B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsVestedInPeriod": 581, + "B-FiniteLivedIntangibleAssetsAccumulatedAmortization": 143, + "B-StockRepurchasedAndRetiredDuringPeriodValue": 330, + "B-BusinessCombinationProFormaInformationRevenueOfAcquireeSinceAcquisitionDateActual": 77, + "B-ClassOfWarrantOrRightExercisePriceOfWarrantsOrRights1": 
361, + "B-BusinessAcquisitionPurchasePriceAllocationGoodwillExpectedTaxDeductibleAmount": 550, + "B-OperatingLossCarryforwardsValuationAllowance": 173, + "B-BusinessAcquisitionEquityInterestsIssuedOrIssuableNumberOfSharesIssued": 32, + "B-DefinedContributionPlanMaximumAnnualContributionsPerEmployeePercent": 45, + "B-ContractWithCustomerLiabilityCurrent": 2, + "B-IncomeLossFromContinuingOperationsBeforeIncomeTaxesForeign": 474, + "B-FiniteLivedIntangibleAssetsAmortizationExpenseYearThree": 1306, + "B-DefinedBenefitPlanUltimateHealthCareCostTrendRate1": 62, + "B-DefinedBenefitPlanRecognizedNetGainLossDueToSettlements1": 317, + "B-UnrecognizedTaxBenefitsInterestOnIncomeTaxesExpense": 448, + "B-ForeignCurrencyTransactionGainLossRealized": 132, + "B-DeferredTaxAssetsOperatingLossCarryforwardsSubjectToExpiration": 262, + "B-RetainedEarningsAccumulatedDeficit": 174, + "B-ProceedsFromIssuanceOfCommonStock": 209, + "B-EmployeeServiceShareBasedCompensationAllocationOfRecognizedPeriodCostsCapitalizedAmount": 29, + "B-OtherComprehensiveIncomeLossPensionAndOtherPostretirementBenefitPlansTax": 284, + "B-InventoryWriteDown": 465, + "B-RestructuringReserve": 234, + "B-LitigationSettlementAmountAwardedToOtherParty": 42, + "B-DerivativeGainLossOnDerivativeNet": 87, + "B-SharebasedCompensationArrangementBySharebasedPaymentAwardEquityInstrumentsOtherThanOptionsAggregateIntrinsicValueVested": 241, + "B-DerivativeFixedInterestRate": 589, + "B-CashAndCashEquivalentsAtCarryingValue": 257, + "B-ContractWithCustomerAssetNet": 245, + "B-RestructuringAndRelatedCostExpectedCost1": 107, + "B-IncomeTaxHolidayAggregateDollarAmount": 347, + "B-OperatingLeaseCost": 248, + "B-AllowanceForDoubtfulAccountsReceivable": 146, + "B-RepaymentsOfDebt": 416, + "B-InterestPaid": 110, + "B-DeferredFinanceCostsNet": 28, + "B-IncomeTaxExaminationPenaltiesAndInterestAccrued": 271, + "B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsNonvestedNumber": 92, + "B-CapitalizedContractCostNet": 155, + "B-CumulativeEffectOfNewAccountingPrincipleInPeriodOfAdoption": 17, + "B-IncomeTaxesPaid": 495, + "B-EquityMethodInvestmentOtherThanTemporaryImpairment": 22, + "B-InterestPaidNet": 225, + "B-EquitySecuritiesWithoutReadilyDeterminableFairValueAmount": 175, + "B-ImpairmentOfLongLivedAssetsHeldForUse": 313, + "B-GoodwillAcquiredDuringPeriod": 156, + "B-DecreaseInUnrecognizedTaxBenefitsIsReasonablyPossible": 363, + "B-RestructuringAndRelatedCostIncurredCost": 75, + "B-StockRepurchasedDuringPeriodValue": 254, + "B-IncomeTaxExaminationPenaltiesAndInterestExpense": 525, + "B-ImpairmentOfIntangibleAssetsIndefinitelivedExcludingGoodwill": 55, + "B-PreferredStockLiquidationPreference": 157, + "B-ImpairmentOfIntangibleAssetsExcludingGoodwill": 158, + "B-IncomeTaxesPaidNet": 456, + "B-DefinedContributionPlanEmployerMatchingContributionPercent": 332, + "B-CostOfGoodsAndServicesSold": 274, + "B-DepreciationDepletionAndAmortization": 338, + "B-InterestExpenseDebt": 191, + "B-LineOfCreditFacilityUnusedCapacityCommitmentFeePercentage": 442, + "B-DisposalGroupIncludingDiscontinuedOperationConsideration": 6, + "B-UnrecognizedTaxBenefitsInterestOnIncomeTaxesAccrued": 14, + "B-SaleOfStockPricePerShare": 278, + "B-DefinedContributionPlanEmployerMatchingContributionPercentOfMatch": 267, + "B-FinitelivedIntangibleAssetsAcquired1": 202, + "B-PaymentsForRepurchaseOfCommonStock": 486, + "B-BusinessCombinationContingentConsiderationLiability": 103, + "B-RelatedPartyTransactionAmountsOfTransaction": 180, + "O": 0, + } + class 
TATQA(QA): DATASET_PATH = "chancefocus/flare-tatqa" @@ -1365,7 +1494,7 @@ class travelinsurace(Classification): class BARTScorer: - def __init__(self, device='cuda:0', max_length=1024, checkpoint='facebook/bart-large-cnn'): + def __init__(self, device="cuda:0", max_length=1024, checkpoint="facebook/bart-large-cnn"): # Set up model self.device = device self.max_length = max_length @@ -1375,49 +1504,37 @@ def __init__(self, device='cuda:0', max_length=1024, checkpoint='facebook/bart-l self.model.to(device) # Set up loss - self.loss_fct = nn.NLLLoss(reduction='none', ignore_index=self.model.config.pad_token_id) + self.loss_fct = nn.NLLLoss(reduction="none", ignore_index=self.model.config.pad_token_id) self.lsm = nn.LogSoftmax(dim=1) def load(self, path=None): - """ Load model from paraphrase finetuning """ + """Load model from paraphrase finetuning""" if path is None: - path = 'models/bart.pth' + path = "models/bart.pth" self.model.load_state_dict(torch.load(path, map_location=self.device)) def score(self, srcs, tgts, batch_size=4): - """ Score a batch of examples """ + """Score a batch of examples""" score_list = [] for i in range(0, len(srcs), batch_size): - src_list = srcs[i: i + batch_size] - tgt_list = tgts[i: i + batch_size] + src_list = srcs[i : i + batch_size] + tgt_list = tgts[i : i + batch_size] try: with torch.no_grad(): encoded_src = self.tokenizer( - src_list, - max_length=self.max_length, - truncation=True, - padding=True, - return_tensors='pt' + src_list, max_length=self.max_length, truncation=True, padding=True, return_tensors="pt" ) encoded_tgt = self.tokenizer( - tgt_list, - max_length=self.max_length, - truncation=True, - padding=True, - return_tensors='pt' + tgt_list, max_length=self.max_length, truncation=True, padding=True, return_tensors="pt" ) - src_tokens = encoded_src['input_ids'].to(self.device) - src_mask = encoded_src['attention_mask'].to(self.device) + src_tokens = encoded_src["input_ids"].to(self.device) + src_mask = encoded_src["attention_mask"].to(self.device) - tgt_tokens = encoded_tgt['input_ids'].to(self.device) - tgt_mask = encoded_tgt['attention_mask'] + tgt_tokens = encoded_tgt["input_ids"].to(self.device) + tgt_mask = encoded_tgt["attention_mask"] tgt_len = tgt_mask.sum(dim=1).to(self.device) - output = self.model( - input_ids=src_tokens, - attention_mask=src_mask, - labels=tgt_tokens - ) + output = self.model(input_ids=src_tokens, attention_mask=src_mask, labels=tgt_tokens) logits = output.logits.view(-1, self.model.config.vocab_size) loss = self.loss_fct(self.lsm(logits), tgt_tokens.view(-1)) loss = loss.view(tgt_tokens.shape[0], -1) @@ -1427,8 +1544,8 @@ def score(self, srcs, tgts, batch_size=4): except RuntimeError: traceback.print_exc() - print(f'source: {src_list}') - print(f'target: {tgt_list}') + print(f"source: {src_list}") + print(f"target: {tgt_list}") exit(0) return score_list @@ -1453,18 +1570,13 @@ def multi_ref_score(self, srcs, tgts: List[List[str]], agg="mean", batch_size=4) return list(score_list) def test(self, batch_size=3): - """ Test """ + """Test""" src_list = [ - 'This is a very good idea. Although simple, but very insightful.', - 'Can I take a look?', - 'Do not trust him, he is a liar.' + "This is a very good idea. Although simple, but very insightful.", + "Can I take a look?", + "Do not trust him, he is a liar.", ] - tgt_list = [ - "That's stupid.", - "What's the problem?", - 'He is trustworthy.' 
- ] + tgt_list = ["That's stupid.", "What's the problem?", "He is trustworthy."] print(self.score(src_list, tgt_list, batch_size)) - diff --git a/examples/dataset_llm_workflow/workflow.py b/examples/dataset_llm_workflow/workflow.py index 33ba5e39..d8b25270 100644 --- a/examples/dataset_llm_workflow/workflow.py +++ b/examples/dataset_llm_workflow/workflow.py @@ -11,7 +11,7 @@ import numpy as np import matplotlib.pyplot as plt import lm_eval -from lm_eval.models.huggingface import HFLM +from lm_eval.models.huggingface import HFLM from learnware.client import LearnwareClient from learnware.logger import get_module_logger @@ -32,21 +32,35 @@ def _plot_radar_chart(self, benchmark_name, results_table): column_split = [ ["Learnware", "Qwen2.5-7B", "Llama3.1-8B-Instruct", "Llama3.1-8B"], ["Learnware", "Qwen1.5-110B", "Qwen2.5-72B", "Llama3.1-70B-Instruct"], - ["Learnware", "Random", "Best-single", "Oracle"] + ["Learnware", "Random", "Best-single", "Oracle"], ] YTICKS = [0.2, 0.4, 0.6, 0.8, 1.0] ylim = (0, 1.15) x_label_fontsize = 4.5 labels = [ - "Australian", "LendingClub", "FiQA-SA", "FPB", "German", "Headlines", - "NER", "ACL18", "BigData22", "CIKM18", "SC", "FinArg-ARC", "FinArg-ACC", - "FOMC", "MA", "MLESG", "MultiFin" + "Australian", + "LendingClub", + "FiQA-SA", + "FPB", + "German", + "Headlines", + "NER", + "ACL18", + "BigData22", + "CIKM18", + "SC", + "FinArg-ARC", + "FinArg-ACC", + "FOMC", + "MA", + "MLESG", + "MultiFin", ] elif benchmark_name == "math": column_split = [ ["Learnware", "Qwen2.5-7B"], ["Learnware", "Qwen1.5-110B"], - ["Learnware", "Random", "Best-single", "Oracle"] + ["Learnware", "Random", "Best-single", "Oracle"], ] YTICKS = [0.4, 0.6, 0.8, 1.0] ylim = (0.3, 1.3) @@ -55,54 +69,50 @@ def _plot_radar_chart(self, benchmark_name, results_table): column_split = [ ["Learnware", "Qwen2.5-7B"], ["Learnware", "Flan-PaLM-540B"], - ["Learnware", "Random", "Best-single", "Oracle"] + ["Learnware", "Random", "Best-single", "Oracle"], ] YTICKS = [0.8, 0.9, 1.0] ylim = (0.75, 1.1) x_label_fontsize = 8 - - num_vars = len(labels) + + num_vars = len(labels) angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist() angles += angles[:1] fig, axes = plt.subplots(1, 3, figsize=(16, 5), subplot_kw=dict(polar=True)) - model_names = [ - "Learnware vs Base Model", - "Learnware vs Large-scale Model", - "Specialized SLMs" - ] + model_names = ["Learnware vs Base Model", "Learnware vs Large-scale Model", "Specialized SLMs"] colors = [ - np.array([0.9, 0.17, 0.31]), - np.array([1.0, 0.49, 0.0]), + np.array([0.9, 0.17, 0.31]), + np.array([1.0, 0.49, 0.0]), np.array([0.19, 0.55, 0.91]), np.array([0.56, 0.74, 0.56]), - np.array([0.66, 0.66, 0.66]) + np.array([0.66, 0.66, 0.66]), ] for i, (ax, model_name) in enumerate(zip(axes, model_names)): - ax.set_xticks(angles[:-1]) - ax.set_yticks(YTICKS) + ax.set_xticks(angles[:-1]) + ax.set_yticks(YTICKS) ax.set_xticklabels(labels, fontsize=x_label_fontsize, rotation=30) - ax.set_yticklabels([str(y) for y in YTICKS]) + ax.set_yticklabels([str(y) for y in YTICKS]) ax.set_ylim(ylim[0], ylim[1]) - ax.set_title(model_name, pad=30) + ax.set_title(model_name, pad=30) methods = column_split[i] - for i, (method, color) in enumerate(zip(methods, colors[:len(methods)])): + for i, (method, color) in enumerate(zip(methods, colors[: len(methods)])): if i == 0: zorder = 2 else: zorder = 1 - + values = (results_table[method] / results_table["Oracle"]).tolist() values += values[:1] - ax.plot(angles, values, color=color, linewidth=2, label=method, zorder=zorder) - 
ax.fill(angles, values, color=color, alpha=0.1, zorder=zorder) + ax.plot(angles, values, color=color, linewidth=2, label=method, zorder=zorder) + ax.fill(angles, values, color=color, alpha=0.1, zorder=zorder) ax.legend(loc="lower left", fontsize=8, bbox_to_anchor=(0.85, 0.9)) @@ -111,14 +121,19 @@ def _plot_radar_chart(self, benchmark_name, results_table): plt.savefig(f"results/figs/llm-{benchmark_name}.pdf") def _anlysis_table(self, benchmark_name, table, score_results): - if benchmark_name == 'finance': + if benchmark_name == "finance": start_column_id = 7 - else: # math / medical + else: # math / medical start_column_id = 3 table = table[:-1] - performance = table.melt(id_vars=['Dataset'], value_vars=table.columns[start_column_id:], var_name="Source_Config") + performance = table.melt( + id_vars=["Dataset"], value_vars=table.columns[start_column_id:], var_name="Source_Config" + ) performance_extra = table.iloc[:, :start_column_id] - performance = pd.concat([performance, performance["Source_Config"].str.extract(r"(.+)-(\d+)").rename(columns={0:"Learnware"})], axis=1) + performance = pd.concat( + [performance, performance["Source_Config"].str.extract(r"(.+)-(\d+)").rename(columns={0: "Learnware"})], + axis=1, + ) performance["Learnware"] = performance["Learnware"].apply(lambda s: s[:-1] if s[-1] == "-" else s) performance = performance.rename(columns={"Dataset": "User"}) performance.drop(columns=[1], inplace=True) @@ -128,22 +143,23 @@ def _anlysis_table(self, benchmark_name, table, score_results): performance_extra = performance_extra.set_index("User") score_results = pd.DataFrame(score_results) - score_results["Rank-PAVE"] = score_results.groupby("User")["Similarity"].rank(method="min", ascending=False).astype(int) - 1 + score_results["Rank-PAVE"] = ( + score_results.groupby("User")["Similarity"].rank(method="min", ascending=False).astype(int) - 1 + ) adaptation_info = pd.merge(score_results, perf_merged, on=["Learnware", "User"]) - random_value = (adaptation_info[["User", "value"]] - .groupby(['User']).mean()).rename(columns={"value": "Random"}) - oracle_value = (adaptation_info[["User", "value"]] - .groupby(['User']).max()).rename(columns={"value": "Oracle"}) - pave_value = (adaptation_info[adaptation_info["Rank-PAVE"] < 1][["User", "value"]] - .groupby(['User']).mean()).rename(columns={"value": "Learnware"}) - + random_value = (adaptation_info[["User", "value"]].groupby(["User"]).mean()).rename(columns={"value": "Random"}) + oracle_value = (adaptation_info[["User", "value"]].groupby(["User"]).max()).rename(columns={"value": "Oracle"}) + pave_value = ( + adaptation_info[adaptation_info["Rank-PAVE"] < 1][["User", "value"]].groupby(["User"]).mean() + ).rename(columns={"value": "Learnware"}) + # Best-single perf_pivot = perf_merged.pivot(index="User", columns="Learnware", values="value") best_column = perf_pivot.mean().idxmax() - best_single = perf_pivot[[best_column]].rename(columns={best_column: 'Best-single'}) + best_single = perf_pivot[[best_column]].rename(columns={best_column: "Best-single"}) adaptation_table = pd.concat([random_value, pave_value, best_single, oracle_value], axis=1) - + # join performance_extra adaptation_table = performance_extra.join(adaptation_table) @@ -164,7 +180,7 @@ def _anlysis_table(self, benchmark_name, table, score_results): win_tie_loss[col] = f"{win}/{tie}/{loss}" # Oracle win/tie/loss - oracle_scores = adaptation_table["Oracle"] + oracle_scores = adaptation_table["Oracle"] win_tie_loss_o = {} for col in adaptation_table.columns: @@ -175,7 +191,7 @@ 
def _anlysis_table(self, benchmark_name, table, score_results): loss = (oracle_scores < adaptation_table[col]).sum() win_tie_loss_o[col] = f"{win}/{tie}/{loss}" - adaptation_table.loc['Avg.'] = adaptation_table.mean() + adaptation_table.loc["Avg."] = adaptation_table.mean() adaptation_table.loc["Avg. rank"] = avg_rank adaptation_table = adaptation_table.round(2) adaptation_table.loc["Learnware (win/tie/loss)"] = win_tie_loss @@ -190,11 +206,13 @@ def _anlysis_table(self, benchmark_name, table, score_results): def _prepare_market(self, benchmark: Benchmark, rebuild=False): client = LearnwareClient() self.llm_benchmark = benchmark - self.llm_market = instantiate_learnware_market(market_id=f"llm_{self.llm_benchmark.name}", name="llm", rebuild=rebuild) + self.llm_market = instantiate_learnware_market( + market_id=f"llm_{self.llm_benchmark.name}", name="llm", rebuild=rebuild + ) self.user_semantic = client.get_semantic_specification(self.llm_benchmark.learnware_ids[0]) self.user_semantic["Name"]["Values"] = "" self.user_semantic["Description"]["Values"] = "" - self.user_semantic["License"]["Values"] = ['Apache-2.0', 'Others'] + self.user_semantic["License"]["Values"] = ["Apache-2.0", "Others"] if len(self.llm_market) == 0 or rebuild is True: for learnware_id in self.llm_benchmark.learnware_ids: @@ -215,19 +233,19 @@ def _prepare_market(self, benchmark: Benchmark, rebuild=False): def build_specification_and_cache(self, name, saved_folder, benchmark: Benchmark): generative_spec = GenerativeModelSpecification() generative_spec_path = os.path.join(saved_folder, name, "generative.pth") - + os.makedirs(os.path.join(saved_folder, name), exist_ok=True) - + if os.path.exists(generative_spec_path): generative_spec.load(generative_spec_path) else: train_dataset = benchmark.get_user_dataset(name) generative_spec.generate_stat_spec_from_data(dataset=train_dataset) generative_spec.save(generative_spec_path) - + return generative_spec - def _get_scores(self, benchmark_name, base_model: str, adapter_path, batch_size='auto'): + def _get_scores(self, benchmark_name, base_model: str, adapter_path, batch_size="auto"): benchmark_configs = CONFIG[benchmark_name] task_manager = lm_eval.tasks.TaskManager() task_names = [config.name for config in benchmark_configs] @@ -244,23 +262,29 @@ def _get_scores(self, benchmark_name, base_model: str, adapter_path, batch_size= adapter_id = adapter_path.split("/")[-2] if adapter_path else None task_names_str = ",".join(task_names) if adapter_path: - os.system(f"CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch -m lm_eval --model hf \ + os.system( + f"CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch -m lm_eval --model hf \ --model_args pretrained={base_model},peft={adapter_path} \ --tasks {task_names_str} \ --batch_size {batch_size} \ - --output_path ./eval_results/{benchmark_name}") + --output_path ./eval_results/{benchmark_name}" + ) elif base_model in ["Qwen/Qwen1.5-110B", "Qwen/Qwen2.5-72B", "NousResearch/Meta-Llama-3.1-70B-Instruct"]: - os.system(f"CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch --num_processes 1 -m lm_eval --model hf \ + os.system( + f"CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch --num_processes 1 -m lm_eval --model hf \ --model_args pretrained={base_model},parallelize=True \ --tasks {task_names_str} \ --batch_size {batch_size} \ - --output_path ./eval_results/{benchmark_name}") + --output_path ./eval_results/{benchmark_name}" + ) else: - os.system(f"CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch -m lm_eval --model hf \ + os.system( + 
f"CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch -m lm_eval --model hf \ --model_args pretrained={base_model} \ --tasks {task_names_str} \ --batch_size {batch_size} \ - --output_path ./eval_results/{benchmark_name}") + --output_path ./eval_results/{benchmark_name}" + ) if adapter_id: for dir_name in os.listdir(results_dir): @@ -280,38 +304,34 @@ def _get_scores(self, benchmark_name, base_model: str, adapter_path, batch_size= score_list = [] for config in benchmark_configs: - score = results['results'][config.name][f'{config.eval_metric},none'] * 100 + score = results["results"][config.name][f"{config.eval_metric},none"] * 100 score = round(score, 2) logger.info(f"Name: {config.name}, Score: {score}") score_list.append(score) - + return score_list def llm_example(self, benchmark_name, rebuild=False, skip_eval=True): benchmark = Benchmark(benchmark_name) self._prepare_market(benchmark, rebuild) user_names = benchmark.get_user_names() - - score_results = { - "User": [], - "Learnware": [], - "Similarity": [] - } + + score_results = {"User": [], "Learnware": [], "Similarity": []} for name in user_names: title = "=" * 20 + name + "=" * 20 print(title) - + generative_spec = self.build_specification_and_cache(name, "user_specs", benchmark) user_info = BaseUserInfo( semantic_spec=self.user_semantic, stat_info={"GenerativeModelSpecification": generative_spec} ) logger.info(f"Searching Market for user: {name}") - + search_result = self.llm_market.search_learnware(user_info) single_result = search_result.get_single_results() - + scores = {} for result in single_result: learnware_name = result.learnware.specification.semantic_spec["Name"]["Values"] @@ -329,26 +349,40 @@ def llm_example(self, benchmark_name, rebuild=False, skip_eval=True): if benchmark_name == "medical": performance_table = { "Qwen2.5-7B": self._get_scores(benchmark_name, "Qwen/Qwen2.5-7B", None), - "Flan-PaLM-540B": [57.60, 67.60, 63.70, 80.40, 88.90, 76.30, 75.00, 83.80, 79.00] # copied from Open Medical LLM Leaderboard + "Flan-PaLM-540B": [ + 57.60, + 67.60, + 63.70, + 80.40, + 88.90, + 76.30, + 75.00, + 83.80, + 79.00, + ], # copied from Open Medical LLM Leaderboard } elif benchmark_name == "math": performance_table = { "Qwen2.5-7B": self._get_scores(benchmark_name, "Qwen/Qwen2.5-7B", None), - "Qwen1.5-110B": self._get_scores(benchmark_name, "Qwen/Qwen1.5-110B", None) + "Qwen1.5-110B": self._get_scores(benchmark_name, "Qwen/Qwen1.5-110B", None), } elif benchmark_name == "finance": performance_table = { "Qwen2.5-7B": self._get_scores(benchmark_name, "Qwen/Qwen2.5-7B", None), - "Llama3.1-8B-Instruct": self._get_scores(benchmark_name, "NousResearch/Meta-Llama-3.1-8B-Instruct", None), + "Llama3.1-8B-Instruct": self._get_scores( + benchmark_name, "NousResearch/Meta-Llama-3.1-8B-Instruct", None + ), "Llama3.1-8B": self._get_scores(benchmark_name, "NousResearch/Meta-Llama-3.1-8B", None), "Qwen1.5-110B": self._get_scores(benchmark_name, "Qwen/Qwen1.5-110B", None), "Qwen2.5-72B": self._get_scores(benchmark_name, "Qwen/Qwen2.5-72B", None), - "Llama3.1-70B-Instruct": self._get_scores(benchmark_name, "NousResearch/Meta-Llama-3.1-70B-Instruct", None), + "Llama3.1-70B-Instruct": self._get_scores( + benchmark_name, "NousResearch/Meta-Llama-3.1-70B-Instruct", None + ), } for learnware_id in all_learnwares_ids: learnware = self.llm_market.get_learnware_by_ids(learnware_id) - base_model = learnware.specification.semantic_spec["Description"]["Values"].split(' ')[-1] + base_model = learnware.specification.semantic_spec["Description"]["Values"].split(" 
")[-1] adapter_path = os.path.join(self.llm_market.get_learnware_dir_path_by_ids(learnware_id), "adapter") score_list = self._get_scores(benchmark_name, base_model, adapter_path) performance_table[learnware.specification.semantic_spec["Name"]["Values"]] = score_list @@ -356,7 +390,7 @@ def llm_example(self, benchmark_name, rebuild=False, skip_eval=True): performance_table = pd.DataFrame(performance_table) performance_table = performance_table._append(performance_table.mean().round(2), ignore_index=True) datasets = benchmark.get_user_names() - performance_table.insert(0, "Dataset", datasets+['Avg']) + performance_table.insert(0, "Dataset", datasets + ["Avg"]) performance_table.to_csv(f"model_performance/{benchmark_name}-new.csv", index=False) else: performance_table = pd.read_csv(f"model_performance/{benchmark_name}.csv") diff --git a/examples/dataset_table_workflow/base.py b/examples/dataset_table_workflow/base.py index 3d26fd60..80848dfc 100644 --- a/examples/dataset_table_workflow/base.py +++ b/examples/dataset_table_workflow/base.py @@ -68,13 +68,15 @@ def _prepare_market(self, benchmark_config, name, rebuild, retrain): market_id=self.benchmark.name, name=name, rebuild=rebuild, - organizer_kwargs={ - "auto_update": True, - "auto_update_limit": len(self.benchmark.learnware_ids), - **market_mapping_params, - } - if retrain - else None, + organizer_kwargs=( + { + "auto_update": True, + "auto_update_limit": len(self.benchmark.learnware_ids), + **market_mapping_params, + } + if retrain + else None + ), ) self.user_semantic = client.get_semantic_specification(self.benchmark.learnware_ids[0]) self.user_semantic["Name"]["Values"] = "" diff --git a/learnware/client/learnware_client.py b/learnware/client/learnware_client.py index 498e28d7..4249917e 100644 --- a/learnware/client/learnware_client.py +++ b/learnware/client/learnware_client.py @@ -301,7 +301,7 @@ def search_learnware(self, user_info: BaseUserInfo, page_size=10, page_index=0): "page": page_index, }, headers=self.headers, - timeout=self.timeout + timeout=self.timeout, ) result = response.json() if result["code"] != 0: @@ -355,12 +355,12 @@ def get_pretrained_path(self, learnware_id: str): pass self.unzip_learnware(learnware_id) pass - + yaml_file = os.path.join(self.default_unzip_path, learnware_id, C.learnware_folder_config["yaml_file"]) with open(yaml_file, "r") as fin: learnware_info = yaml.safe_load(fin) pass - pretrained_path = learnware_info['model'].get("weights_file_path") + pretrained_path = learnware_info["model"].get("weights_file_path") if pretrained_path is None: raise FileNotFoundError(f"Pretrained path not found in learnware {learnware_id}") diff --git a/learnware/config.py b/learnware/config.py index 7df5c38b..4e3e9601 100644 --- a/learnware/config.py +++ b/learnware/config.py @@ -172,7 +172,7 @@ def get_platform(): "learnware_folder_config": { "yaml_file": "learnware.yaml", "module_file": "__init__.py", - "weights_file_path" : "weights", + "weights_file_path": "weights", }, "database_url": f"sqlite:///{DATABASE_PATH}", "max_reduced_set_size": 1310720, diff --git a/learnware/learnware/__init__.py b/learnware/learnware/__init__.py index 31bd8f81..78942176 100644 --- a/learnware/learnware/__init__.py +++ b/learnware/learnware/__init__.py @@ -60,21 +60,22 @@ def get_learnware_from_dirpath( if "module_path" not in learnware_config["model"]: learnware_config["model"]["module_path"] = C.learnware_folder_config["module_file"] - if ( - semantic_spec["Data"]["Values"] == ["Text"] - and semantic_spec["Task"]["Values"] == ["Text 
Generation"] - ): + if semantic_spec["Data"]["Values"] == ["Text"] and semantic_spec["Task"]["Values"] == ["Text Generation"]: if "weights_file_path" not in learnware_config["model"]: learnware_config["model"]["weights_file_path"] = C.learnware_folder_config["weights_file_path"] - + learnware_weights_path = os.path.join(learnware_dirpath, learnware_config["model"]["weights_file_path"]) assert os.path.exists( learnware_weights_path ), f"Weights are not found for the Text Generation Model learnware_{id}, please check the learnware.yaml or zipfile." - + if semantic_spec["Model"]["Values"] == ["PEFT Model"]: - assert "required_learnware_ids" in learnware_config["model"], f"'required_learnware_ids' is not found for the PEFT Model learnware_{id}, please check the learnware.yaml." - assert len(learnware_config["model"]["required_learnware_ids"]) != 0, f"'required_learnware_ids' can't be empty for the PEFT Model learnware_{id}, please check the learnware.yaml." + assert ( + "required_learnware_ids" in learnware_config["model"] + ), f"'required_learnware_ids' is not found for the PEFT Model learnware_{id}, please check the learnware.yaml." + assert ( + len(learnware_config["model"]["required_learnware_ids"]) != 0 + ), f"'required_learnware_ids' can't be empty for the PEFT Model learnware_{id}, please check the learnware.yaml." learnware_spec = Specification() for _stat_spec in learnware_config["stat_specifications"]: diff --git a/learnware/market/easy/checker.py b/learnware/market/easy/checker.py index 8e630140..453e9a7d 100644 --- a/learnware/market/easy/checker.py +++ b/learnware/market/easy/checker.py @@ -175,7 +175,9 @@ def __call__(self, learnware): stat_spec = learnware.get_specification().get_stat_spec_by_name(spec_type) distance = float(stat_spec.dist(stat_spec)) if not np.isfinite(distance): - message = f"The distance between statistical specifications is not finite, where distance={distance}" + message = ( + f"The distance between statistical specifications is not finite, where distance={distance}" + ) logger.warning(message) return self.INVALID_LEARNWARE, message @@ -191,7 +193,11 @@ def __call__(self, learnware): return self.INVALID_LEARNWARE, message inputs = np.random.randn(10, *input_shape) - elif spec_type in ["RKMETextSpecification", "GenerativeModelSpecification", "LLMGeneralCapabilitySpecification"]: + elif spec_type in [ + "RKMETextSpecification", + "GenerativeModelSpecification", + "LLMGeneralCapabilitySpecification", + ]: if semantic_spec["Model"]["Values"][0] != "Others": len_ = random.randint(10, 1000) diff --git a/learnware/market/easy/searcher.py b/learnware/market/easy/searcher.py index 0b8f3e66..a833689c 100644 --- a/learnware/market/easy/searcher.py +++ b/learnware/market/easy/searcher.py @@ -124,7 +124,7 @@ def _match_semantic_spec_tag(self, semantic_spec1, semantic_spec2) -> bool: v1 = semantic_spec1[key].get("Values", "") if len(v1) == 0: continue - + if key not in semantic_spec2: if "Others" in v1: # v1 contains "Others" and key not in semantic_spec2 @@ -132,7 +132,7 @@ def _match_semantic_spec_tag(self, semantic_spec1, semantic_spec2) -> bool: else: # user input contains some key that is not in database return False - + v2 = semantic_spec2[key].get("Values", "") if key not in ("Name", "Description"): if len(v2) == 0: diff --git a/learnware/market/llm/organizer.py b/learnware/market/llm/organizer.py index ddcbe4f0..ea07363b 100644 --- a/learnware/market/llm/organizer.py +++ b/learnware/market/llm/organizer.py @@ -18,9 +18,7 @@ class 
LLMEasyOrganizer(HeteroMapTableOrganizer): - def _update_learnware_general_capability_spec( - self, ids: Union[str, List[str]] - ): + def _update_learnware_general_capability_spec(self, ids: Union[str, List[str]]): """Update learnware by ids, attempting to generate LLMGeneralCapabilitySpecification for them. Parameters diff --git a/learnware/market/llm/searcher.py b/learnware/market/llm/searcher.py index 24cb229c..015559d9 100644 --- a/learnware/market/llm/searcher.py +++ b/learnware/market/llm/searcher.py @@ -62,10 +62,8 @@ def __call__( sorted_score_list = self._convert_similarity_to_score(sorted_metric_list) else: sorted_score_list = self._convert_dist_to_score(sorted_metric_list) - - logger.info( - f"After search by user spec, learnware_list length is {len(learnware_list)}" - ) + + logger.info(f"After search by user spec, learnware_list length is {len(learnware_list)}") if len(single_learnware_list) == 1 and sorted_score_list[0] < 0.6: sorted_score_list[0] = 0.6 @@ -84,7 +82,7 @@ def _search_by_taskvector_spec_single( self, learnware_list: List[Learnware], user_spec: Union[Specification], - stat_spec_type: str = "GenerativeModelSpecification" + stat_spec_type: str = "GenerativeModelSpecification", ) -> Tuple[List[float], List[Learnware]]: """Calculate the distances between learnwares in the given learnware_list and user_spec @@ -122,12 +120,10 @@ def _search_by_taskvector_spec_single( sorted_learnware_list = [learnware_list[filtered_idx_list[idx]] for idx in sorted_idx_list] return sorted_dist_list, sorted_learnware_list - + def _convert_similarity_to_score(self, sorted_similarity_list, temperature=0.1): sorted_similarity = torch.asarray(sorted_similarity_list) - sorted_similarity = torch.stack([ - sorted_similarity, torch.zeros_like(sorted_similarity) - ]) - + sorted_similarity = torch.stack([sorted_similarity, torch.zeros_like(sorted_similarity)]) + scores = softmax(sorted_similarity / temperature, dim=0)[0].tolist() - return scores * 100 \ No newline at end of file + return scores * 100 diff --git a/learnware/model/base.py b/learnware/model/base.py index 98d990a1..6e0fa19b 100644 --- a/learnware/model/base.py +++ b/learnware/model/base.py @@ -48,7 +48,7 @@ def finetune(self, X: np.ndarray, y: np.ndarray): def get_model(self): """Get the nn.Module object - + Returns: nn.Module: The model object, such as a PreTrainedModel from the transformers library. 
""" diff --git a/learnware/specification/module.py b/learnware/specification/module.py index c1b28cd8..9aff5f5a 100644 --- a/learnware/specification/module.py +++ b/learnware/specification/module.py @@ -178,20 +178,18 @@ def generate_rkme_text_spec( def generate_generative_model_spec( - dataset: Optional[Dataset] = None, - dataset_text_field="text", - X: List[str] = None, - verbose: bool = True, - **kwargs + dataset: Optional[Dataset] = None, dataset_text_field="text", X: List[str] = None, verbose: bool = True, **kwargs ) -> GenerativeModelSpecification: # Check input type if X is not None and (not isinstance(X, list) or not all(isinstance(item, str) for item in X)): raise TypeError("Input data must be a list of strings.") - + # Generate generative model spec task_vector_spec = GenerativeModelSpecification() - task_vector_spec.generate_stat_spec_from_data(dataset=dataset, dataset_text_field=dataset_text_field, X=X, verbose=verbose, **kwargs) - + task_vector_spec.generate_stat_spec_from_data( + dataset=dataset, dataset_text_field=dataset_text_field, X=X, verbose=verbose, **kwargs + ) + return task_vector_spec @@ -241,7 +239,10 @@ def generate_semantic_spec( semantic_specification = dict() semantic_specification["Data"] = {"Type": "Class", "Values": [data_type] if data_type is not None else []} semantic_specification["Task"] = {"Type": "Class", "Values": [task_type] if task_type is not None else []} - semantic_specification["Model"] = {"Type": "Optional", "Values": [model_type] if model_type is not None else ["Others"]} + semantic_specification["Model"] = { + "Type": "Optional", + "Values": [model_type] if model_type is not None else ["Others"], + } semantic_specification["Library"] = { "Type": "Class", "Values": [library_type] if library_type is not None else [], diff --git a/learnware/specification/regular/__init__.py b/learnware/specification/regular/__init__.py index 5e21f434..eb6268c0 100644 --- a/learnware/specification/regular/__init__.py +++ b/learnware/specification/regular/__init__.py @@ -10,5 +10,5 @@ "RKMETableSpecification", "rkme_solve_qp", "RKMETextSpecification", - "GenerativeModelSpecification" + "GenerativeModelSpecification", ] diff --git a/learnware/specification/regular/base.py b/learnware/specification/regular/base.py index f43b3c95..f03a6de1 100644 --- a/learnware/specification/regular/base.py +++ b/learnware/specification/regular/base.py @@ -18,20 +18,19 @@ def generate_stat_spec_from_data(self, **kwargs): class TaskVectorSpecification(RegularStatSpecification): - + @property def task_vector(self): raise NotImplemented def similarity(self, other: TaskVectorSpecification) -> float: - """Compute cosine similarity between two task vectors. 
- """ + """Compute cosine similarity between two task vectors.""" v1, v2 = self.task_vector, other.task_vector - + return cosine_similarity(v1, v2, dim=0) def dist(self, other: BaseStatSpecification): v1, v2 = self.task_vector, other.task_vector - - similarity = cosine_similarity(v1, v2, dim=0) # [-1, 1] - return (-similarity + 1) / 2 \ No newline at end of file + + similarity = cosine_similarity(v1, v2, dim=0) # [-1, 1] + return (-similarity + 1) / 2 diff --git a/learnware/specification/regular/text/__init__.py b/learnware/specification/regular/text/__init__.py index 2dd221fc..b9bfba6d 100644 --- a/learnware/specification/regular/text/__init__.py +++ b/learnware/specification/regular/text/__init__.py @@ -6,7 +6,9 @@ if not is_torch_available(verbose=False): RKMETextSpecification = None GenerativeModelSpecification = None - logger.error("RKMETextSpecification and GenerativeModelSpecification are not available because 'torch' is not installed!") + logger.error( + "RKMETextSpecification and GenerativeModelSpecification are not available because 'torch' is not installed!" + ) else: from .rkme import RKMETextSpecification from .generative import GenerativeModelSpecification diff --git a/learnware/specification/regular/text/generative.py b/learnware/specification/regular/text/generative.py index 6b68cbb8..ac1b96eb 100644 --- a/learnware/specification/regular/text/generative.py +++ b/learnware/specification/regular/text/generative.py @@ -15,12 +15,7 @@ from peft import LoraConfig, PeftModel from datasets import Dataset -from transformers import ( - PreTrainedModel, - TrainingArguments, - Qwen2ForCausalLM, - Qwen2Tokenizer - ) +from transformers import PreTrainedModel, TrainingArguments, Qwen2ForCausalLM, Qwen2Tokenizer from peft import get_peft_model @@ -34,20 +29,22 @@ class GenerativeModelSpecification(TaskVectorSpecification): """Task Vector Specification for Large Language Model""" - def __init__(self, - cuda_idx: int = None, - attn_implementation: str = "eager", - per_device_train_batch_size: int = 2, - gradient_accumulation_steps: int = 1, - max_seq_length: int = 2048, - **kwargs): + def __init__( + self, + cuda_idx: int = None, + attn_implementation: str = "eager", + per_device_train_batch_size: int = 2, + gradient_accumulation_steps: int = 1, + max_seq_length: int = 2048, + **kwargs, + ): """Initializing Task Vector Specification's parameters. - + Parameters ---------- cuda_idx : int, optional A flag indicating whether use CUDA during RKME computation. -1 indicates CUDA not used. None indicates automatically choose device - + attn_implementation : str, optional The type of attention implementation to use. Default is 'eager'. @@ -65,17 +62,17 @@ def __init__(self, Additional keyword arguments. 
""" super(GenerativeModelSpecification, self).__init__(type=self.__class__.__name__) - + self._cuda_idx = allocate_cuda_idx() if cuda_idx is None else cuda_idx self._device = choose_device(cuda_idx=self._cuda_idx) - + self._task_vector = None - + self.attn_implementation = attn_implementation self.per_device_train_batch_size = per_device_train_batch_size self.gradient_accumulation_steps = gradient_accumulation_steps self.max_seq_length = max_seq_length - + self.__extra_args = { "weight_decay_l1": 1.0, "weight_decay_l2": 0.5, @@ -84,55 +81,51 @@ def __init__(self, "max_grad_norm": 1.0, "warmup_ratio": 0.03, } - - + @property def task_vector(self): if self._task_vector is None: raise Exception("Call generate_stat_spec_from_data first!") - + return self._task_vector - + @task_vector.setter def task_vector(self, value): self._task_vector = value - + def generate_stat_spec_from_data( self, dataset: Optional[Dataset] = None, dataset_text_field="text", X: List[str] = None, verbose: bool = True, - beimingwu = True, - **kwargs + beimingwu=True, + **kwargs, ): """Initializing Task Vector Specification's parameters. - + Parameters ---------- - + dataset_text_field : str, optional Name of the text field of the dataset. Default is "text". - + """ if dataset is None: assert X is not None, "X and dataset cannot both be None." dataset = Dataset.from_dict({dataset_text_field: X}) - + with tempfile.TemporaryDirectory() as temp_dir: tokenizer, model = self._init_tokenizer_model(beimingwu) trainer_config = self._trainer_config(temp_dir, dataset_text_field) trainer = self._init_trainer(model, tokenizer, dataset, trainer_config) - + param_0 = [p.detach().clone() for n, p in trainer.model.named_parameters() if p.requires_grad] trainer.train() param_1 = [p.detach().clone() for n, p in trainer.model.named_parameters() if p.requires_grad] - self._task_vector = torch.concatenate([ - (p1 - p0).reshape(-1) for p0, p1 in zip(param_0, param_1) - ]) - - + self._task_vector = torch.concatenate([(p1 - p0).reshape(-1) for p0, p1 in zip(param_0, param_1)]) + def _init_tokenizer_model(self, beimingwu): """ Initialize foundational model (e.g. Qwen) used for task vector generation. 
@@ -145,20 +138,20 @@ def _init_tokenizer_model(self, beimingwu): base_model_path = client.get_pretrained_path("00002890") else: base_model_path = "Qwen/Qwen2.5-0.5B" - - set_seed(3407) + + set_seed(3407) tokenizer = Qwen2Tokenizer.from_pretrained(base_model_path) model = Qwen2ForCausalLM.from_pretrained( base_model_path, attn_implementation=self.attn_implementation, torch_dtype=torch.bfloat16, ).to(self._device) - + if beimingwu: client = LearnwareClient() adapter_path = client.get_pretrained_path("00002891") model = PeftModel.from_pretrained(model, adapter_path) - + for n, p in model.named_parameters(): if "lora_B" in n: p.requires_grad = True @@ -169,19 +162,18 @@ def _init_tokenizer_model(self, beimingwu): lora_dropout=0.1, bias="none", task_type="CAUSAL_LM", - target_modules=["q_proj", "k_proj", "v_proj"] - ) + target_modules=["q_proj", "k_proj", "v_proj"], + ) model = get_peft_model(model, peft_config) - + for n, p in model.named_parameters(): if "lora_A" in n: p.requires_grad = False - + return tokenizer, model - def _init_trainer(self, model, tokenizer, train_dataset, args): - + # TODO: set_seed(3407) trainer = CustomSFTTrainer( model=model, @@ -192,9 +184,8 @@ def _init_trainer(self, model, tokenizer, train_dataset, args): ) # Work around trl package bug with multi-GPU parallelism trainer.args._n_gpu = 1 - + return trainer - def _trainer_config(self, temp_dir, dataset_text_field): training_params = SFTConfig( @@ -217,35 +208,28 @@ def _trainer_config(self, temp_dir, dataset_text_field): dataset_text_field=dataset_text_field, max_seq_length=self.max_seq_length, dataloader_num_workers=16, - seed = 3407, + seed=3407, ) - + return training_params - - + def save(self, filepath: str): - torch.save({ - "type": self.type, - "task_vector": self.task_vector.detach().cpu() - }, filepath) - - + torch.save({"type": self.type, "task_vector": self.task_vector.detach().cpu()}, filepath) + def load(self, filepath: str): state = torch.load(filepath, weights_only=True) if state["type"] != self.type: - logger.warning("{} may not be consistent with this class {}.".format( - state["type"], self.type - )) + logger.warning("{} may not be consistent with this class {}.".format(state["type"], self.type)) self._task_vector = state["task_vector"].to(self._device) - - + + class CustomSFTTrainer(trl.SFTTrainer): - + def __init__(self, weight_decay_l1=None, **kwargs): - super().__init__(**kwargs) + super().__init__(**kwargs) model: Union[PreTrainedModel, nn.Module] = kwargs["model"] args: TrainingArguments = kwargs["args"] - + if hasattr(args, "weight_decay_l1") and (weight_decay_l1 is not None): print("Warning! weight_decay_l1 is overwrited by key args.") if weight_decay_l1 is not None: @@ -254,9 +238,9 @@ def __init__(self, weight_decay_l1=None, **kwargs): self.weight_decay_l1 = args.weight_decay_l1 else: assert False, "weight_decay_l1 shounld be given." 
- + self.parameters_l1_regularized = None - + def train( self, resume_from_checkpoint: Optional[Union[str, bool]] = None, @@ -267,14 +251,20 @@ def train( self.parameters_l1_regularized = [ (p, torch.nn.Parameter(p.clone().detach())) for n, p in self.model.named_parameters() if p.requires_grad ] - - return super().train(resume_from_checkpoint=resume_from_checkpoint, trial=trial, - ignore_keys_for_eval=ignore_keys_for_eval, **kwargs) - + + return super().train( + resume_from_checkpoint=resume_from_checkpoint, + trial=trial, + ignore_keys_for_eval=ignore_keys_for_eval, + **kwargs, + ) + def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None): # implement custom logic here - default_loss, outputs = super().compute_loss(model, inputs, return_outputs=True, num_items_in_batch=num_items_in_batch) - + default_loss, outputs = super().compute_loss( + model, inputs, return_outputs=True, num_items_in_batch=num_items_in_batch + ) + if self.weight_decay_l1 > 0: l1_norm = sum((torch.linalg.norm(p - p0, 1) for p, p0 in self.parameters_l1_regularized)) # We mask lora_A after init. @@ -282,9 +272,9 @@ def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=N loss = default_loss + l1_norm else: loss = default_loss - + return (loss, outputs) if return_outputs else loss - + def set_seed(seed): random.seed(seed) @@ -294,4 +284,4 @@ def set_seed(seed): torch.cuda.manual_seed(seed) torch.cuda.manual_seed_all(seed) torch.backends.cudnn.benchmark = False - torch.backends.cudnn.deterministic = True \ No newline at end of file + torch.backends.cudnn.deterministic = True diff --git a/learnware/specification/system/llm_general_capability_spec/__init__.py b/learnware/specification/system/llm_general_capability_spec/__init__.py index a067f62b..da1cc9ea 100644 --- a/learnware/specification/system/llm_general_capability_spec/__init__.py +++ b/learnware/specification/system/llm_general_capability_spec/__init__.py @@ -5,10 +5,8 @@ if not is_torch_available(verbose=False): LLMGeneralCapabilitySpecification = None - logger.error( - "LLMGeneralCapabilitySpecification are not available because 'torch' is not installed!" 
- ) + logger.error("LLMGeneralCapabilitySpecification are not available because 'torch' is not installed!") else: from .spec import LLMGeneralCapabilitySpecification -__all__ = ["LLMGeneralCapabilitySpecification"] \ No newline at end of file +__all__ = ["LLMGeneralCapabilitySpecification"] diff --git a/learnware/specification/system/llm_general_capability_spec/config.py b/learnware/specification/system/llm_general_capability_spec/config.py index da0c9fe7..3430ff55 100644 --- a/learnware/specification/system/llm_general_capability_spec/config.py +++ b/learnware/specification/system/llm_general_capability_spec/config.py @@ -3,11 +3,13 @@ from ....tests.benchmarks import LLMBenchmarkConfig -# Score normalization functions, copied from the interactive notebook in https://huggingface.co/docs/leaderboards/open_llm_leaderboard/normalization +# Score normalization functions, copied from the interactive notebook in https://huggingface.co/docs/leaderboards/open_llm_leaderboard/normalization + def normalize_within_range(value, lower_bound=0, higher_bound=1): return (np.clip(value - lower_bound, 0, None)) / (higher_bound - lower_bound) * 100 + def compute_bbh_score(data): bbh_subtasks = { "sports_understanding": 2, @@ -33,14 +35,14 @@ def compute_bbh_score(data): "ruin_names": 6, "penguins_in_a_table": 5, "boolean_expressions": 2, - "tracking_shuffled_objects_five_objects": 5 + "tracking_shuffled_objects_five_objects": 5, } # Normalize BBH subtasks scores bbh_scores = [] for subtask, num_choices in bbh_subtasks.items(): - subtask_key = f'leaderboard_bbh_{subtask}' - if subtask_key in data['results']: - bbh_raw_score = data['results'][subtask_key]['acc_norm,none'] + subtask_key = f"leaderboard_bbh_{subtask}" + if subtask_key in data["results"]: + bbh_raw_score = data["results"][subtask_key]["acc_norm,none"] lower_bound = 1 / num_choices normalized_score = normalize_within_range(bbh_raw_score, lower_bound, 1.0) bbh_scores.append(normalized_score) @@ -49,64 +51,61 @@ def compute_bbh_score(data): bbh_score = sum(bbh_scores) / len(bbh_scores) return round(bbh_score, 2) + def compute_gpqa_score(data): - gpqa_subtasks = [ - "leaderboard_gpqa_diamond", - "leaderboard_gpqa_extended", - "leaderboard_gpqa_main" - ] + gpqa_subtasks = ["leaderboard_gpqa_diamond", "leaderboard_gpqa_extended", "leaderboard_gpqa_main"] # Normalize GPQA scores gpqa_raw_scores = [] for subtask in gpqa_subtasks: - gpqa_raw_scores.append(data['results'][subtask]['acc_norm,none']) + gpqa_raw_scores.append(data["results"][subtask]["acc_norm,none"]) gpqa_raw_score = sum(gpqa_raw_scores) / len(gpqa_raw_scores) gpqa_score = normalize_within_range(gpqa_raw_score, 0.25, 1.0) return round(gpqa_score, 2) + def compute_ifeval_score(data): # Compute IFEval - ifeval_inst_score = data['results']['leaderboard_ifeval']['inst_level_strict_acc,none'] * 100 - ifeval_prompt_score = data['results']['leaderboard_ifeval']['prompt_level_strict_acc,none'] * 100 + ifeval_inst_score = data["results"]["leaderboard_ifeval"]["inst_level_strict_acc,none"] * 100 + ifeval_prompt_score = data["results"]["leaderboard_ifeval"]["prompt_level_strict_acc,none"] * 100 # Average IFEval scores ifeval_score = (ifeval_inst_score + ifeval_prompt_score) / 2 return round(ifeval_score, 2) + def compute_math_score(data): - math_subtasks = [ - "leaderboard_math_algebra_hard", - "leaderboard_math_counting_and_prob_hard", - "leaderboard_math_geometry_hard", - "leaderboard_math_intermediate_algebra_hard", - "leaderboard_math_num_theory_hard", - "leaderboard_math_prealgebra_hard", - 
"leaderboard_math_precalculus_hard" + math_subtasks = [ + "leaderboard_math_algebra_hard", + "leaderboard_math_counting_and_prob_hard", + "leaderboard_math_geometry_hard", + "leaderboard_math_intermediate_algebra_hard", + "leaderboard_math_num_theory_hard", + "leaderboard_math_prealgebra_hard", + "leaderboard_math_precalculus_hard", ] # Calculate the MATH score math_raw_scores = [] for subtask in math_subtasks: - math_raw_scores.append(data['results'][subtask]['exact_match,none']) + math_raw_scores.append(data["results"][subtask]["exact_match,none"]) math_raw_score = sum(math_raw_scores) / len(math_raw_scores) math_score = normalize_within_range(math_raw_score, 0, 1.0) return round(math_score, 2) + def compute_mmlu_pro_score(data): # Normalize MMLU PRO scores - mmlu_pro_raw_score = data['results']['leaderboard_mmlu_pro']['acc,none'] + mmlu_pro_raw_score = data["results"]["leaderboard_mmlu_pro"]["acc,none"] mmlu_pro_score = normalize_within_range(mmlu_pro_raw_score, 0.1, 1.0) return round(mmlu_pro_score, 2) + def compute_musr_score(data): - musr_subtasks = { - 'murder_mysteries': 2, - 'object_placements': 5, - 'team_allocation': 3 - } + musr_subtasks = {"murder_mysteries": 2, "object_placements": 5, "team_allocation": 3} # Normalize MUSR scores musr_scores = [] for subtask, num_choices in musr_subtasks.items(): - musr_raw_score = data['results'][f'leaderboard_musr_{subtask}']['acc_norm,none'] + musr_raw_score = data["results"][f"leaderboard_musr_{subtask}"]["acc_norm,none"] lower_bound = 1 / num_choices normalized_score = normalize_within_range(musr_raw_score, lower_bound, 1.0) musr_scores.append(normalized_score) diff --git a/learnware/specification/system/llm_general_capability_spec/spec.py b/learnware/specification/system/llm_general_capability_spec/spec.py index 9b43da72..1e0da8cd 100644 --- a/learnware/specification/system/llm_general_capability_spec/spec.py +++ b/learnware/specification/system/llm_general_capability_spec/spec.py @@ -2,7 +2,7 @@ import traceback from typing import List, Dict, Optional import lm_eval -from lm_eval.models.huggingface import HFLM +from lm_eval.models.huggingface import HFLM import codecs import json import os @@ -16,6 +16,7 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false" + class LLMGeneralCapabilitySpecification(SystemStatSpecification): """Large Language Model General Capability Specification""" @@ -35,7 +36,7 @@ def _get_scores(learnware: Learnware, benchmark_configs: List[LLMBenchmarkConfig Learnware to generate General Capability Specification. benchmark_configs : Optional[List[LLMBenchmarkConfig]] List of LLMBenchmarkConfig. - + Returns ------- Dict[LLMBenchmarkConfig, float] @@ -54,20 +55,20 @@ def _get_scores(learnware: Learnware, benchmark_configs: List[LLMBenchmarkConfig tasks=[config.name], task_manager=task_manager, ) - + if config.score_function: score = config.score_function(results) else: - score = results['results'][config.name][f'{config.eval_metric},none'] * 100 + score = results["results"][config.name][f"{config.eval_metric},none"] * 100 score = round(score, 2) logger.info(f"Name: {config.name}, Score: {score}") score_dict[config.name] = score - + except Exception as e: traceback.print_exc() message = f"Evaluation of {config.name} failed! Due to {repr(e)}." 
logger.warning(message) - + return score_dict def generate_stat_spec_from_system( @@ -90,10 +91,12 @@ def generate_stat_spec_from_system( if benchmark_configs: for config in benchmark_configs: if config.eval_metric == None and config.score_function == None: - raise Exception("Must specify an evaluation metric or a score computing function in a LLMBenchmarkConfig object to get the evaluation score.") + raise Exception( + "Must specify an evaluation metric or a score computing function in a LLMBenchmarkConfig object to get the evaluation score." + ) else: logger.info("No passed benchmark_configs. Set benchmark_configs by default.") - benchmark_configs = self.benchmark_configs + benchmark_configs = self.benchmark_configs if update_existing: logger.info("Update existing LLMGeneralCapabilitySpecification.") self.score_dict = self._get_scores(learnware, benchmark_configs) @@ -114,11 +117,10 @@ def generate_stat_spec_from_system( else: logger.info("All LLMBenchmarkConfig have been evaluated before. No update.") - def __str__(self): spec_to_save = self.get_states() return json.dumps(spec_to_save, separators=(",", ":")) - + def save(self, filepath: str): """Save the computed specification to a specified path in JSON format. diff --git a/learnware/tests/benchmarks/__init__.py b/learnware/tests/benchmarks/__init__.py index 0426d417..70230e7b 100644 --- a/learnware/tests/benchmarks/__init__.py +++ b/learnware/tests/benchmarks/__init__.py @@ -98,28 +98,28 @@ def prepare_dataset(self) -> None: name=self.subset_name, **self.dataset_kwargs if self.dataset_kwargs is not None else {}, ) - + def get_train_dataset(self) -> Dataset: if self.train_split: train_dataset = self.dataset[self.train_split] - if self.dataset_path == "meta-math/GSM8K_zh": - train_dataset = train_dataset.filter(lambda x: x['split']=='train') + if self.dataset_path == "meta-math/GSM8K_zh": + train_dataset = train_dataset.filter(lambda x: x["split"] == "train") if self.preprocess_function: - train_dataset = train_dataset.map(lambda x: {"text": self.preprocess_function(x)}, batched = True) + train_dataset = train_dataset.map(lambda x: {"text": self.preprocess_function(x)}, batched=True) return train_dataset - + def get_val_dataset(self) -> Dataset: if self.validation_split: val_dataset = self.dataset[self.validation_split] if self.preprocess_function: - val_dataset = val_dataset.map(lambda x: {"text": self.preprocess_function(x)}, batched = True) + val_dataset = val_dataset.map(lambda x: {"text": self.preprocess_function(x)}, batched=True) return val_dataset def get_test_dataset(self) -> Dataset: if self.test_split: test_dataset = self.dataset[self.test_split] if self.preprocess_function: - test_dataset = test_dataset.map(lambda x: {"text": self.preprocess_function(x)}, batched = True) + test_dataset = test_dataset.map(lambda x: {"text": self.preprocess_function(x)}, batched=True) return test_dataset def get_train_data(self) -> List[str]: @@ -128,7 +128,7 @@ def get_train_data(self) -> List[str]: train_dataset = self.get_train_dataset() train_data = train_dataset["text"] return train_data - + def get_val_data(self) -> List[str]: if not self.preprocess_function: raise Exception("Must specify a preprocess function to get validation data!") diff --git a/learnware/tests/benchmarks/llm_process_funcs.py b/learnware/tests/benchmarks/llm_process_funcs.py index 40dd2f27..5fe9a009 100644 --- a/learnware/tests/benchmarks/llm_process_funcs.py +++ b/learnware/tests/benchmarks/llm_process_funcs.py @@ -136,7 +136,7 @@ def preprocess_finance(docs) -> 
List[str]: outputs = docs["answer"] texts = [] for instruction, output in zip(instructions, outputs): - instruction.rstrip(' Answer:') + instruction.rstrip(" Answer:") text = alpaca_no_input_prompt.format(instruction, output) texts.append(text) return texts @@ -153,7 +153,6 @@ def preprocess_math_train(docs) -> List[str]: return texts - def preprocess_medmcqa_no_label(docs) -> List[str]: opas = docs["opa"] opbs = docs["opb"] @@ -245,11 +244,7 @@ def preprocess_mmlu_no_label(docs) -> List[str]: for question, options in zip(questions, choices): texts.append( "{}\nA. {}\nB. {}\nC. {}\nD. {}\nAnswer:".format( - question.strip(), - options[0], - options[1], - options[2], - options[3] + question.strip(), options[0], options[1], options[2], options[3] ) ) return texts @@ -258,17 +253,12 @@ def preprocess_mmlu_no_label(docs) -> List[str]: def preprocess_mmlu(docs) -> List[str]: questions = docs["question"] choices = docs["choices"] - answers = docs["answer"] + answers = docs["answer"] texts = [] for question, options, answer in zip(questions, choices, answers): texts.append( "{}\nA. {}\nB. {}\nC. {}\nD. {}\nAnswer: {}".format( - question.strip(), - options[0], - options[1], - options[2], - options[3], - ["A", "B", "C", "D"][answer] + question.strip(), options[0], options[1], options[2], options[3], ["A", "B", "C", "D"][answer] ) ) return texts @@ -307,9 +297,7 @@ def preprocess_cmmlu_no_label(docs) -> List[str]: ds = docs["D"] texts = [] for question, a, b, c, d in zip(questions, as_, bs, cs, ds): - texts.append("{}\nA. {}\nB. {}\nC. {}\nD. {}\n答案:".format( - question.strip(), a, b, c, d - )) + texts.append("{}\nA. {}\nB. {}\nC. {}\nD. {}\n答案:".format(question.strip(), a, b, c, d)) return texts @@ -319,12 +307,10 @@ def preprocess_cmmlu(docs) -> List[str]: bs = docs["B"] cs = docs["C"] ds = docs["D"] - answers = docs["Answer"] + answers = docs["Answer"] texts = [] for question, a, b, c, d, answer in zip(questions, as_, bs, cs, ds, answers): - texts.append("{}\nA. {}\nB. {}\nC. {}\nD. {}\n答案:{}".format( - question.strip(), a, b, c, d, answer - )) + texts.append("{}\nA. {}\nB. {}\nC. {}\nD. {}\n答案:{}".format(question.strip(), a, b, c, d, answer)) return texts @@ -340,19 +326,20 @@ def preprocess_mathqa(docs) -> List[str]: options = docs["options"] texts = [] for problem, correct, option in zip(problems, corrects, options): - choices = [ - c[4:].rstrip(" ,") - for c in re.findall(r"[abcd] \) .*?, |e \) .*?$", option) - ] - + choices = [c[4:].rstrip(" ,") for c in re.findall(r"[abcd] \) .*?, |e \) .*?$", option)] + # answer = ['a', 'b', 'c', 'd', 'e'].index(correct) - texts.append("Question: {}\na. {}\nb. {}\nc. {}\nd. {}\ne. {}\nAnswer: {}".format(problem, choices[0], choices[1], choices[2], choices[3], choices[4], correct)) + texts.append( + "Question: {}\na. {}\nb. {}\nc. {}\nd. {}\ne. 
{}\nAnswer: {}".format( + problem, choices[0], choices[1], choices[2], choices[3], choices[4], correct + ) + ) return texts def preprocess_mgsm_no_label(docs) -> List[str]: questions = docs["question"] - texts = [f"问题: "+question+"\n逐步解答:" for question in questions] + texts = [f"问题: " + question + "\n逐步解答:" for question in questions] return texts @@ -387,4 +374,3 @@ def preprocess_math_no_label(docs) -> List[str]: def preprocess_finance_no_label(docs) -> List[str]: return docs["query"] - diff --git a/setup.py b/setup.py index 27dfcb40..cd416126 100644 --- a/setup.py +++ b/setup.py @@ -56,7 +56,7 @@ def get_version(rel_path: str) -> str: "trl>=0.11.4", "datasets>=2.16.0", "peft>=0.13.2", - "lm_eval>=0.4.7" + "lm_eval>=0.4.7", ] DEV_REQUIRED = [ diff --git a/tests/test_specification/test_general_spec.py b/tests/test_specification/test_general_spec.py index 73338feb..cd6c472d 100644 --- a/tests/test_specification/test_general_spec.py +++ b/tests/test_specification/test_general_spec.py @@ -12,12 +12,13 @@ os.environ["CUDA_VISIBLE_DEVICES"] = "1" + class TestGeneralCapabilitySpec(unittest.TestCase): @staticmethod def _test_general_spec(learnware, benchmark_configs): spec = LLMGeneralCapabilitySpecification() spec.generate_stat_spec_from_system(learnware=learnware, benchmark_configs=benchmark_configs) - + with tempfile.TemporaryDirectory(prefix="learnware_") as tempdir: spec_path = os.path.join(tempdir, "general_spec.json") spec.save(spec_path) @@ -34,7 +35,7 @@ def test_general_spec(self): client = LearnwareClient() learnware = client.load_learnware(learnware_id="00002681") self._test_general_spec(learnware, test_benchmark_configs) - + @staticmethod def _prepare_learnware_market() -> LearnwareMarket: """initialize learnware market""" @@ -65,4 +66,4 @@ def test_in_checker_organizer(self): if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/test_specification/test_text_generative.py b/tests/test_specification/test_text_generative.py index 24c0ced5..7a124006 100644 --- a/tests/test_specification/test_text_generative.py +++ b/tests/test_specification/test_text_generative.py @@ -16,14 +16,15 @@ from text_generative_utils import DATASET, prepare_data + class TestGenerativeModelSpecification(unittest.TestCase): @staticmethod def _test_with_X(X): spec = GenerativeModelSpecification() spec.generate_stat_spec_from_data(X=X, dataset_text_field="txt") - + task_vector = spec.task_vector - + with tempfile.TemporaryDirectory(prefix="learnware_") as tempdir: spec_path = os.path.join(tempdir, "spec.pth") spec.save(spec_path) @@ -33,18 +34,18 @@ def _test_with_X(X): spec2 = GenerativeModelSpecification() spec2.load(spec_path) - + torch.testing.assert_close(task_vector.cpu(), spec2.task_vector.cpu()) - + assert spec2.type == "GenerativeModelSpecification" - + @staticmethod def _test_with_dataset(dataset): spec = GenerativeModelSpecification() spec.generate_stat_spec_from_data(dataset=dataset) - + task_vector = spec.task_vector - + with tempfile.TemporaryDirectory(prefix="learnware_") as tempdir: spec_path = os.path.join(tempdir, "spec.pth") spec.save(spec_path) @@ -54,16 +55,16 @@ def _test_with_dataset(dataset): spec2 = GenerativeModelSpecification() spec2.load(spec_path) - + torch.testing.assert_close(task_vector.cpu(), spec2.task_vector.cpu()) assert spec2.type == "GenerativeModelSpecification" - + @staticmethod def _test_with_generating_directly(X): spec = generate_generative_model_spec(X=X, dataset_text_field="name") - + task_vector = spec.task_vector 
- + with tempfile.TemporaryDirectory(prefix="learnware_") as tempdir: spec_path = os.path.join(tempdir, "spec.pth") spec.save(spec_path) @@ -73,45 +74,39 @@ def _test_with_generating_directly(X): spec2 = GenerativeModelSpecification() spec2.load(spec_path) - + torch.testing.assert_close(task_vector.cpu(), spec2.task_vector.cpu()) assert spec2.type == "GenerativeModelSpecification" - + def test_loading_from_bwm(self): spec = GenerativeModelSpecification() _, model1 = spec._init_tokenizer_model(True) _, model2 = spec._init_tokenizer_model(False) - + params1, params2 = dict(model1.named_parameters()), dict(model2.named_parameters()) for k in model1.state_dict(): torch.testing.assert_close(params1[k].cpu(), params2[k].cpu()) def test_generating_spec(self): train_dataset = prepare_data(DATASET["pubmedqa"]) - + self._test_with_X(train_dataset["text"]) self._test_with_dataset(train_dataset) self._test_with_dataset(train_dataset, beimingwu=False) - + def test_searching_spec(self): specs, learnwares = [], [] for i, dataset_name in enumerate(["pubmedqa", "medmcqa"]): train_dataset = prepare_data(DATASET[dataset_name]) - + spec = GenerativeModelSpecification() spec.generate_stat_spec_from_data(dataset=train_dataset) - + specs.append(spec) - learnwares.append(Learnware(str(i), None, Specification( - stat_spec={spec.type: spec} - ), "")) - + learnwares.append(Learnware(str(i), None, Specification(stat_spec={spec.type: spec}), "")) + searcher = LLMStatSearcher(None) - searcher._search_by_taskvector_spec_single( - learnwares, - specs[-1], - specs[-1].type - ) + searcher._search_by_taskvector_spec_single(learnwares, specs[-1], specs[-1].type) if __name__ == "__main__": diff --git a/tests/test_specification/text_generative_utils.py b/tests/test_specification/text_generative_utils.py index 78b2347b..40ca828e 100644 --- a/tests/test_specification/text_generative_utils.py +++ b/tests/test_specification/text_generative_utils.py @@ -6,6 +6,7 @@ "pubmedqa": "bigbio/pubmed_qa,pubmed_qa_labeled_fold0_source", } + def preprocess_medmcqa(doc) -> str: """ Question: @@ -30,6 +31,7 @@ def preprocess_medmcqa(doc) -> str: prompt += "Answer:" return prompt + def preprocess_pubmedqa(doc) -> str: ctxs = "\n".join(doc["CONTEXTS"]) return "Abstract: {}\nQuestion: {}\nAnswer:".format( @@ -37,12 +39,14 @@ def preprocess_pubmedqa(doc) -> str: doc["QUESTION"], ) + PROCESS_FUNC = { # medical user "openlifescienceai/medmcqa": preprocess_medmcqa, "bigbio/pubmed_qa": preprocess_pubmedqa, } + def prepare_data(dataset_name_str): temp_list = dataset_name_str.split(",") subset_name = None @@ -50,8 +54,8 @@ def prepare_data(dataset_name_str): subset_name = temp_list[1] dataset_name = temp_list[0] if subset_name: - test_dataset = load_dataset(dataset_name, subset_name, split="test", trust_remote_code=True) + test_dataset = load_dataset(dataset_name, subset_name, split="test", trust_remote_code=True) else: - test_dataset = load_dataset(dataset_name, split="test", trust_remote_code=True) + test_dataset = load_dataset(dataset_name, split="test", trust_remote_code=True) test_dataset = test_dataset.map(lambda x: {"text": PROCESS_FUNC[dataset_name](x)}) return test_dataset From 6b9cd913888dfcddd2b53ef05ff0935b2af1412a Mon Sep 17 00:00:00 2001 From: Zhang Xinyu <82640795+Asymptotez@users.noreply.github.com> Date: Sun, 25 May 2025 17:01:49 +0800 Subject: [PATCH 102/108] [FIX] Update github actions ubuntu version Just for testing. 
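Context: the GitHub-hosted ubuntu-20.04 runner image has been retired upstream, so the
test matrix is bumped to the still-supported ubuntu-22.04 image; the pip-based workflow
gets the same change in the next patch.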
--- .github/workflows/test_learnware_with_source.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_learnware_with_source.yaml b/.github/workflows/test_learnware_with_source.yaml index 13b6ac94..0e4bd7ad 100644 --- a/.github/workflows/test_learnware_with_source.yaml +++ b/.github/workflows/test_learnware_with_source.yaml @@ -13,7 +13,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-20.04] + os: [ubuntu-22.04] python-version: [3.9] steps: @@ -50,4 +50,4 @@ jobs: - name: Test workflow run: | - conda run -n learnware python -m pytest tests/test_workflow/test_hetero_workflow.py \ No newline at end of file + conda run -n learnware python -m pytest tests/test_workflow/test_hetero_workflow.py From 49878d06a43ce857e85759a0081379397be4130a Mon Sep 17 00:00:00 2001 From: Zhang Xinyu <82640795+Asymptotez@users.noreply.github.com> Date: Sun, 25 May 2025 17:02:35 +0800 Subject: [PATCH 103/108] [FIX] Update github actions ubuntu version Just for testing. --- .github/workflows/test_learnware_with_pip.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_learnware_with_pip.yaml b/.github/workflows/test_learnware_with_pip.yaml index c59a63fa..48fccf2c 100644 --- a/.github/workflows/test_learnware_with_pip.yaml +++ b/.github/workflows/test_learnware_with_pip.yaml @@ -13,7 +13,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-20.04] + os: [ubuntu-22.04] python-version: [3.9] steps: From c5b4199f6f00f75fd785675147cae1cdc0eef351 Mon Sep 17 00:00:00 2001 From: HaoyuShi Date: Sun, 25 May 2025 23:11:30 +0800 Subject: [PATCH 104/108] [MNT] Remove redundant comments. --- .../specification/regular/text/generative.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/learnware/specification/regular/text/generative.py b/learnware/specification/regular/text/generative.py index ac1b96eb..c3ab7af7 100644 --- a/learnware/specification/regular/text/generative.py +++ b/learnware/specification/regular/text/generative.py @@ -189,21 +189,21 @@ def _init_trainer(self, model, tokenizer, train_dataset, args): def _trainer_config(self, temp_dir, dataset_text_field): training_params = SFTConfig( - output_dir=temp_dir, # 结果路径 + output_dir=temp_dir, max_steps=self.__extra_args["max_steps"], - per_device_train_batch_size=self.per_device_train_batch_size, # 这是每个GPU的训练批次大小 - gradient_accumulation_steps=self.gradient_accumulation_steps, # 累积多个步骤的梯度,以有效地增加批次大小 - learning_rate=self.__extra_args["lr"], # 初始学习率 - weight_decay=self.__extra_args["weight_decay_l2"], # 权重衰减率 - optim="adamw_torch", # 优化器 + per_device_train_batch_size=self.per_device_train_batch_size, + gradient_accumulation_steps=self.gradient_accumulation_steps, + learning_rate=self.__extra_args["lr"], + weight_decay=self.__extra_args["weight_decay_l2"], + optim="adamw_torch", eval_strategy="no", save_strategy="no", - # fp16=True, # 启用混合精度训练 - # bf16=True, # 启用BF16 - max_grad_norm=self.__extra_args["max_grad_norm"], # 裁剪梯度 - warmup_ratio=self.__extra_args["warmup_ratio"], # 训练开始时的预热样本比例 - group_by_length=True, # 将训练数据集中大致相同长度的样本分组到同一batch中,提升prefill效率 - lr_scheduler_type="cosine", # 学习率调度器衰减策略 + # fp16=True, + # bf16=True, + max_grad_norm=self.__extra_args["max_grad_norm"], + warmup_ratio=self.__extra_args["warmup_ratio"], + group_by_length=True, + lr_scheduler_type="cosine", ddp_timeout=180000000, dataset_text_field=dataset_text_field, max_seq_length=self.max_seq_length, From b98853eddd8ff6668efc1dcb53f4c6cddf31618e 
Mon Sep 17 00:00:00 2001 From: Asymptotez <201220101@smail.nju.edu.cn> Date: Mon, 26 May 2025 00:59:26 +0800 Subject: [PATCH 105/108] [MNT] Refactor imports and clean up code structure across multiple modules --- .../extra_tasks/flare/flare.py | 42 +++++++++---------- learnware/market/__init__.py | 6 +-- learnware/market/easy/checker.py | 1 - learnware/market/easy/searcher.py | 2 +- learnware/market/llm/organizer.py | 7 +--- learnware/market/llm/searcher.py | 10 ++--- learnware/market/module.py | 6 +-- learnware/specification/__init__.py | 4 +- learnware/specification/module.py | 3 +- learnware/specification/regular/__init__.py | 2 +- learnware/specification/regular/base.py | 7 ++-- .../specification/regular/text/__init__.py | 2 +- .../specification/regular/text/generative.py | 17 +++----- .../llm_general_capability_spec/config.py | 1 + .../llm_general_capability_spec/spec.py | 18 ++++---- learnware/tests/__init__.py | 2 +- learnware/tests/benchmarks/__init__.py | 4 +- learnware/tests/benchmarks/config.py | 2 +- 18 files changed, 63 insertions(+), 73 deletions(-) diff --git a/examples/dataset_llm_workflow/extra_tasks/flare/flare.py b/examples/dataset_llm_workflow/extra_tasks/flare/flare.py index 2a16b6d2..1557bebe 100644 --- a/examples/dataset_llm_workflow/extra_tasks/flare/flare.py +++ b/examples/dataset_llm_workflow/extra_tasks/flare/flare.py @@ -2,6 +2,11 @@ FLARE """ +from typing import List +from transformers import BartTokenizer, BartForConditionalGeneration +import traceback +import torch.nn as nn +import torch from lm_eval.api.instance import Instance import numpy as np from seqeval.metrics import f1_score as entity_score @@ -32,7 +37,7 @@ def process_text(entity_string, text): # Iterate over the entity list # print (entity_list) for entity, entity_type in entity_list: - entity_words = entity.split() + entity.split() entity_lower = entity # Find start and end index of each occurrence of the entity in the text @@ -63,7 +68,7 @@ def process_text(entity_string, text): _CITATION = """ @misc{xie2023pixiu, - title={PIXIU: A Large Language Model, Instruction Data and Evaluation Benchmark for Finance}, + title={PIXIU: A Large Language Model, Instruction Data and Evaluation Benchmark for Finance}, author={Qianqian Xie and Weiguang Han and Xiao Zhang and Yanzhao Lai and Min Peng and Alejandro Lopez-Lira and Jimin Huang}, year={2023}, eprint={2306.05443}, @@ -294,7 +299,7 @@ def process_result(self, pred, gold, tokens): for index, pre in enumerate(pred.split("\n")[: len(tokens)]): try: word, label = pre.split(":") - except: + except BaseException: continue if word == tokens[index] and label in self.LMAP.keys(): format_pred[index] = label @@ -312,7 +317,7 @@ def process_label_result(self, pred, gold, tokens): for index, pre in enumerate(pred.split("\n")[: len(tokens)]): try: word, label = pre.split(":") - except: + except BaseException: continue if word == tokens[index]: format_pred[index] = self.LMAP.get(label, -1) @@ -1000,9 +1005,9 @@ def label_avg(cls, items): preds = np.array(preds) golds = np.array(golds) all_f1s = [] - for l in label_set: - pds = preds[labels == l] - gds = golds[labels == l] + for label_val in label_set: + pds = preds[labels == label_val] + gds = golds[labels == label_val] f1 = f1_score(gds, pds, average="weighted", labels=[0, 1]) all_f1s.append(f1) return np.mean(all_f1s) @@ -1273,14 +1278,15 @@ class FSRL(SequentialLabeling): } -class CFA(Classification): - DATASET_PATH = "chancefocus/flare-cfa" - - def has_training_docs(self): - return False - - def 
has_validation_docs(self): - return False +# This class is already defined above at line 1200 +# class CFA(Classification): +# DATASET_PATH = "chancefocus/flare-cfa" +# +# def has_training_docs(self): +# return False +# +# def has_validation_docs(self): +# return False # class FinargECCAUC(Classification): @@ -1485,12 +1491,6 @@ class travelinsurace(Classification): ############### # %% -import torch -import torch.nn as nn -import traceback -from transformers import BartTokenizer, BartForConditionalGeneration -from typing import List -import numpy as np class BARTScorer: diff --git a/learnware/market/__init__.py b/learnware/market/__init__.py index 852151bb..760884ca 100644 --- a/learnware/market/__init__.py +++ b/learnware/market/__init__.py @@ -2,13 +2,13 @@ from .base import BaseChecker, BaseOrganizer, BaseSearcher, BaseUserInfo, LearnwareMarket from .classes import CondaChecker from .easy import ( - EasyOrganizer, EasyExactSemanticSearcher, EasyFuzzSemanticSearcher, - EasyStatSearcher, - SeqCombinedSearcher, + EasyOrganizer, EasySemanticChecker, EasyStatChecker, + EasyStatSearcher, + SeqCombinedSearcher, ) from .evolve import EvolvedOrganizer from .evolve_anchor import EvolvedAnchoredOrganizer diff --git a/learnware/market/easy/checker.py b/learnware/market/easy/checker.py index 453e9a7d..568ce346 100644 --- a/learnware/market/easy/checker.py +++ b/learnware/market/easy/checker.py @@ -198,7 +198,6 @@ def __call__(self, learnware): "GenerativeModelSpecification", "LLMGeneralCapabilitySpecification", ]: - if semantic_spec["Model"]["Values"][0] != "Others": len_ = random.randint(10, 1000) inputs = EasyStatChecker._generate_random_text_list(10, "en", len_, len_) diff --git a/learnware/market/easy/searcher.py b/learnware/market/easy/searcher.py index a833689c..9847cb1c 100644 --- a/learnware/market/easy/searcher.py +++ b/learnware/market/easy/searcher.py @@ -6,9 +6,9 @@ from rapidfuzz import fuzz from ..base import ( + AtomicSearcher, BaseOrganizer, BaseSearcher, - AtomicSearcher, BaseUserInfo, MultipleSearchItem, SearchResults, diff --git a/learnware/market/llm/organizer.py b/learnware/market/llm/organizer.py index ea07363b..c44c3bb9 100644 --- a/learnware/market/llm/organizer.py +++ b/learnware/market/llm/organizer.py @@ -1,23 +1,20 @@ import os import tempfile -import zipfile import traceback +import zipfile from shutil import copyfile from typing import List, Union from ..heterogeneous import HeteroMapTableOrganizer -from ..base import BaseChecker from ...config import C -from ...utils import read_yaml_to_dict, save_dict_to_yaml from ...logger import get_module_logger from ...specification import LLMGeneralCapabilitySpecification -from ...tests.benchmarks import BenchmarkConfig +from ...utils import read_yaml_to_dict, save_dict_to_yaml logger = get_module_logger("llm_easy_organizer") class LLMEasyOrganizer(HeteroMapTableOrganizer): - def _update_learnware_general_capability_spec(self, ids: Union[str, List[str]]): """Update learnware by ids, attempting to generate LLMGeneralCapabilitySpecification for them. 
diff --git a/learnware/market/llm/searcher.py b/learnware/market/llm/searcher.py index 015559d9..83015c6d 100644 --- a/learnware/market/llm/searcher.py +++ b/learnware/market/llm/searcher.py @@ -1,17 +1,17 @@ -from typing import List, Optional, Tuple, Union +from typing import List, Tuple, Union import numpy as np import torch +from torch.nn.functional import softmax from learnware.learnware.base import Learnware from learnware.specification.base import Specification -from ..utils import parse_specification_type -from ..base import BaseUserInfo, MultipleSearchItem, SearchResults, AtomicSearcher, SingleSearchItem + +from ..base import BaseUserInfo, SearchResults, SingleSearchItem from ..easy import EasyStatSearcher +from ..utils import parse_specification_type from ...logger import get_module_logger -from torch.nn.functional import softmax - logger = get_module_logger("llm_searcher") diff --git a/learnware/market/module.py b/learnware/market/module.py index 2dbb7a6e..ba4d9acf 100644 --- a/learnware/market/module.py +++ b/learnware/market/module.py @@ -1,12 +1,12 @@ from .base import LearnwareMarket from .classes import CondaChecker from .easy import ( - EasyOrganizer, EasyFuzzSemanticSearcher, - EasyStatSearcher, - SeqCombinedSearcher, + EasyOrganizer, EasySemanticChecker, EasyStatChecker, + EasyStatSearcher, + SeqCombinedSearcher, ) from .heterogeneous import HeteroMapTableOrganizer, HeteroStatSearcher from .llm import LLMEasyOrganizer, LLMStatSearcher diff --git a/learnware/specification/__init__.py b/learnware/specification/__init__.py index f2573955..64bfe7d1 100644 --- a/learnware/specification/__init__.py +++ b/learnware/specification/__init__.py @@ -1,11 +1,11 @@ from .base import BaseStatSpecification, Specification from .regular import ( + GenerativeModelSpecification, RegularStatSpecification, RKMEImageSpecification, RKMEStatSpecification, RKMETableSpecification, RKMETextSpecification, - GenerativeModelSpecification, rkme_solve_qp, ) from .system import HeteroMapTableSpecification, LLMGeneralCapabilitySpecification @@ -20,10 +20,10 @@ generate_semantic_spec = None else: from .module import ( + generate_generative_model_spec, generate_rkme_image_spec, generate_rkme_table_spec, generate_rkme_text_spec, - generate_generative_model_spec, generate_semantic_spec, generate_stat_spec, ) diff --git a/learnware/specification/module.py b/learnware/specification/module.py index 9aff5f5a..ffd0d2fb 100644 --- a/learnware/specification/module.py +++ b/learnware/specification/module.py @@ -3,10 +3,9 @@ import numpy as np import pandas as pd import torch - from datasets import Dataset -from .regular import RKMEImageSpecification, RKMETableSpecification, RKMETextSpecification, GenerativeModelSpecification +from .regular import GenerativeModelSpecification, RKMEImageSpecification, RKMETableSpecification, RKMETextSpecification from .utils import convert_to_numpy from ..config import C diff --git a/learnware/specification/regular/__init__.py b/learnware/specification/regular/__init__.py index eb6268c0..3544b566 100644 --- a/learnware/specification/regular/__init__.py +++ b/learnware/specification/regular/__init__.py @@ -1,7 +1,7 @@ from .base import RegularStatSpecification from .image import RKMEImageSpecification from .table import RKMEStatSpecification, RKMETableSpecification, rkme_solve_qp -from .text import RKMETextSpecification, GenerativeModelSpecification +from .text import GenerativeModelSpecification, RKMETextSpecification __all__ = [ "RegularStatSpecification", diff --git 
a/learnware/specification/regular/base.py b/learnware/specification/regular/base.py index f03a6de1..8159d12e 100644 --- a/learnware/specification/regular/base.py +++ b/learnware/specification/regular/base.py @@ -1,9 +1,9 @@ from __future__ import annotations -from ..base import BaseStatSpecification - from torch.nn.functional import cosine_similarity +from ..base import BaseStatSpecification + class RegularStatSpecification(BaseStatSpecification): def generate_stat_spec(self, **kwargs): @@ -18,10 +18,9 @@ def generate_stat_spec_from_data(self, **kwargs): class TaskVectorSpecification(RegularStatSpecification): - @property def task_vector(self): - raise NotImplemented + raise NotImplementedError def similarity(self, other: TaskVectorSpecification) -> float: """Compute cosine similarity between two task vectors.""" diff --git a/learnware/specification/regular/text/__init__.py b/learnware/specification/regular/text/__init__.py index b9bfba6d..47d1fc16 100644 --- a/learnware/specification/regular/text/__init__.py +++ b/learnware/specification/regular/text/__init__.py @@ -10,7 +10,7 @@ "RKMETextSpecification and GenerativeModelSpecification are not available because 'torch' is not installed!" ) else: - from .rkme import RKMETextSpecification from .generative import GenerativeModelSpecification + from .rkme import RKMETextSpecification __all__ = ["RKMETextSpecification", "GenerativeModelSpecification"] diff --git a/learnware/specification/regular/text/generative.py b/learnware/specification/regular/text/generative.py index c3ab7af7..e5eb7b91 100644 --- a/learnware/specification/regular/text/generative.py +++ b/learnware/specification/regular/text/generative.py @@ -6,18 +6,13 @@ from typing import Any, Dict, List, Optional, Union import numpy as np -import trl import torch - +import trl +from datasets import Dataset +from peft import LoraConfig, PeftModel, get_peft_model from torch import nn - +from transformers import PreTrainedModel, Qwen2ForCausalLM, Qwen2Tokenizer, TrainingArguments from trl import SFTConfig -from peft import LoraConfig, PeftModel -from datasets import Dataset - -from transformers import PreTrainedModel, TrainingArguments, Qwen2ForCausalLM, Qwen2Tokenizer - -from peft import get_peft_model from ..base import TaskVectorSpecification from ....logger import get_module_logger @@ -173,7 +168,6 @@ def _init_tokenizer_model(self, beimingwu): return tokenizer, model def _init_trainer(self, model, tokenizer, train_dataset, args): - # TODO: set_seed(3407) trainer = CustomSFTTrainer( model=model, @@ -224,7 +218,6 @@ def load(self, filepath: str): class CustomSFTTrainer(trl.SFTTrainer): - def __init__(self, weight_decay_l1=None, **kwargs): super().__init__(**kwargs) model: Union[PreTrainedModel, nn.Module] = kwargs["model"] @@ -244,7 +237,7 @@ def __init__(self, weight_decay_l1=None, **kwargs): def train( self, resume_from_checkpoint: Optional[Union[str, bool]] = None, - trial: Union["optuna.Trial", Dict[str, Any]] = None, + trial: Union["optuna.Trial", Dict[str, Any]] = None, # noqa: F821 ignore_keys_for_eval: Optional[List[str]] = None, **kwargs, ): diff --git a/learnware/specification/system/llm_general_capability_spec/config.py b/learnware/specification/system/llm_general_capability_spec/config.py index 3430ff55..5fee42a0 100644 --- a/learnware/specification/system/llm_general_capability_spec/config.py +++ b/learnware/specification/system/llm_general_capability_spec/config.py @@ -1,4 +1,5 @@ from typing import List + import numpy as np from ....tests.benchmarks import 
LLMBenchmarkConfig diff --git a/learnware/specification/system/llm_general_capability_spec/spec.py b/learnware/specification/system/llm_general_capability_spec/spec.py index 1e0da8cd..5dad78df 100644 --- a/learnware/specification/system/llm_general_capability_spec/spec.py +++ b/learnware/specification/system/llm_general_capability_spec/spec.py @@ -1,16 +1,18 @@ from __future__ import annotations -import traceback -from typing import List, Dict, Optional -import lm_eval -from lm_eval.models.huggingface import HFLM + import codecs import json import os +import traceback +from typing import Dict, List, Optional + +import lm_eval +from lm_eval.models.huggingface import HFLM from .config import general_capability_benchmark_configs from ..base import SystemStatSpecification -from ....tests.benchmarks import LLMBenchmarkConfig from ....logger import get_module_logger +from ....tests.benchmarks import LLMBenchmarkConfig logger = get_module_logger("llm_general_capability_spec") @@ -27,7 +29,7 @@ def __init__(self): super(LLMGeneralCapabilitySpecification, self).__init__(type=self.__class__.__name__) @staticmethod - def _get_scores(learnware: Learnware, benchmark_configs: List[LLMBenchmarkConfig]) -> Dict: + def _get_scores(learnware, benchmark_configs: List[LLMBenchmarkConfig]) -> Dict: """Use [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness) framework to evaluate learnware according to benchmark_configs and compute score dict. Parameters @@ -73,7 +75,7 @@ def _get_scores(learnware: Learnware, benchmark_configs: List[LLMBenchmarkConfig def generate_stat_spec_from_system( self, - learnware: Learnware, + learnware, benchmark_configs: Optional[List[LLMBenchmarkConfig]] = None, update_existing: bool = False, ): @@ -90,7 +92,7 @@ def generate_stat_spec_from_system( """ if benchmark_configs: for config in benchmark_configs: - if config.eval_metric == None and config.score_function == None: + if config.eval_metric is None and config.score_function is None: raise Exception( "Must specify an evaluation metric or a score computing function in a LLMBenchmarkConfig object to get the evaluation score." 
) diff --git a/learnware/tests/__init__.py b/learnware/tests/__init__.py index e9d27022..7e073c99 100644 --- a/learnware/tests/__init__.py +++ b/learnware/tests/__init__.py @@ -1,4 +1,4 @@ -from .utils import parametrize from .benchmarks.config import llm_general_capability_benchmark_configs +from .utils import parametrize __all__ = ["parametrize", "llm_general_capability_benchmark_configs"] diff --git a/learnware/tests/benchmarks/__init__.py b/learnware/tests/benchmarks/__init__.py index 70230e7b..609185ee 100644 --- a/learnware/tests/benchmarks/__init__.py +++ b/learnware/tests/benchmarks/__init__.py @@ -3,10 +3,10 @@ import tempfile import zipfile from dataclasses import dataclass -from typing import List, Optional, Tuple, Union, Callable -from datasets import load_dataset, Dataset +from typing import Callable, List, Optional, Tuple, Union import numpy as np +from datasets import Dataset, load_dataset from .config import BenchmarkConfig, LLMBenchmarkConfig, benchmark_configs from ..data import GetData diff --git a/learnware/tests/benchmarks/config.py b/learnware/tests/benchmarks/config.py index e5d45540..e8bf60cb 100644 --- a/learnware/tests/benchmarks/config.py +++ b/learnware/tests/benchmarks/config.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Dict, List, Optional, Callable, Union +from typing import Callable, Dict, List, Optional, Union @dataclass From b0af2a0a942d32bed648eaf99c380c461e0763d8 Mon Sep 17 00:00:00 2001 From: Asymptotez <201220101@smail.nju.edu.cn> Date: Mon, 26 May 2025 01:19:52 +0800 Subject: [PATCH 106/108] [MNT] Update torch and torchvision dependencies to latest compatible versions --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index cd416126..c0e66102 100644 --- a/setup.py +++ b/setup.py @@ -77,8 +77,8 @@ def get_version(rel_path: str) -> str: FULL_REQUIRED = [ # The default full requirements for learnware package - "torch==2.0.1", - "torchvision==0.15.2", + "torch>=2.1.0", + "torchvision>=0.16.0", "torch-optimizer>=0.3.0", "lightgbm>=3.3.0", "sentence_transformers==3.2.1", From 2e9bce1c1e5f2c85f14bfb4b59901c542dabfebd Mon Sep 17 00:00:00 2001 From: Asymptotez <201220101@smail.nju.edu.cn> Date: Mon, 26 May 2025 01:34:08 +0800 Subject: [PATCH 107/108] [MNT] Remove unused imports and clean up code for F401 & F541. 
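For context, the two ruff rules addressed here: F401 flags an import that is never
referenced anywhere in the module, and F541 flags an f-string that contains no
placeholders. A minimal Python sketch of both patterns (illustrative only, not code
from the repository, although the example values mirror the diffs below):

    # F541: the "f" prefix does nothing when a string has no placeholders.
    prompt_before = f"Answer:"   # flagged by ruff as F541
    prompt_after = "Answer:"     # the fix applied in llm_process_funcs.py
    assert prompt_before == prompt_after

    # F401: an import that nothing in the module references. The fix is simply
    # deleting the line -- e.g. "import copy", "import torch" and "import shutil"
    # are dropped from examples/dataset_llm_workflow/workflow.py below.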
--- examples/dataset_llm_workflow/benchmark/__init__.py | 1 - examples/dataset_llm_workflow/benchmark/utils.py | 3 +-- examples/dataset_llm_workflow/workflow.py | 4 ---- learnware/tests/benchmarks/llm_process_funcs.py | 4 ++-- tests/test_specification/test_text_generative.py | 3 --- 5 files changed, 3 insertions(+), 12 deletions(-) diff --git a/examples/dataset_llm_workflow/benchmark/__init__.py b/examples/dataset_llm_workflow/benchmark/__init__.py index 2650b0f2..e69de29b 100644 --- a/examples/dataset_llm_workflow/benchmark/__init__.py +++ b/examples/dataset_llm_workflow/benchmark/__init__.py @@ -1 +0,0 @@ -from .base import Benchmark diff --git a/examples/dataset_llm_workflow/benchmark/utils.py b/examples/dataset_llm_workflow/benchmark/utils.py index a8be1406..2e62d26b 100644 --- a/examples/dataset_llm_workflow/benchmark/utils.py +++ b/examples/dataset_llm_workflow/benchmark/utils.py @@ -1,9 +1,8 @@ import re import random from datasets import load_dataset, concatenate_datasets -from typing import List -from .config import LEARNWARE_FIN, LEARNWARE_MATH, LEARNWARE_MED, USER_FIN +from .config import LEARNWARE_FIN, USER_FIN def preprocess_alpaca(docs): diff --git a/examples/dataset_llm_workflow/workflow.py b/examples/dataset_llm_workflow/workflow.py index d8b25270..0b1bbfff 100644 --- a/examples/dataset_llm_workflow/workflow.py +++ b/examples/dataset_llm_workflow/workflow.py @@ -2,10 +2,7 @@ import time import tempfile import os -import copy import pandas as pd -import torch -import shutil import json import re import numpy as np @@ -19,7 +16,6 @@ from learnware.specification import GenerativeModelSpecification from benchmark import Benchmark -from benchmark.config import USER_FIN, USER_MATH, USER_MED from eval_config import CONFIG logger = get_module_logger("llm_workflow", level="INFO") diff --git a/learnware/tests/benchmarks/llm_process_funcs.py b/learnware/tests/benchmarks/llm_process_funcs.py index 5fe9a009..c55b6d21 100644 --- a/learnware/tests/benchmarks/llm_process_funcs.py +++ b/learnware/tests/benchmarks/llm_process_funcs.py @@ -170,7 +170,7 @@ def preprocess_medmcqa_no_label(docs) -> List[str]: prompt = "Question: " + question + "\nChoices:\n" for choice, option in option_choices.items(): prompt += f"{choice.upper()}. {option}\n" - prompt += f"Answer:" + prompt += "Answer:" texts.append(prompt) return texts @@ -339,7 +339,7 @@ def preprocess_mathqa(docs) -> List[str]: def preprocess_mgsm_no_label(docs) -> List[str]: questions = docs["question"] - texts = [f"问题: " + question + "\n逐步解答:" for question in questions] + texts = ["问题: " + question + "\n逐步解答:" for question in questions] return texts diff --git a/tests/test_specification/test_text_generative.py b/tests/test_specification/test_text_generative.py index 7a124006..1b26077c 100644 --- a/tests/test_specification/test_text_generative.py +++ b/tests/test_specification/test_text_generative.py @@ -1,12 +1,9 @@ -import json import os import tempfile import unittest -import numpy as np import torch -import sys from learnware.learnware.base import Learnware from learnware.market.llm import LLMStatSearcher From 361f8a48b49c93ab21c2857302a9c191fcd16426 Mon Sep 17 00:00:00 2001 From: Asymptotez <201220101@smail.nju.edu.cn> Date: Mon, 26 May 2025 01:54:39 +0800 Subject: [PATCH 108/108] [MNT] Another fix for F401. 
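Follow-up to the previous patch: drops a leftover unused import (from typing import List)
in tests/test_specification/text_generative_utils.py that the earlier F401 pass did not
catch; nothing in the module references List.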
--- tests/test_specification/text_generative_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_specification/text_generative_utils.py b/tests/test_specification/text_generative_utils.py index 40ca828e..83256513 100644 --- a/tests/test_specification/text_generative_utils.py +++ b/tests/test_specification/text_generative_utils.py @@ -1,4 +1,3 @@ -from typing import List from datasets import load_dataset DATASET = {