Evaluation API Reference¶
The evaluate() function allows you to benchmark Text-to-SQL model outputs @@ -173,6 +178,7 @@
diff --git a/.gitignore b/.gitignore index a3c3066..b74d3e5 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,11 @@ llmsql_workdir evaluation_* coverage.xml + +.idea + +# Sphinx build +docs/_build/ +docs/.doctrees/ +*.doctree +*.pickle diff --git a/docs/_build/html/.buildinfo b/docs/_build/html/.buildinfo index b853fd5..d7142fb 100644 --- a/docs/_build/html/.buildinfo +++ b/docs/_build/html/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file records the configuration used when building these files. When it is not found, a full rebuild will be done. -config: ba6688d44e6ba22fb6e40076d1af75c2 +config: 3caef0746bc07fabd8f91030ce7b6533 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/docs/_build/html/.doctrees/docs/index.doctree b/docs/_build/html/.doctrees/docs/index.doctree deleted file mode 100644 index eb4bb1d..0000000 Binary files a/docs/_build/html/.doctrees/docs/index.doctree and /dev/null differ diff --git a/docs/_build/html/.doctrees/environment.pickle b/docs/_build/html/.doctrees/environment.pickle deleted file mode 100644 index 263655d..0000000 Binary files a/docs/_build/html/.doctrees/environment.pickle and /dev/null differ diff --git a/docs/_build/html/_static/documentation_options.js b/docs/_build/html/_static/documentation_options.js index 82d487f..eede5b1 100644 --- a/docs/_build/html/_static/documentation_options.js +++ b/docs/_build/html/_static/documentation_options.js @@ -1,5 +1,5 @@ const DOCUMENTATION_OPTIONS = { - VERSION: '0.1.14', + VERSION: '0.1.15', LANGUAGE: 'en', COLLAPSE_INDEX: false, BUILDER: 'html', @@ -10,4 +10,4 @@ const DOCUMENTATION_OPTIONS = { NAVIGATION_WITH_KEYS: false, SHOW_SEARCH_SUMMARY: true, ENABLE_SEARCH_SHORTCUTS: true, -}; \ No newline at end of file +}; diff --git a/docs/_build/html/_static/leaderboard.json b/docs/_build/html/_static/leaderboard.json new file mode 100644 index 0000000..c842948 --- /dev/null +++ b/docs/_build/html/_static/leaderboard.json @@ -0,0 +1,106 @@ +[ + { + "model": "openai/gpt-oss-120b", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.9049, + "date": "2026-02-24" + }, + { + "model": "openai/gpt-oss-20b", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.8871, + "date": "2026-02-24" + }, + { + "model": "meta-llama/Llama-3.3-70B-Instruct", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.8607, + "date": "2026-02-24" + }, + { + "model": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.8519, + "date": "2026-02-24" + }, + { + "model": "Qwen/Qwen2.5-7B-Instruct", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.794, + "date": "2026-02-24" + }, + { + "model": "mistralai/Mistral-Nemo-Instruct-2407", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.7599, + "date": "2026-02-24" + }, + { + "model": "Qwen/Qwen2.5-1.5B-Instruct", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.6401, + "date": "2026-02-24" + }, + { + "model": "meta-llama/Llama-3.2-3B-Instruct", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.5415, + "date": "2026-02-24" + }, + { + "model": "CYFRAGOVPL/PLLuM-12B-chat", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.5224, + "date": "2026-02-24" + }, + { + "model": "Qwen/Qwen3-0.6B", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.4983, + "date": "2026-02-24" + }, + { + "model": "CYFRAGOVPL/PLLuM-12B-nc-chat", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.4044, + "date": "2026-02-24" + }, + { + "model": "CYFRAGOVPL/pllum-12b-nc-chat-250715", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.3727, + "date": "2026-02-24" + }, + { + "model": "meta-llama/Llama-3.2-1B-Instruct", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.2678, + "date": "2026-02-24" + } +] \ No newline at end of file diff --git a/docs/_build/html/_static/scripts/front_page.js b/docs/_build/html/_static/scripts/front_page.js index 03fd423..1e0c0dc 100644 --- a/docs/_build/html/_static/scripts/front_page.js +++ b/docs/_build/html/_static/scripts/front_page.js @@ -48,3 +48,88 @@ if (searchInput) { } }); } + +document.addEventListener("DOMContentLoaded", async () => { + const container = document.getElementById('leaderboard-container'); + if (!container) return; + + try { + const response = await fetch('_static/leaderboard.json'); + const rows = await response.json(); + renderLeaderboard(rows); + } catch (e) { + container.innerHTML = '
Error loading leaderboard 😢
'; + console.error(e); + } +}); + +function renderLeaderboard(rows) { + const container = document.getElementById('leaderboard-container'); + container.innerHTML = ''; + + const table = document.createElement('table'); + table.className = 'leaderboard-table'; + + const thead = document.createElement('thead'); + thead.innerHTML = ` +The evaluate() function allows you to benchmark Text-to-SQL model outputs @@ -173,6 +178,7 @@