diff --git a/.gitignore b/.gitignore index a3c3066..b74d3e5 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,11 @@ llmsql_workdir evaluation_* coverage.xml + +.idea + +# Sphinx build +docs/_build/ +docs/.doctrees/ +*.doctree +*.pickle diff --git a/docs/_build/html/.buildinfo b/docs/_build/html/.buildinfo index b853fd5..d7142fb 100644 --- a/docs/_build/html/.buildinfo +++ b/docs/_build/html/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file records the configuration used when building these files. When it is not found, a full rebuild will be done. -config: ba6688d44e6ba22fb6e40076d1af75c2 +config: 3caef0746bc07fabd8f91030ce7b6533 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/docs/_build/html/.doctrees/docs/index.doctree b/docs/_build/html/.doctrees/docs/index.doctree deleted file mode 100644 index eb4bb1d..0000000 Binary files a/docs/_build/html/.doctrees/docs/index.doctree and /dev/null differ diff --git a/docs/_build/html/.doctrees/environment.pickle b/docs/_build/html/.doctrees/environment.pickle deleted file mode 100644 index 263655d..0000000 Binary files a/docs/_build/html/.doctrees/environment.pickle and /dev/null differ diff --git a/docs/_build/html/_static/documentation_options.js b/docs/_build/html/_static/documentation_options.js index 82d487f..eede5b1 100644 --- a/docs/_build/html/_static/documentation_options.js +++ b/docs/_build/html/_static/documentation_options.js @@ -1,5 +1,5 @@ const DOCUMENTATION_OPTIONS = { - VERSION: '0.1.14', + VERSION: '0.1.15', LANGUAGE: 'en', COLLAPSE_INDEX: false, BUILDER: 'html', @@ -10,4 +10,4 @@ const DOCUMENTATION_OPTIONS = { NAVIGATION_WITH_KEYS: false, SHOW_SEARCH_SUMMARY: true, ENABLE_SEARCH_SHORTCUTS: true, -}; \ No newline at end of file +}; diff --git a/docs/_build/html/_static/leaderboard.json b/docs/_build/html/_static/leaderboard.json new file mode 100644 index 0000000..c842948 --- /dev/null +++ b/docs/_build/html/_static/leaderboard.json @@ -0,0 +1,106 @@ +[ + { + "model": 
"openai/gpt-oss-120b", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.9049, + "date": "2026-02-24" + }, + { + "model": "openai/gpt-oss-20b", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.8871, + "date": "2026-02-24" + }, + { + "model": "meta-llama/Llama-3.3-70B-Instruct", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.8607, + "date": "2026-02-24" + }, + { + "model": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.8519, + "date": "2026-02-24" + }, + { + "model": "Qwen/Qwen2.5-7B-Instruct", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.794, + "date": "2026-02-24" + }, + { + "model": "mistralai/Mistral-Nemo-Instruct-2407", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.7599, + "date": "2026-02-24" + }, + { + "model": "Qwen/Qwen2.5-1.5B-Instruct", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.6401, + "date": "2026-02-24" + }, + { + "model": "meta-llama/Llama-3.2-3B-Instruct", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.5415, + "date": "2026-02-24" + }, + { + "model": "CYFRAGOVPL/PLLuM-12B-chat", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.5224, + "date": "2026-02-24" + }, + { + "model": "Qwen/Qwen3-0.6B", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.4983, + "date": "2026-02-24" + }, + { + "model": "CYFRAGOVPL/PLLuM-12B-nc-chat", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.4044, + "date": "2026-02-24" + }, + { + "model": "CYFRAGOVPL/pllum-12b-nc-chat-250715", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.3727, + "date": "2026-02-24" + }, + { + "model": "meta-llama/Llama-3.2-1B-Instruct", + "type": "open-source", + "fewshots": 5, + 
"backend": "vllm", + "accuracy": 0.2678, + "date": "2026-02-24" + } +] \ No newline at end of file diff --git a/docs/_build/html/_static/scripts/front_page.js b/docs/_build/html/_static/scripts/front_page.js index 03fd423..1e0c0dc 100644 --- a/docs/_build/html/_static/scripts/front_page.js +++ b/docs/_build/html/_static/scripts/front_page.js @@ -48,3 +48,88 @@ if (searchInput) { } }); } + +document.addEventListener("DOMContentLoaded", async () => { + const container = document.getElementById('leaderboard-container'); + if (!container) return; + + try { + const response = await fetch('_static/leaderboard.json'); + const rows = await response.json(); + renderLeaderboard(rows); + } catch (e) { + container.innerHTML = '

Error loading leaderboard 😢

'; + console.error(e); + } +}); + +function renderLeaderboard(rows) { + const container = document.getElementById('leaderboard-container'); + container.innerHTML = ''; + + const table = document.createElement('table'); + table.className = 'leaderboard-table'; + + const thead = document.createElement('thead'); + thead.innerHTML = ` + + Rank + Model + Type + Fewshots + Backend + Accuracy + Date + `; + table.appendChild(thead); + + const tbody = document.createElement('tbody'); + rows.forEach((row, i) => { + const tr = document.createElement('tr'); + + // Берём только вторую часть после слеша + const modelName = row.model.includes('/') ? row.model.split('/')[1] : row.model; + + // Модель с ссылкой + const modelCell = document.createElement('td'); + if (row.url) { + const a = document.createElement('a'); + a.href = row.url; + a.target = "_blank"; + a.rel = "noopener"; + a.textContent = modelName; // <-- здесь только вторая часть + modelCell.appendChild(a); + } else { + modelCell.textContent = modelName; + } + + // Accuracy + const accuracyCell = document.createElement('td'); + const barContainer = document.createElement('div'); + barContainer.className = 'accuracy-bar'; + const fill = document.createElement('div'); + fill.className = 'fill'; + fill.style.width = `${(row.accuracy*100).toFixed(2)}%`; + const text = document.createElement('span'); + text.textContent = `${(row.accuracy*100).toFixed(2)}%`; + barContainer.appendChild(fill); + barContainer.appendChild(text); + accuracyCell.appendChild(barContainer); + + // Вставка остальных ячеек + tr.innerHTML += `${i+1}`; + tr.appendChild(modelCell); + tr.innerHTML += ` + ${row.type} + ${row.fewshots} + ${row.backend} + `; + tr.appendChild(accuracyCell); + tr.innerHTML += `${row.date}`; + + tbody.appendChild(tr); + }); + + table.appendChild(tbody); + container.appendChild(table); +} \ No newline at end of file diff --git a/docs/_build/html/_static/styles/front_page.css b/docs/_build/html/_static/styles/front_page.css index 
55bce1c..1d3bcfb 100644 --- a/docs/_build/html/_static/styles/front_page.css +++ b/docs/_build/html/_static/styles/front_page.css @@ -248,3 +248,70 @@ pre span { background: none !important; color: inherit !important; } + +.leaderboard-box { + padding: 1rem; + background: #fff; + border-radius: 12px; + box-shadow: 0 6px 20px rgba(0,0,0,0.08); + overflow-x: auto; +} + +.leaderboard-table { + width: 100%; + border-collapse: collapse; + font-family: 'Inter', 'Roboto', sans-serif; + font-size: 0.95rem; + text-align: center; +} + +.leaderboard-table th { + background: linear-gradient(180deg, #f6f6f6 0%, #e9e9e9 100%); + color: #111827; + font-weight: 600; + font-size: 0.95rem; + padding: 14px 10px; + text-transform: uppercase; + letter-spacing: 0.5px; + border-bottom: 2px solid #ddd; + text-align: center; + box-shadow: inset 0 -1px 0 rgba(0,0,0,0.05); +} + +.leaderboard-table td { + padding: 10px; + border-bottom: 1px solid #e0e0e0; +} + +.leaderboard-table tbody tr:nth-child(even) { + background-color: #f9f9f9; +} + +/* Accuracy bar */ +.accuracy-bar { + position: relative; + width: 100%; + height: 20px; + background: #e0e0e0; + border-radius: 10px; + overflow: hidden; +} + +.accuracy-bar .fill { + height: 100%; + background: linear-gradient(90deg,#4caf50,#81c784); + border-radius: 10px 0 0 10px; +} + +.accuracy-bar span { + position: absolute; + width: 100%; + text-align: center; + top: 0; + left: 0; + font-size: 0.8rem; + font-weight: 600; + line-height: 20px; + color: #000; +} + diff --git a/docs/_build/html/docs/evaluation.html b/docs/_build/html/docs/evaluation.html index 8f68b02..22d13e4 100644 --- a/docs/_build/html/docs/evaluation.html +++ b/docs/_build/html/docs/evaluation.html @@ -5,21 +5,23 @@ - Evaluation API Reference — LLMSQL 0.1.14 documentation + Evaluation API Reference — LLMSQL 0.1.15 documentation + - - + + - + - + + + +
- + +

Evaluation API Reference

The evaluate() function allows you to benchmark Text-to-SQL model outputs @@ -173,6 +178,7 @@

Table of Contents

  • Input Format
  • Output Metrics
  • Report Saving
  • +
  • Report Saving
  • @@ -213,12 +219,13 @@

    Navigation

  • previous |
  • - + - + +
    - \ No newline at end of file + diff --git a/docs/_build/html/docs/index.html b/docs/_build/html/docs/index.html index 9c50489..9c26bf9 100644 --- a/docs/_build/html/docs/index.html +++ b/docs/_build/html/docs/index.html @@ -5,22 +5,24 @@ - LLMSQL package Documentation — LLMSQL 0.1.14 documentation + LLMSQL package Documentation — LLMSQL 0.1.15 documentation + - - + + - + - + + +
    +
    - \ No newline at end of file + diff --git a/docs/_build/html/docs/inference.html b/docs/_build/html/docs/inference.html index 61c12bb..2aa340f 100644 --- a/docs/_build/html/docs/inference.html +++ b/docs/_build/html/docs/inference.html @@ -5,22 +5,24 @@ - Inference API Reference — LLMSQL 0.1.14 documentation + Inference API Reference — LLMSQL 0.1.15 documentation + - - + + - + - + + +
    +
    - + +
    +

    Inference API Reference

    +

    Inference API Reference

    @@ -103,12 +110,13 @@

    Navigation

  • previous |
  • - + - + +
    - \ No newline at end of file + diff --git a/docs/_build/html/docs/usage.html b/docs/_build/html/docs/usage.html index 643646c..2e73e7a 100644 --- a/docs/_build/html/docs/usage.html +++ b/docs/_build/html/docs/usage.html @@ -5,22 +5,24 @@ - Usage Overview — LLMSQL 0.1.14 documentation + Usage Overview — LLMSQL 0.1.15 documentation + - - + + - + - + + +
    +
    - \ No newline at end of file + diff --git a/docs/_build/html/genindex.html b/docs/_build/html/genindex.html index 73f757d..2caaec9 100644 --- a/docs/_build/html/genindex.html +++ b/docs/_build/html/genindex.html @@ -4,20 +4,22 @@ - Index — LLMSQL 0.1.14 documentation + Index — LLMSQL 0.1.15 documentation + - - + + - + - + + +
    +
    - + +

    Index

    - + +
    @@ -69,11 +74,11 @@

    Navigation

  • index
  • - - + +
    - \ No newline at end of file + diff --git a/docs/_build/html/index.html b/docs/_build/html/index.html index e77de67..bd4199d 100644 --- a/docs/_build/html/index.html +++ b/docs/_build/html/index.html @@ -153,15 +153,9 @@ -

    📊 Leaderboard [in progress]

    -
    -

    - The official Leaderboard is currently empty and in progress. - - Submit - - your model results to be the first on the ranking! -

    +

    📊 Leaderboard — Execution Accuracy (EX)

    +
    +

    Loading leaderboard...

    @@ -169,7 +163,7 @@

    📄 Citation

    @inproceedings{llmsql_bench,
       title={LLMSQL: Upgrading WikiSQL for the LLM Era of Text-to-SQL},
       author={Pihulski, Dzmitry and Charchut, Karol and Novogrodskaia, Viktoria and Koco{'n}, Jan},
    -  booktitle={2025 IEEE ICDMW},
    +  booktitle={2025 IEEE ICDMW},
       year={2025},
       organization={IEEE}
     }
    diff --git a/docs/_build/html/leaderboard.json b/docs/_build/html/leaderboard.json
    new file mode 100644
    index 0000000..c842948
    --- /dev/null
    +++ b/docs/_build/html/leaderboard.json
    @@ -0,0 +1,106 @@
    +[
    +  {
    +    "model": "openai/gpt-oss-120b",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.9049,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "openai/gpt-oss-20b",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.8871,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "meta-llama/Llama-3.3-70B-Instruct",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.8607,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.8519,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "Qwen/Qwen2.5-7B-Instruct",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.794,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "mistralai/Mistral-Nemo-Instruct-2407",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.7599,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "Qwen/Qwen2.5-1.5B-Instruct",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.6401,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "meta-llama/Llama-3.2-3B-Instruct",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.5415,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "CYFRAGOVPL/PLLuM-12B-chat",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.5224,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "Qwen/Qwen3-0.6B",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.4983,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "CYFRAGOVPL/PLLuM-12B-nc-chat",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.4044,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "CYFRAGOVPL/pllum-12b-nc-chat-250715",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.3727,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "meta-llama/Llama-3.2-1B-Instruct",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.2678,
    +    "date": "2026-02-24"
    +  }
    +]
    \ No newline at end of file
    diff --git a/docs/_build/html/search.html b/docs/_build/html/search.html
    index b536f93..1a05821 100644
    --- a/docs/_build/html/search.html
    +++ b/docs/_build/html/search.html
    @@ -4,18 +4,19 @@
       
         
         
    -    Search — LLMSQL 0.1.14 documentation
    +    Search — LLMSQL 0.1.15 documentation
         
         
    +    
         
    -    
    -    
    -    
    +    
    +
    +    
         
         
         
         
    -    
    +    
         
         
         
    @@ -23,7 +24,8 @@
         
         
         
    -     
    +
    +
     
       
           
    +    
    +
    - + +

    Search

    - + + - - + + + +

    Searching for multiple words only shows matches that contain all words.

    - - + + + +
    - - + + + +
    - + +
    @@ -86,11 +98,11 @@

    Navigation

  • index
  • - - + +
    - \ No newline at end of file + diff --git a/docs/_static/leaderboard.json b/docs/_static/leaderboard.json new file mode 100644 index 0000000..c842948 --- /dev/null +++ b/docs/_static/leaderboard.json @@ -0,0 +1,106 @@ +[ + { + "model": "openai/gpt-oss-120b", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.9049, + "date": "2026-02-24" + }, + { + "model": "openai/gpt-oss-20b", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.8871, + "date": "2026-02-24" + }, + { + "model": "meta-llama/Llama-3.3-70B-Instruct", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.8607, + "date": "2026-02-24" + }, + { + "model": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.8519, + "date": "2026-02-24" + }, + { + "model": "Qwen/Qwen2.5-7B-Instruct", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.794, + "date": "2026-02-24" + }, + { + "model": "mistralai/Mistral-Nemo-Instruct-2407", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.7599, + "date": "2026-02-24" + }, + { + "model": "Qwen/Qwen2.5-1.5B-Instruct", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.6401, + "date": "2026-02-24" + }, + { + "model": "meta-llama/Llama-3.2-3B-Instruct", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.5415, + "date": "2026-02-24" + }, + { + "model": "CYFRAGOVPL/PLLuM-12B-chat", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.5224, + "date": "2026-02-24" + }, + { + "model": "Qwen/Qwen3-0.6B", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.4983, + "date": "2026-02-24" + }, + { + "model": "CYFRAGOVPL/PLLuM-12B-nc-chat", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.4044, + "date": "2026-02-24" + }, + { + "model": 
"CYFRAGOVPL/pllum-12b-nc-chat-250715", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.3727, + "date": "2026-02-24" + }, + { + "model": "meta-llama/Llama-3.2-1B-Instruct", + "type": "open-source", + "fewshots": 5, + "backend": "vllm", + "accuracy": 0.2678, + "date": "2026-02-24" + } +] \ No newline at end of file diff --git a/docs/_static/scripts/front_page.js b/docs/_static/scripts/front_page.js index 03fd423..4f2cdc5 100644 --- a/docs/_static/scripts/front_page.js +++ b/docs/_static/scripts/front_page.js @@ -48,3 +48,86 @@ if (searchInput) { } }); } + +document.addEventListener("DOMContentLoaded", async () => { + const container = document.getElementById('leaderboard-container'); + if (!container) return; + + try { + const response = await fetch('_static/leaderboard.json'); + const rows = await response.json(); + renderLeaderboard(rows); + } catch (e) { + container.innerHTML = '

    Error loading leaderboard 😢

    '; + console.error(e); + } +}); + +function renderLeaderboard(rows) { + const container = document.getElementById('leaderboard-container'); + container.innerHTML = ''; + + const table = document.createElement('table'); + table.className = 'leaderboard-table'; + + const thead = document.createElement('thead'); + thead.innerHTML = ` + + Rank + Model + Type + Fewshots + Backend + Accuracy + Date + `; + table.appendChild(thead); + + const tbody = document.createElement('tbody'); + rows.forEach((row, i) => { + const tr = document.createElement('tr'); + + const modelName = row.model.includes('/') ? row.model.split('/')[1] : row.model; + + const modelCell = document.createElement('td'); + if (row.url) { + const a = document.createElement('a'); + a.href = row.url; + a.target = "_blank"; + a.rel = "noopener"; + a.textContent = modelName; // <-- здесь только вторая часть + modelCell.appendChild(a); + } else { + modelCell.textContent = modelName; + } + + // Accuracy + const accuracyCell = document.createElement('td'); + const barContainer = document.createElement('div'); + barContainer.className = 'accuracy-bar'; + const fill = document.createElement('div'); + fill.className = 'fill'; + fill.style.width = `${(row.accuracy*100).toFixed(2)}%`; + const text = document.createElement('span'); + text.textContent = `${(row.accuracy*100).toFixed(2)}%`; + barContainer.appendChild(fill); + barContainer.appendChild(text); + accuracyCell.appendChild(barContainer); + + + tr.innerHTML += `${i+1}`; + tr.appendChild(modelCell); + tr.innerHTML += ` + ${row.type} + ${row.fewshots} + ${row.backend} + `; + tr.appendChild(accuracyCell); + tr.innerHTML += `${row.date}`; + + tbody.appendChild(tr); + }); + + table.appendChild(tbody); + container.appendChild(table); +} \ No newline at end of file diff --git a/docs/_static/styles/front_page.css b/docs/_static/styles/front_page.css index 55bce1c..1d3bcfb 100644 --- a/docs/_static/styles/front_page.css +++ b/docs/_static/styles/front_page.css @@ 
-248,3 +248,70 @@ pre span { background: none !important; color: inherit !important; } + +.leaderboard-box { + padding: 1rem; + background: #fff; + border-radius: 12px; + box-shadow: 0 6px 20px rgba(0,0,0,0.08); + overflow-x: auto; +} + +.leaderboard-table { + width: 100%; + border-collapse: collapse; + font-family: 'Inter', 'Roboto', sans-serif; + font-size: 0.95rem; + text-align: center; +} + +.leaderboard-table th { + background: linear-gradient(180deg, #f6f6f6 0%, #e9e9e9 100%); + color: #111827; + font-weight: 600; + font-size: 0.95rem; + padding: 14px 10px; + text-transform: uppercase; + letter-spacing: 0.5px; + border-bottom: 2px solid #ddd; + text-align: center; + box-shadow: inset 0 -1px 0 rgba(0,0,0,0.05); +} + +.leaderboard-table td { + padding: 10px; + border-bottom: 1px solid #e0e0e0; +} + +.leaderboard-table tbody tr:nth-child(even) { + background-color: #f9f9f9; +} + +/* Accuracy bar */ +.accuracy-bar { + position: relative; + width: 100%; + height: 20px; + background: #e0e0e0; + border-radius: 10px; + overflow: hidden; +} + +.accuracy-bar .fill { + height: 100%; + background: linear-gradient(90deg,#4caf50,#81c784); + border-radius: 10px 0 0 10px; +} + +.accuracy-bar span { + position: absolute; + width: 100%; + text-align: center; + top: 0; + left: 0; + font-size: 0.8rem; + font-weight: 600; + line-height: 20px; + color: #000; +} + diff --git a/docs/_templates/index.html b/docs/_templates/index.html index 65bcb22..a93b256 100644 --- a/docs/_templates/index.html +++ b/docs/_templates/index.html @@ -153,15 +153,9 @@ -

    📊 Leaderboard [in progress]

    -
    -

    - The official Leaderboard is currently empty and in progress. - - Submit - - your model results to be the first on the ranking! -

    +

    📊 Leaderboard — Execution Accuracy (EX)

    +
    +

    Loading leaderboard...

    @@ -169,7 +163,7 @@

    📄 Citation

    @inproceedings{llmsql_bench,
       title={LLMSQL: Upgrading WikiSQL for the LLM Era of Text-to-SQL},
       author={Pihulski, Dzmitry and Charchut, Karol and Novogrodskaia, Viktoria and Koco{'n}, Jan},
    -  booktitle={2025 IEEE ICDMW},
    +  booktitle={2025 IEEE ICDMW},
       year={2025},
       organization={IEEE}
     }
    diff --git a/leaderboard/Llama-3.2-1B-Instruct/5fewshots/inference_script.py b/leaderboard/Llama-3.2-1B-Instruct/5fewshots/inference_script.py
    new file mode 100644
    index 0000000..1049e44
    --- /dev/null
    +++ b/leaderboard/Llama-3.2-1B-Instruct/5fewshots/inference_script.py
    @@ -0,0 +1,25 @@
    +import os
    +
    +from dotenv import load_dotenv
    +
    +from llmsql import evaluate, inference_vllm
    +
    +load_dotenv()
    +
    +MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"
    +
    +results = inference_vllm(
    +    model_name=MODEL_NAME,
    +    output_file=f"{MODEL_NAME}_outputs.jsonl",
    +    batch_size=20000,
    +    tensor_parallel_size=4,
    +    do_sample=False,
    +    hf_token=os.environ["HF_TOKEN"],
    +    max_new_tokens=256,
    +    temperature=0.0,
    +    num_fewshots=5,
    +    seed=42,
    +    llm_kwargs={"dtype": "bfloat16"},
    +)
    +
    +evaluate(results)
    diff --git a/leaderboard/Llama-3.2-1B-Instruct/5fewshots/requirements.txt b/leaderboard/Llama-3.2-1B-Instruct/5fewshots/requirements.txt
    new file mode 100644
    index 0000000..929f583
    --- /dev/null
    +++ b/leaderboard/Llama-3.2-1B-Instruct/5fewshots/requirements.txt
    @@ -0,0 +1,172 @@
    +accelerate==1.12.0
    +aiohappyeyeballs==2.6.1
    +aiohttp==3.13.3
    +aiosignal==1.4.0
    +annotated-doc==0.0.4
    +annotated-types==0.7.0
    +anthropic==0.83.0
    +anyio==4.12.1
    +apache-tvm-ffi==0.1.8.post2
    +astor==0.8.1
    +attrs==25.4.0
    +blake3==1.0.8
    +cachetools==7.0.1
    +cbor2==5.8.0
    +certifi==2026.1.4
    +cffi==2.0.0
    +charset-normalizer==3.4.4
    +click==8.3.1
    +cloudpickle==3.1.2
    +compressed-tensors==0.13.0
    +cryptography==46.0.5
    +cuda-bindings==13.1.1
    +cuda-pathfinder==1.3.5
    +cuda-python==13.1.1
    +cupy-cuda12x==14.0.1
    +datasets==4.5.0
    +depyf==0.20.0
    +dill==0.4.0
    +diskcache==5.6.3
    +distro==1.9.0
    +dnspython==2.8.0
    +docstring_parser==0.17.0
    +einops==0.8.2
    +email-validator==2.3.0
    +fastapi==0.132.0
    +fastapi-cli==0.0.23
    +fastapi-cloud-cli==0.13.0
    +fastar==0.8.0
    +filelock==3.24.3
    +flashinfer-python==0.6.1
    +frozenlist==1.8.0
    +fsspec==2025.10.0
    +gguf==0.17.1
    +grpcio==1.78.1
    +grpcio-reflection==1.78.1
    +h11==0.16.0
    +hf-xet==1.3.0
    +httpcore==1.0.9
    +httptools==0.7.1
    +httpx==0.28.1
    +httpx-sse==0.4.3
    +huggingface_hub==0.36.2
    +idna==3.11
    +ijson==3.5.0
    +interegular==0.3.3
    +Jinja2==3.1.6
    +jiter==0.13.0
    +jmespath==1.1.0
    +jsonschema==4.26.0
    +jsonschema-specifications==2025.9.1
    +lark==1.2.2
    +llguidance==1.3.0
    +llmsql==0.1.15
    +llvmlite==0.44.0
    +lm-format-enforcer==0.11.3
    +loguru==0.7.3
    +markdown-it-py==4.0.0
    +MarkupSafe==3.0.3
    +mcp==1.26.0
    +mdurl==0.1.2
    +mistral_common==1.9.1
    +model-hosting-container-standards==0.1.13
    +mpmath==1.3.0
    +msgpack==1.1.2
    +msgspec==0.20.0
    +multidict==6.7.1
    +multiprocess==0.70.18
    +networkx==3.6.1
    +ninja==1.13.0
    +numba==0.61.2
    +numpy==2.2.6
    +nvidia-cublas-cu12==12.8.4.1
    +nvidia-cuda-cupti-cu12==12.8.90
    +nvidia-cuda-nvrtc-cu12==12.8.93
    +nvidia-cuda-runtime-cu12==12.8.90
    +nvidia-cudnn-cu12==9.10.2.21
    +nvidia-cudnn-frontend==1.18.0
    +nvidia-cufft-cu12==11.3.3.83
    +nvidia-cufile-cu12==1.13.1.3
    +nvidia-curand-cu12==10.3.9.90
    +nvidia-cusolver-cu12==11.7.3.90
    +nvidia-cusparse-cu12==12.5.8.93
    +nvidia-cusparselt-cu12==0.7.1
    +nvidia-cutlass-dsl==4.4.0
    +nvidia-cutlass-dsl-libs-base==4.4.0
    +nvidia-ml-py==13.590.48
    +nvidia-nccl-cu12==2.27.5
    +nvidia-nvjitlink-cu12==12.8.93
    +nvidia-nvshmem-cu12==3.3.20
    +nvidia-nvtx-cu12==12.8.90
    +openai==2.23.0
    +openai-harmony==0.0.8
    +opencv-python-headless==4.13.0.92
    +outlines_core==0.2.11
    +packaging==26.0
    +pandas==3.0.1
    +partial-json-parser==0.2.1.1.post7
    +pillow==12.1.1
    +prometheus-fastapi-instrumentator==7.1.0
    +prometheus_client==0.24.1
    +propcache==0.4.1
    +protobuf==6.33.5
    +psutil==7.2.2
    +py-cpuinfo==9.0.0
    +pyarrow==23.0.1
    +pybase64==1.4.3
    +pycountry==26.2.16
    +pycparser==3.0
    +pydantic==2.12.5
    +pydantic-extra-types==2.11.0
    +pydantic-settings==2.13.1
    +pydantic_core==2.41.5
    +Pygments==2.19.2
    +PyJWT==2.11.0
    +python-dateutil==2.9.0.post0
    +python-dotenv==1.2.1
    +python-json-logger==4.0.0
    +python-multipart==0.0.22
    +PyYAML==6.0.3
    +pyzmq==27.1.0
    +ray==2.54.0
    +referencing==0.37.0
    +regex==2026.2.19
    +requests==2.32.5
    +rich==14.3.3
    +rich-toolkit==0.19.4
    +rignore==0.7.6
    +rpds-py==0.30.0
    +safetensors==0.7.0
    +sentencepiece==0.2.1
    +sentry-sdk==2.53.0
    +setproctitle==1.3.7
    +setuptools==80.10.2
    +shellingham==1.5.4
    +six==1.17.0
    +sniffio==1.3.1
    +sse-starlette==3.2.0
    +starlette==0.52.1
    +supervisor==4.3.0
    +sympy==1.14.0
    +tabulate==0.9.0
    +tiktoken==0.12.0
    +tokenizers==0.22.2
    +torch==2.9.1
    +torchaudio==2.9.1
    +torchvision==0.24.1
    +tqdm==4.67.3
    +transformers==4.57.6
    +triton==3.5.1
    +typer==0.24.1
    +typer-slim==0.24.0
    +typing-inspection==0.4.2
    +typing_extensions==4.15.0
    +urllib3==2.6.3
    +uvicorn==0.41.0
    +uvloop==0.22.1
    +vllm==0.15.1
    +watchfiles==1.1.1
    +websockets==16.0
    +xgrammar==0.1.29
    +xxhash==3.6.0
    +yarl==1.22.0
    diff --git a/leaderboard/Llama-3.2-1B-Instruct/5fewshots/run.yaml b/leaderboard/Llama-3.2-1B-Instruct/5fewshots/run.yaml
    new file mode 100644
    index 0000000..7aaea2e
    --- /dev/null
    +++ b/leaderboard/Llama-3.2-1B-Instruct/5fewshots/run.yaml
    @@ -0,0 +1,57 @@
    +date: 2026-02-24
    +# =====================
    +# Model Information
    +# =====================
    +model:
    +  name: meta-llama/Llama-3.2-1B-Instruct
    +  revision: main
    +  commit_hash: 9213176726f574b556790deb65791e0c5aa438b6
    +  parameter_count: 1B
    +  dtype: bfloat16
    +  thinking: false
    +
    +type: open-source  # open-source | proprietary
    +
    +# =====================
    +# Package Information
    +# =====================
    +llmsql:
    +  version: 0.1.15
    +  commit_hash: 79175212c90b1fc094abd2c9666c23d903060014
    +
    +# =====================
    +# Benchmark Information
    +# =====================
    +version: 2.0
    +
    +# =====================
    +# Environment Information
    +# =====================
    +os_name: Ubuntu 24.04.3 LTS
    +python_version: 3.12.12
    +pip_freeze: requirements.txt
    +device: 4xH200
    +
    +# =====================
    +# Function Inputs / Inference Backend
    +# =====================
    +inference:
    +  backend: vllm  # vllm | transformers
    +  arguments:
    +    batch_size: 20000
    +    tensor_parallel_size: 4
    +    do_sample: false
    +    max_new_tokens: 256
    +    temperature: 0.0
    +    num_fewshots: 5
    +    seed: 42
    +    llm_kwargs:
    +      dtype: bfloat16
    +
    +
    +# =====================
    +# Results
    +# =====================
    +results:
    +  execution_accuracy: 0.2678
    +  answers_path: https://huggingface.co/datasets/llmsql-bench/benchmark-evaluation-results/blob/main/Llama-3.2-1B-Instruct/5fewshots/Llama-3.2-1B-Instruct_outputs.jsonl
    diff --git a/leaderboard/Llama-3.2-3B-Instruct/5fewshots/inference_script.py b/leaderboard/Llama-3.2-3B-Instruct/5fewshots/inference_script.py
    new file mode 100644
    index 0000000..a3400c8
    --- /dev/null
    +++ b/leaderboard/Llama-3.2-3B-Instruct/5fewshots/inference_script.py
    @@ -0,0 +1,25 @@
    +import os
    +
    +from dotenv import load_dotenv
    +
    +from llmsql import evaluate, inference_vllm
    +
    +load_dotenv()
    +
    +MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"
    +
    +results = inference_vllm(
    +    model_name=MODEL_NAME,
    +    output_file=f"{MODEL_NAME}_outputs.jsonl",
    +    batch_size=20000,
    +    tensor_parallel_size=4,
    +    do_sample=False,
    +    hf_token=os.environ["HF_TOKEN"],
    +    max_new_tokens=256,
    +    temperature=0.0,
    +    num_fewshots=5,
    +    seed=42,
    +    llm_kwargs={"dtype": "bfloat16"},
    +)
    +
    +evaluate(results)
    diff --git a/leaderboard/Llama-3.2-3B-Instruct/5fewshots/requirements.txt b/leaderboard/Llama-3.2-3B-Instruct/5fewshots/requirements.txt
    new file mode 100644
    index 0000000..929f583
    --- /dev/null
    +++ b/leaderboard/Llama-3.2-3B-Instruct/5fewshots/requirements.txt
    @@ -0,0 +1,172 @@
    +accelerate==1.12.0
    +aiohappyeyeballs==2.6.1
    +aiohttp==3.13.3
    +aiosignal==1.4.0
    +annotated-doc==0.0.4
    +annotated-types==0.7.0
    +anthropic==0.83.0
    +anyio==4.12.1
    +apache-tvm-ffi==0.1.8.post2
    +astor==0.8.1
    +attrs==25.4.0
    +blake3==1.0.8
    +cachetools==7.0.1
    +cbor2==5.8.0
    +certifi==2026.1.4
    +cffi==2.0.0
    +charset-normalizer==3.4.4
    +click==8.3.1
    +cloudpickle==3.1.2
    +compressed-tensors==0.13.0
    +cryptography==46.0.5
    +cuda-bindings==13.1.1
    +cuda-pathfinder==1.3.5
    +cuda-python==13.1.1
    +cupy-cuda12x==14.0.1
    +datasets==4.5.0
    +depyf==0.20.0
    +dill==0.4.0
    +diskcache==5.6.3
    +distro==1.9.0
    +dnspython==2.8.0
    +docstring_parser==0.17.0
    +einops==0.8.2
    +email-validator==2.3.0
    +fastapi==0.132.0
    +fastapi-cli==0.0.23
    +fastapi-cloud-cli==0.13.0
    +fastar==0.8.0
    +filelock==3.24.3
    +flashinfer-python==0.6.1
    +frozenlist==1.8.0
    +fsspec==2025.10.0
    +gguf==0.17.1
    +grpcio==1.78.1
    +grpcio-reflection==1.78.1
    +h11==0.16.0
    +hf-xet==1.3.0
    +httpcore==1.0.9
    +httptools==0.7.1
    +httpx==0.28.1
    +httpx-sse==0.4.3
    +huggingface_hub==0.36.2
    +idna==3.11
    +ijson==3.5.0
    +interegular==0.3.3
    +Jinja2==3.1.6
    +jiter==0.13.0
    +jmespath==1.1.0
    +jsonschema==4.26.0
    +jsonschema-specifications==2025.9.1
    +lark==1.2.2
    +llguidance==1.3.0
    +llmsql==0.1.15
    +llvmlite==0.44.0
    +lm-format-enforcer==0.11.3
    +loguru==0.7.3
    +markdown-it-py==4.0.0
    +MarkupSafe==3.0.3
    +mcp==1.26.0
    +mdurl==0.1.2
    +mistral_common==1.9.1
    +model-hosting-container-standards==0.1.13
    +mpmath==1.3.0
    +msgpack==1.1.2
    +msgspec==0.20.0
    +multidict==6.7.1
    +multiprocess==0.70.18
    +networkx==3.6.1
    +ninja==1.13.0
    +numba==0.61.2
    +numpy==2.2.6
    +nvidia-cublas-cu12==12.8.4.1
    +nvidia-cuda-cupti-cu12==12.8.90
    +nvidia-cuda-nvrtc-cu12==12.8.93
    +nvidia-cuda-runtime-cu12==12.8.90
    +nvidia-cudnn-cu12==9.10.2.21
    +nvidia-cudnn-frontend==1.18.0
    +nvidia-cufft-cu12==11.3.3.83
    +nvidia-cufile-cu12==1.13.1.3
    +nvidia-curand-cu12==10.3.9.90
    +nvidia-cusolver-cu12==11.7.3.90
    +nvidia-cusparse-cu12==12.5.8.93
    +nvidia-cusparselt-cu12==0.7.1
    +nvidia-cutlass-dsl==4.4.0
    +nvidia-cutlass-dsl-libs-base==4.4.0
    +nvidia-ml-py==13.590.48
    +nvidia-nccl-cu12==2.27.5
    +nvidia-nvjitlink-cu12==12.8.93
    +nvidia-nvshmem-cu12==3.3.20
    +nvidia-nvtx-cu12==12.8.90
    +openai==2.23.0
    +openai-harmony==0.0.8
    +opencv-python-headless==4.13.0.92
    +outlines_core==0.2.11
    +packaging==26.0
    +pandas==3.0.1
    +partial-json-parser==0.2.1.1.post7
    +pillow==12.1.1
    +prometheus-fastapi-instrumentator==7.1.0
    +prometheus_client==0.24.1
    +propcache==0.4.1
    +protobuf==6.33.5
    +psutil==7.2.2
    +py-cpuinfo==9.0.0
    +pyarrow==23.0.1
    +pybase64==1.4.3
    +pycountry==26.2.16
    +pycparser==3.0
    +pydantic==2.12.5
    +pydantic-extra-types==2.11.0
    +pydantic-settings==2.13.1
    +pydantic_core==2.41.5
    +Pygments==2.19.2
    +PyJWT==2.11.0
    +python-dateutil==2.9.0.post0
    +python-dotenv==1.2.1
    +python-json-logger==4.0.0
    +python-multipart==0.0.22
    +PyYAML==6.0.3
    +pyzmq==27.1.0
    +ray==2.54.0
    +referencing==0.37.0
    +regex==2026.2.19
    +requests==2.32.5
    +rich==14.3.3
    +rich-toolkit==0.19.4
    +rignore==0.7.6
    +rpds-py==0.30.0
    +safetensors==0.7.0
    +sentencepiece==0.2.1
    +sentry-sdk==2.53.0
    +setproctitle==1.3.7
    +setuptools==80.10.2
    +shellingham==1.5.4
    +six==1.17.0
    +sniffio==1.3.1
    +sse-starlette==3.2.0
    +starlette==0.52.1
    +supervisor==4.3.0
    +sympy==1.14.0
    +tabulate==0.9.0
    +tiktoken==0.12.0
    +tokenizers==0.22.2
    +torch==2.9.1
    +torchaudio==2.9.1
    +torchvision==0.24.1
    +tqdm==4.67.3
    +transformers==4.57.6
    +triton==3.5.1
    +typer==0.24.1
    +typer-slim==0.24.0
    +typing-inspection==0.4.2
    +typing_extensions==4.15.0
    +urllib3==2.6.3
    +uvicorn==0.41.0
    +uvloop==0.22.1
    +vllm==0.15.1
    +watchfiles==1.1.1
    +websockets==16.0
    +xgrammar==0.1.29
    +xxhash==3.6.0
    +yarl==1.22.0
    diff --git a/leaderboard/Llama-3.2-3B-Instruct/5fewshots/run.yaml b/leaderboard/Llama-3.2-3B-Instruct/5fewshots/run.yaml
    new file mode 100644
    index 0000000..57616a3
    --- /dev/null
    +++ b/leaderboard/Llama-3.2-3B-Instruct/5fewshots/run.yaml
    @@ -0,0 +1,57 @@
    +date: 2026-02-24
    +# =====================
    +# Model Information
    +# =====================
    +model:
    +  name: meta-llama/Llama-3.2-3B-Instruct
    +  revision: main
    +  commit_hash: 0cb88a4f764b7a12671c53f0838cd831a0843b95
    +  parameter_count: 3B
    +  dtype: bfloat16
    +  thinking: false
    +
    +type: open-source  # open-source | proprietary
    +
    +# =====================
    +# Package Information
    +# =====================
    +llmsql:
    +  version: 0.1.15
    +  commit_hash: 79175212c90b1fc094abd2c9666c23d903060014
    +
    +# =====================
    +# Benchmark Information
    +# =====================
    +version: 2.0
    +
    +# =====================
    +# Environment Information
    +# =====================
    +os_name: Ubuntu 24.04.3 LTS
    +python_version: 3.12.12
    +pip_freeze: requirements.txt
    +device: 4xH200
    +
    +# =====================
    +# Function Inputs / Inference Backend
    +# =====================
    +inference:
    +  backend: vllm  # vllm | transformers
    +  arguments:
    +    batch_size: 20000
    +    tensor_parallel_size: 4
    +    do_sample: false
    +    max_new_tokens: 256
    +    temperature: 0.0
    +    num_fewshots: 5
    +    seed: 42
    +    llm_kwargs:
    +      dtype: bfloat16
    +
    +
    +# =====================
    +# Results
    +# =====================
    +results:
    +  execution_accuracy: 0.5415
    +  answers_path: https://huggingface.co/datasets/llmsql-bench/benchmark-evaluation-results/blob/main/Llama-3.2-3B-Instruct/5fewshots/Llama-3.2-3B-Instruct_outputs.jsonl
    diff --git a/leaderboard/Llama-3.3-70B-Instruct/5fewshots/inference_script.py b/leaderboard/Llama-3.3-70B-Instruct/5fewshots/inference_script.py
    new file mode 100644
    index 0000000..4efc97e
    --- /dev/null
    +++ b/leaderboard/Llama-3.3-70B-Instruct/5fewshots/inference_script.py
    @@ -0,0 +1,25 @@
    +import os
    +
    +from dotenv import load_dotenv
    +
    +from llmsql import evaluate, inference_vllm
    +
    +load_dotenv()
    +
    +MODEL_NAME = "meta-llama/Llama-3.3-70B-Instruct"
    +
    +results = inference_vllm(
    +    model_name=MODEL_NAME,
    +    output_file=f"{MODEL_NAME}_outputs.jsonl",
    +    batch_size=20000,
    +    tensor_parallel_size=4,
    +    do_sample=False,
    +    hf_token=os.environ["HF_TOKEN"],
    +    max_new_tokens=256,
    +    temperature=0.0,
    +    num_fewshots=5,
    +    seed=42,
    +    llm_kwargs={"dtype": "bfloat16"},
    +)
    +
    +evaluate(results)
    diff --git a/leaderboard/Llama-3.3-70B-Instruct/5fewshots/requirements.txt b/leaderboard/Llama-3.3-70B-Instruct/5fewshots/requirements.txt
    new file mode 100644
    index 0000000..929f583
    --- /dev/null
    +++ b/leaderboard/Llama-3.3-70B-Instruct/5fewshots/requirements.txt
    @@ -0,0 +1,172 @@
    +accelerate==1.12.0
    +aiohappyeyeballs==2.6.1
    +aiohttp==3.13.3
    +aiosignal==1.4.0
    +annotated-doc==0.0.4
    +annotated-types==0.7.0
    +anthropic==0.83.0
    +anyio==4.12.1
    +apache-tvm-ffi==0.1.8.post2
    +astor==0.8.1
    +attrs==25.4.0
    +blake3==1.0.8
    +cachetools==7.0.1
    +cbor2==5.8.0
    +certifi==2026.1.4
    +cffi==2.0.0
    +charset-normalizer==3.4.4
    +click==8.3.1
    +cloudpickle==3.1.2
    +compressed-tensors==0.13.0
    +cryptography==46.0.5
    +cuda-bindings==13.1.1
    +cuda-pathfinder==1.3.5
    +cuda-python==13.1.1
    +cupy-cuda12x==14.0.1
    +datasets==4.5.0
    +depyf==0.20.0
    +dill==0.4.0
    +diskcache==5.6.3
    +distro==1.9.0
    +dnspython==2.8.0
    +docstring_parser==0.17.0
    +einops==0.8.2
    +email-validator==2.3.0
    +fastapi==0.132.0
    +fastapi-cli==0.0.23
    +fastapi-cloud-cli==0.13.0
    +fastar==0.8.0
    +filelock==3.24.3
    +flashinfer-python==0.6.1
    +frozenlist==1.8.0
    +fsspec==2025.10.0
    +gguf==0.17.1
    +grpcio==1.78.1
    +grpcio-reflection==1.78.1
    +h11==0.16.0
    +hf-xet==1.3.0
    +httpcore==1.0.9
    +httptools==0.7.1
    +httpx==0.28.1
    +httpx-sse==0.4.3
    +huggingface_hub==0.36.2
    +idna==3.11
    +ijson==3.5.0
    +interegular==0.3.3
    +Jinja2==3.1.6
    +jiter==0.13.0
    +jmespath==1.1.0
    +jsonschema==4.26.0
    +jsonschema-specifications==2025.9.1
    +lark==1.2.2
    +llguidance==1.3.0
    +llmsql==0.1.15
    +llvmlite==0.44.0
    +lm-format-enforcer==0.11.3
    +loguru==0.7.3
    +markdown-it-py==4.0.0
    +MarkupSafe==3.0.3
    +mcp==1.26.0
    +mdurl==0.1.2
    +mistral_common==1.9.1
    +model-hosting-container-standards==0.1.13
    +mpmath==1.3.0
    +msgpack==1.1.2
    +msgspec==0.20.0
    +multidict==6.7.1
    +multiprocess==0.70.18
    +networkx==3.6.1
    +ninja==1.13.0
    +numba==0.61.2
    +numpy==2.2.6
    +nvidia-cublas-cu12==12.8.4.1
    +nvidia-cuda-cupti-cu12==12.8.90
    +nvidia-cuda-nvrtc-cu12==12.8.93
    +nvidia-cuda-runtime-cu12==12.8.90
    +nvidia-cudnn-cu12==9.10.2.21
    +nvidia-cudnn-frontend==1.18.0
    +nvidia-cufft-cu12==11.3.3.83
    +nvidia-cufile-cu12==1.13.1.3
    +nvidia-curand-cu12==10.3.9.90
    +nvidia-cusolver-cu12==11.7.3.90
    +nvidia-cusparse-cu12==12.5.8.93
    +nvidia-cusparselt-cu12==0.7.1
    +nvidia-cutlass-dsl==4.4.0
    +nvidia-cutlass-dsl-libs-base==4.4.0
    +nvidia-ml-py==13.590.48
    +nvidia-nccl-cu12==2.27.5
    +nvidia-nvjitlink-cu12==12.8.93
    +nvidia-nvshmem-cu12==3.3.20
    +nvidia-nvtx-cu12==12.8.90
    +openai==2.23.0
    +openai-harmony==0.0.8
    +opencv-python-headless==4.13.0.92
    +outlines_core==0.2.11
    +packaging==26.0
    +pandas==3.0.1
    +partial-json-parser==0.2.1.1.post7
    +pillow==12.1.1
    +prometheus-fastapi-instrumentator==7.1.0
    +prometheus_client==0.24.1
    +propcache==0.4.1
    +protobuf==6.33.5
    +psutil==7.2.2
    +py-cpuinfo==9.0.0
    +pyarrow==23.0.1
    +pybase64==1.4.3
    +pycountry==26.2.16
    +pycparser==3.0
    +pydantic==2.12.5
    +pydantic-extra-types==2.11.0
    +pydantic-settings==2.13.1
    +pydantic_core==2.41.5
    +Pygments==2.19.2
    +PyJWT==2.11.0
    +python-dateutil==2.9.0.post0
    +python-dotenv==1.2.1
    +python-json-logger==4.0.0
    +python-multipart==0.0.22
    +PyYAML==6.0.3
    +pyzmq==27.1.0
    +ray==2.54.0
    +referencing==0.37.0
    +regex==2026.2.19
    +requests==2.32.5
    +rich==14.3.3
    +rich-toolkit==0.19.4
    +rignore==0.7.6
    +rpds-py==0.30.0
    +safetensors==0.7.0
    +sentencepiece==0.2.1
    +sentry-sdk==2.53.0
    +setproctitle==1.3.7
    +setuptools==80.10.2
    +shellingham==1.5.4
    +six==1.17.0
    +sniffio==1.3.1
    +sse-starlette==3.2.0
    +starlette==0.52.1
    +supervisor==4.3.0
    +sympy==1.14.0
    +tabulate==0.9.0
    +tiktoken==0.12.0
    +tokenizers==0.22.2
    +torch==2.9.1
    +torchaudio==2.9.1
    +torchvision==0.24.1
    +tqdm==4.67.3
    +transformers==4.57.6
    +triton==3.5.1
    +typer==0.24.1
    +typer-slim==0.24.0
    +typing-inspection==0.4.2
    +typing_extensions==4.15.0
    +urllib3==2.6.3
    +uvicorn==0.41.0
    +uvloop==0.22.1
    +vllm==0.15.1
    +watchfiles==1.1.1
    +websockets==16.0
    +xgrammar==0.1.29
    +xxhash==3.6.0
    +yarl==1.22.0
    diff --git a/leaderboard/Llama-3.3-70B-Instruct/5fewshots/run.yaml b/leaderboard/Llama-3.3-70B-Instruct/5fewshots/run.yaml
    new file mode 100644
    index 0000000..1e966f9
    --- /dev/null
    +++ b/leaderboard/Llama-3.3-70B-Instruct/5fewshots/run.yaml
    @@ -0,0 +1,57 @@
    +date: 2026-02-24
    +# =====================
    +# Model Information
    +# =====================
    +model:
    +  name: meta-llama/Llama-3.3-70B-Instruct
    +  revision: main
    +  commit_hash: 6f6073b423013f6a7d4d9f39144961bfbfbc386b
    +  parameter_count: 70B
    +  dtype: bfloat16
    +  thinking: false
    +
    +type: open-source  # open-source | proprietary
    +
    +# =====================
    +# Package Information
    +# =====================
    +llmsql:
    +  version: 0.1.15
    +  commit_hash: 79175212c90b1fc094abd2c9666c23d903060014
    +
    +# =====================
    +# Benchmark Information
    +# =====================
    +version: 2.0
    +
    +# =====================
    +# Environment Information
    +# =====================
    +os_name: Ubuntu 24.04.3 LTS
    +python_version: 3.12.12
    +pip_freeze: requirements.txt
    +device: 4xH200
    +
    +# =====================
    +# Function Inputs / Inference Backend
    +# =====================
    +inference:
    +  backend: vllm  # vllm | transformers
    +  arguments:
    +    batch_size: 20000
    +    tensor_parallel_size: 4
    +    do_sample: false
    +    max_new_tokens: 256
    +    temperature: 0.0
    +    num_fewshots: 5
    +    seed: 42
    +    llm_kwargs:
    +      dtype: bfloat16
    +
    +
    +# =====================
    +# Results
    +# =====================
    +results:
    +  execution_accuracy: 0.8607
    +  answers_path: https://huggingface.co/datasets/llmsql-bench/benchmark-evaluation-results/blob/main/Llama-3.3-70B-Instruct/5fewshots/Llama-3.3-70B-Instruct_outputs.jsonl
    diff --git a/leaderboard/Mistral-Nemo-Instruct-2407/5fewshots/inference_script.py b/leaderboard/Mistral-Nemo-Instruct-2407/5fewshots/inference_script.py
    new file mode 100644
    index 0000000..ef0562c
    --- /dev/null
    +++ b/leaderboard/Mistral-Nemo-Instruct-2407/5fewshots/inference_script.py
    @@ -0,0 +1,25 @@
    +import os
    +
    +from dotenv import load_dotenv
    +
    +from llmsql import evaluate, inference_vllm
    +
    +load_dotenv()
    +
    +MODEL_NAME = "mistralai/Mistral-Nemo-Instruct-2407"
    +
    +results = inference_vllm(
    +    model_name=MODEL_NAME,
    +    output_file=f"{MODEL_NAME}_outputs.jsonl",
    +    batch_size=20000,
    +    tensor_parallel_size=4,
    +    do_sample=False,
    +    hf_token=os.environ["HF_TOKEN"],
    +    max_new_tokens=256,
    +    temperature=0.0,
    +    num_fewshots=5,
    +    seed=42,
    +    llm_kwargs={"dtype": "bfloat16"},
    +)
    +
    +evaluate(results)
    diff --git a/leaderboard/Mistral-Nemo-Instruct-2407/5fewshots/requirements.txt b/leaderboard/Mistral-Nemo-Instruct-2407/5fewshots/requirements.txt
    new file mode 100644
    index 0000000..929f583
    --- /dev/null
    +++ b/leaderboard/Mistral-Nemo-Instruct-2407/5fewshots/requirements.txt
    @@ -0,0 +1,172 @@
    +accelerate==1.12.0
    +aiohappyeyeballs==2.6.1
    +aiohttp==3.13.3
    +aiosignal==1.4.0
    +annotated-doc==0.0.4
    +annotated-types==0.7.0
    +anthropic==0.83.0
    +anyio==4.12.1
    +apache-tvm-ffi==0.1.8.post2
    +astor==0.8.1
    +attrs==25.4.0
    +blake3==1.0.8
    +cachetools==7.0.1
    +cbor2==5.8.0
    +certifi==2026.1.4
    +cffi==2.0.0
    +charset-normalizer==3.4.4
    +click==8.3.1
    +cloudpickle==3.1.2
    +compressed-tensors==0.13.0
    +cryptography==46.0.5
    +cuda-bindings==13.1.1
    +cuda-pathfinder==1.3.5
    +cuda-python==13.1.1
    +cupy-cuda12x==14.0.1
    +datasets==4.5.0
    +depyf==0.20.0
    +dill==0.4.0
    +diskcache==5.6.3
    +distro==1.9.0
    +dnspython==2.8.0
    +docstring_parser==0.17.0
    +einops==0.8.2
    +email-validator==2.3.0
    +fastapi==0.132.0
    +fastapi-cli==0.0.23
    +fastapi-cloud-cli==0.13.0
    +fastar==0.8.0
    +filelock==3.24.3
    +flashinfer-python==0.6.1
    +frozenlist==1.8.0
    +fsspec==2025.10.0
    +gguf==0.17.1
    +grpcio==1.78.1
    +grpcio-reflection==1.78.1
    +h11==0.16.0
    +hf-xet==1.3.0
    +httpcore==1.0.9
    +httptools==0.7.1
    +httpx==0.28.1
    +httpx-sse==0.4.3
    +huggingface_hub==0.36.2
    +idna==3.11
    +ijson==3.5.0
    +interegular==0.3.3
    +Jinja2==3.1.6
    +jiter==0.13.0
    +jmespath==1.1.0
    +jsonschema==4.26.0
    +jsonschema-specifications==2025.9.1
    +lark==1.2.2
    +llguidance==1.3.0
    +llmsql==0.1.15
    +llvmlite==0.44.0
    +lm-format-enforcer==0.11.3
    +loguru==0.7.3
    +markdown-it-py==4.0.0
    +MarkupSafe==3.0.3
    +mcp==1.26.0
    +mdurl==0.1.2
    +mistral_common==1.9.1
    +model-hosting-container-standards==0.1.13
    +mpmath==1.3.0
    +msgpack==1.1.2
    +msgspec==0.20.0
    +multidict==6.7.1
    +multiprocess==0.70.18
    +networkx==3.6.1
    +ninja==1.13.0
    +numba==0.61.2
    +numpy==2.2.6
    +nvidia-cublas-cu12==12.8.4.1
    +nvidia-cuda-cupti-cu12==12.8.90
    +nvidia-cuda-nvrtc-cu12==12.8.93
    +nvidia-cuda-runtime-cu12==12.8.90
    +nvidia-cudnn-cu12==9.10.2.21
    +nvidia-cudnn-frontend==1.18.0
    +nvidia-cufft-cu12==11.3.3.83
    +nvidia-cufile-cu12==1.13.1.3
    +nvidia-curand-cu12==10.3.9.90
    +nvidia-cusolver-cu12==11.7.3.90
    +nvidia-cusparse-cu12==12.5.8.93
    +nvidia-cusparselt-cu12==0.7.1
    +nvidia-cutlass-dsl==4.4.0
    +nvidia-cutlass-dsl-libs-base==4.4.0
    +nvidia-ml-py==13.590.48
    +nvidia-nccl-cu12==2.27.5
    +nvidia-nvjitlink-cu12==12.8.93
    +nvidia-nvshmem-cu12==3.3.20
    +nvidia-nvtx-cu12==12.8.90
    +openai==2.23.0
    +openai-harmony==0.0.8
    +opencv-python-headless==4.13.0.92
    +outlines_core==0.2.11
    +packaging==26.0
    +pandas==3.0.1
    +partial-json-parser==0.2.1.1.post7
    +pillow==12.1.1
    +prometheus-fastapi-instrumentator==7.1.0
    +prometheus_client==0.24.1
    +propcache==0.4.1
    +protobuf==6.33.5
    +psutil==7.2.2
    +py-cpuinfo==9.0.0
    +pyarrow==23.0.1
    +pybase64==1.4.3
    +pycountry==26.2.16
    +pycparser==3.0
    +pydantic==2.12.5
    +pydantic-extra-types==2.11.0
    +pydantic-settings==2.13.1
    +pydantic_core==2.41.5
    +Pygments==2.19.2
    +PyJWT==2.11.0
    +python-dateutil==2.9.0.post0
    +python-dotenv==1.2.1
    +python-json-logger==4.0.0
    +python-multipart==0.0.22
    +PyYAML==6.0.3
    +pyzmq==27.1.0
    +ray==2.54.0
    +referencing==0.37.0
    +regex==2026.2.19
    +requests==2.32.5
    +rich==14.3.3
    +rich-toolkit==0.19.4
    +rignore==0.7.6
    +rpds-py==0.30.0
    +safetensors==0.7.0
    +sentencepiece==0.2.1
    +sentry-sdk==2.53.0
    +setproctitle==1.3.7
    +setuptools==80.10.2
    +shellingham==1.5.4
    +six==1.17.0
    +sniffio==1.3.1
    +sse-starlette==3.2.0
    +starlette==0.52.1
    +supervisor==4.3.0
    +sympy==1.14.0
    +tabulate==0.9.0
    +tiktoken==0.12.0
    +tokenizers==0.22.2
    +torch==2.9.1
    +torchaudio==2.9.1
    +torchvision==0.24.1
    +tqdm==4.67.3
    +transformers==4.57.6
    +triton==3.5.1
    +typer==0.24.1
    +typer-slim==0.24.0
    +typing-inspection==0.4.2
    +typing_extensions==4.15.0
    +urllib3==2.6.3
    +uvicorn==0.41.0
    +uvloop==0.22.1
    +vllm==0.15.1
    +watchfiles==1.1.1
    +websockets==16.0
    +xgrammar==0.1.29
    +xxhash==3.6.0
    +yarl==1.22.0
    diff --git a/leaderboard/Mistral-Nemo-Instruct-2407/5fewshots/run.yaml b/leaderboard/Mistral-Nemo-Instruct-2407/5fewshots/run.yaml
    new file mode 100644
    index 0000000..7914a99
    --- /dev/null
    +++ b/leaderboard/Mistral-Nemo-Instruct-2407/5fewshots/run.yaml
    @@ -0,0 +1,56 @@
    +date: 2026-02-24
    +# =====================
    +# Model Information
    +# =====================
    +model:
    +  name: mistralai/Mistral-Nemo-Instruct-2407
    +  revision: main
    +  commit_hash: 04d8a90549d23fc6bd7f642064003592df51e9b3
    +  parameter_count: 12B
    +  dtype: bfloat16
    +  thinking: false
    +
    +type: open-source  # open-source | proprietary
    +
    +# =====================
    +# Package Information
    +# =====================
    +llmsql:
    +  version: 0.1.15
    +  commit_hash: 79175212c90b1fc094abd2c9666c23d903060014
    +
    +# =====================
    +# Benchmark Information
    +# =====================
    +version: 2.0
    +
    +# =====================
    +# Environment Information
    +# =====================
    +os_name: Ubuntu 24.04.3 LTS
    +python_version: 3.12.12
    +pip_freeze: requirements.txt
    +device: 4xH200
    +
    +# =====================
    +# Function Inputs / Inference Backend
    +# =====================
    +inference:
    +  backend: vllm  # vllm | transformers
    +  arguments:
    +    batch_size: 20000
    +    tensor_parallel_size: 4
    +    do_sample: false
    +    max_new_tokens: 256
    +    temperature: 0.0
    +    num_fewshots: 5
    +    seed: 42
    +    llm_kwargs:
    +      dtype: bfloat16
    +
    +# =====================
    +# Results
    +# =====================
    +results:
    +  execution_accuracy: 0.7599
    +  answers_path: https://huggingface.co/datasets/llmsql-bench/benchmark-evaluation-results/blob/main/Mistral-Nemo-Instruct-2407/5fewshots/Mistral-Nemo-Instruct-2407_outputs.jsonl
    diff --git a/leaderboard/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/5fewshots/inference_script.py b/leaderboard/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/5fewshots/inference_script.py
    new file mode 100644
    index 0000000..fa7cfb7
    --- /dev/null
    +++ b/leaderboard/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/5fewshots/inference_script.py
    @@ -0,0 +1,26 @@
    +import os
    +
    +from dotenv import load_dotenv
    +
    +from llmsql import evaluate, inference_vllm
    +
    +load_dotenv()
    +
    +MODEL_NAME = "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16"
    +
    +results = inference_vllm(
    +    model_name=MODEL_NAME,
    +    output_file=f"{MODEL_NAME}_outputs.jsonl",
    +    batch_size=20000,
    +    tensor_parallel_size=4,
    +    do_sample=True,
    +    hf_token=os.environ["HF_TOKEN"],
    +    max_new_tokens=1024,
    +    temperature=1.0,
    +    sampling_kwargs={"top_p": 1.0},
    +    num_fewshots=5,
    +    seed=42,
    +    llm_kwargs={"dtype": "bfloat16"},
    +)
    +
    +evaluate(results)
    diff --git a/leaderboard/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/5fewshots/requirements.txt b/leaderboard/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/5fewshots/requirements.txt
    new file mode 100644
    index 0000000..929f583
    --- /dev/null
    +++ b/leaderboard/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/5fewshots/requirements.txt
    @@ -0,0 +1,172 @@
    +accelerate==1.12.0
    +aiohappyeyeballs==2.6.1
    +aiohttp==3.13.3
    +aiosignal==1.4.0
    +annotated-doc==0.0.4
    +annotated-types==0.7.0
    +anthropic==0.83.0
    +anyio==4.12.1
    +apache-tvm-ffi==0.1.8.post2
    +astor==0.8.1
    +attrs==25.4.0
    +blake3==1.0.8
    +cachetools==7.0.1
    +cbor2==5.8.0
    +certifi==2026.1.4
    +cffi==2.0.0
    +charset-normalizer==3.4.4
    +click==8.3.1
    +cloudpickle==3.1.2
    +compressed-tensors==0.13.0
    +cryptography==46.0.5
    +cuda-bindings==13.1.1
    +cuda-pathfinder==1.3.5
    +cuda-python==13.1.1
    +cupy-cuda12x==14.0.1
    +datasets==4.5.0
    +depyf==0.20.0
    +dill==0.4.0
    +diskcache==5.6.3
    +distro==1.9.0
    +dnspython==2.8.0
    +docstring_parser==0.17.0
    +einops==0.8.2
    +email-validator==2.3.0
    +fastapi==0.132.0
    +fastapi-cli==0.0.23
    +fastapi-cloud-cli==0.13.0
    +fastar==0.8.0
    +filelock==3.24.3
    +flashinfer-python==0.6.1
    +frozenlist==1.8.0
    +fsspec==2025.10.0
    +gguf==0.17.1
    +grpcio==1.78.1
    +grpcio-reflection==1.78.1
    +h11==0.16.0
    +hf-xet==1.3.0
    +httpcore==1.0.9
    +httptools==0.7.1
    +httpx==0.28.1
    +httpx-sse==0.4.3
    +huggingface_hub==0.36.2
    +idna==3.11
    +ijson==3.5.0
    +interegular==0.3.3
    +Jinja2==3.1.6
    +jiter==0.13.0
    +jmespath==1.1.0
    +jsonschema==4.26.0
    +jsonschema-specifications==2025.9.1
    +lark==1.2.2
    +llguidance==1.3.0
    +llmsql==0.1.15
    +llvmlite==0.44.0
    +lm-format-enforcer==0.11.3
    +loguru==0.7.3
    +markdown-it-py==4.0.0
    +MarkupSafe==3.0.3
    +mcp==1.26.0
    +mdurl==0.1.2
    +mistral_common==1.9.1
    +model-hosting-container-standards==0.1.13
    +mpmath==1.3.0
    +msgpack==1.1.2
    +msgspec==0.20.0
    +multidict==6.7.1
    +multiprocess==0.70.18
    +networkx==3.6.1
    +ninja==1.13.0
    +numba==0.61.2
    +numpy==2.2.6
    +nvidia-cublas-cu12==12.8.4.1
    +nvidia-cuda-cupti-cu12==12.8.90
    +nvidia-cuda-nvrtc-cu12==12.8.93
    +nvidia-cuda-runtime-cu12==12.8.90
    +nvidia-cudnn-cu12==9.10.2.21
    +nvidia-cudnn-frontend==1.18.0
    +nvidia-cufft-cu12==11.3.3.83
    +nvidia-cufile-cu12==1.13.1.3
    +nvidia-curand-cu12==10.3.9.90
    +nvidia-cusolver-cu12==11.7.3.90
    +nvidia-cusparse-cu12==12.5.8.93
    +nvidia-cusparselt-cu12==0.7.1
    +nvidia-cutlass-dsl==4.4.0
    +nvidia-cutlass-dsl-libs-base==4.4.0
    +nvidia-ml-py==13.590.48
    +nvidia-nccl-cu12==2.27.5
    +nvidia-nvjitlink-cu12==12.8.93
    +nvidia-nvshmem-cu12==3.3.20
    +nvidia-nvtx-cu12==12.8.90
    +openai==2.23.0
    +openai-harmony==0.0.8
    +opencv-python-headless==4.13.0.92
    +outlines_core==0.2.11
    +packaging==26.0
    +pandas==3.0.1
    +partial-json-parser==0.2.1.1.post7
    +pillow==12.1.1
    +prometheus-fastapi-instrumentator==7.1.0
    +prometheus_client==0.24.1
    +propcache==0.4.1
    +protobuf==6.33.5
    +psutil==7.2.2
    +py-cpuinfo==9.0.0
    +pyarrow==23.0.1
    +pybase64==1.4.3
    +pycountry==26.2.16
    +pycparser==3.0
    +pydantic==2.12.5
    +pydantic-extra-types==2.11.0
    +pydantic-settings==2.13.1
    +pydantic_core==2.41.5
    +Pygments==2.19.2
    +PyJWT==2.11.0
    +python-dateutil==2.9.0.post0
    +python-dotenv==1.2.1
    +python-json-logger==4.0.0
    +python-multipart==0.0.22
    +PyYAML==6.0.3
    +pyzmq==27.1.0
    +ray==2.54.0
    +referencing==0.37.0
    +regex==2026.2.19
    +requests==2.32.5
    +rich==14.3.3
    +rich-toolkit==0.19.4
    +rignore==0.7.6
    +rpds-py==0.30.0
    +safetensors==0.7.0
    +sentencepiece==0.2.1
    +sentry-sdk==2.53.0
    +setproctitle==1.3.7
    +setuptools==80.10.2
    +shellingham==1.5.4
    +six==1.17.0
    +sniffio==1.3.1
    +sse-starlette==3.2.0
    +starlette==0.52.1
    +supervisor==4.3.0
    +sympy==1.14.0
    +tabulate==0.9.0
    +tiktoken==0.12.0
    +tokenizers==0.22.2
    +torch==2.9.1
    +torchaudio==2.9.1
    +torchvision==0.24.1
    +tqdm==4.67.3
    +transformers==4.57.6
    +triton==3.5.1
    +typer==0.24.1
    +typer-slim==0.24.0
    +typing-inspection==0.4.2
    +typing_extensions==4.15.0
    +urllib3==2.6.3
    +uvicorn==0.41.0
    +uvloop==0.22.1
    +vllm==0.15.1
    +watchfiles==1.1.1
    +websockets==16.0
    +xgrammar==0.1.29
    +xxhash==3.6.0
    +yarl==1.22.0
    diff --git a/leaderboard/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/5fewshots/run.yaml b/leaderboard/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/5fewshots/run.yaml
    new file mode 100644
    index 0000000..a2d5154
    --- /dev/null
    +++ b/leaderboard/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/5fewshots/run.yaml
    @@ -0,0 +1,59 @@
    +date: 2026-02-24
    +# =====================
    +# Model Information
    +# =====================
    +model:
    +  name: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16
    +  revision: main
    +  commit_hash: 5a48de7e98cce824b3456eb9857ded839c3b6475
    +  parameter_count: 30B
    +  dtype: bfloat16
    +  thinking: true
    +
    +type: open-source  # open-source | proprietary
    +
    +# =====================
    +# Package Information
    +# =====================
    +llmsql:
    +  version: 0.1.15
    +  commit_hash: 79175212c90b1fc094abd2c9666c23d903060014
    +
    +# =====================
    +# Benchmark Information
    +# =====================
    +version: 2.0
    +
    +# =====================
    +# Environment Information
    +# =====================
    +os_name: Ubuntu 24.04.3 LTS
    +python_version: 3.12.12
    +pip_freeze: requirements.txt
    +device: 4xH200
    +
    +# =====================
    +# Function Inputs / Inference Backend
    +# =====================
    +inference:
    +  backend: vllm  # vllm | transformers
    +  arguments:
    +    batch_size: 20000
    +    tensor_parallel_size: 4
    +    do_sample: true
    +    max_new_tokens: 1024
    +    temperature: 1.0
    +    sampling_kwargs:
    +      top_p: 1.0
    +    num_fewshots: 5
    +    seed: 42
    +    llm_kwargs:
    +      dtype: bfloat16
    +
    +
    +# =====================
    +# Results
    +# =====================
    +results:
    +  execution_accuracy: 0.8519
    +  answers_path: https://huggingface.co/datasets/llmsql-bench/benchmark-evaluation-results/blob/main/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/5fewshots/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16_outputs.jsonl
    diff --git a/leaderboard/PLLuM-12B-chat/5fewshots/inference_script.py b/leaderboard/PLLuM-12B-chat/5fewshots/inference_script.py
    new file mode 100644
    index 0000000..59bfa27
    --- /dev/null
    +++ b/leaderboard/PLLuM-12B-chat/5fewshots/inference_script.py
    @@ -0,0 +1,25 @@
    +import os
    +
    +from dotenv import load_dotenv
    +
    +from llmsql import evaluate, inference_vllm
    +
    +load_dotenv()
    +
    +MODEL_NAME = "CYFRAGOVPL/PLLuM-12B-chat"
    +
    +results = inference_vllm(
    +    model_name=MODEL_NAME,
    +    output_file=f"{MODEL_NAME}_outputs.jsonl",
    +    batch_size=20000,
    +    tensor_parallel_size=4,
    +    do_sample=False,
    +    hf_token=os.environ["HF_TOKEN"],
    +    max_new_tokens=256,
    +    temperature=0.0,
    +    num_fewshots=5,
    +    seed=42,
    +    llm_kwargs={"dtype": "bfloat16"},
    +)
    +
    +evaluate(results)
    diff --git a/leaderboard/PLLuM-12B-chat/5fewshots/requirements.txt b/leaderboard/PLLuM-12B-chat/5fewshots/requirements.txt
    new file mode 100644
    index 0000000..929f583
    --- /dev/null
    +++ b/leaderboard/PLLuM-12B-chat/5fewshots/requirements.txt
    @@ -0,0 +1,172 @@
    +accelerate==1.12.0
    +aiohappyeyeballs==2.6.1
    +aiohttp==3.13.3
    +aiosignal==1.4.0
    +annotated-doc==0.0.4
    +annotated-types==0.7.0
    +anthropic==0.83.0
    +anyio==4.12.1
    +apache-tvm-ffi==0.1.8.post2
    +astor==0.8.1
    +attrs==25.4.0
    +blake3==1.0.8
    +cachetools==7.0.1
    +cbor2==5.8.0
    +certifi==2026.1.4
    +cffi==2.0.0
    +charset-normalizer==3.4.4
    +click==8.3.1
    +cloudpickle==3.1.2
    +compressed-tensors==0.13.0
    +cryptography==46.0.5
    +cuda-bindings==13.1.1
    +cuda-pathfinder==1.3.5
    +cuda-python==13.1.1
    +cupy-cuda12x==14.0.1
    +datasets==4.5.0
    +depyf==0.20.0
    +dill==0.4.0
    +diskcache==5.6.3
    +distro==1.9.0
    +dnspython==2.8.0
    +docstring_parser==0.17.0
    +einops==0.8.2
    +email-validator==2.3.0
    +fastapi==0.132.0
    +fastapi-cli==0.0.23
    +fastapi-cloud-cli==0.13.0
    +fastar==0.8.0
    +filelock==3.24.3
    +flashinfer-python==0.6.1
    +frozenlist==1.8.0
    +fsspec==2025.10.0
    +gguf==0.17.1
    +grpcio==1.78.1
    +grpcio-reflection==1.78.1
    +h11==0.16.0
    +hf-xet==1.3.0
    +httpcore==1.0.9
    +httptools==0.7.1
    +httpx==0.28.1
    +httpx-sse==0.4.3
    +huggingface_hub==0.36.2
    +idna==3.11
    +ijson==3.5.0
    +interegular==0.3.3
    +Jinja2==3.1.6
    +jiter==0.13.0
    +jmespath==1.1.0
    +jsonschema==4.26.0
    +jsonschema-specifications==2025.9.1
    +lark==1.2.2
    +llguidance==1.3.0
    +llmsql==0.1.15
    +llvmlite==0.44.0
    +lm-format-enforcer==0.11.3
    +loguru==0.7.3
    +markdown-it-py==4.0.0
    +MarkupSafe==3.0.3
    +mcp==1.26.0
    +mdurl==0.1.2
    +mistral_common==1.9.1
    +model-hosting-container-standards==0.1.13
    +mpmath==1.3.0
    +msgpack==1.1.2
    +msgspec==0.20.0
    +multidict==6.7.1
    +multiprocess==0.70.18
    +networkx==3.6.1
    +ninja==1.13.0
    +numba==0.61.2
    +numpy==2.2.6
    +nvidia-cublas-cu12==12.8.4.1
    +nvidia-cuda-cupti-cu12==12.8.90
    +nvidia-cuda-nvrtc-cu12==12.8.93
    +nvidia-cuda-runtime-cu12==12.8.90
    +nvidia-cudnn-cu12==9.10.2.21
    +nvidia-cudnn-frontend==1.18.0
    +nvidia-cufft-cu12==11.3.3.83
    +nvidia-cufile-cu12==1.13.1.3
    +nvidia-curand-cu12==10.3.9.90
    +nvidia-cusolver-cu12==11.7.3.90
    +nvidia-cusparse-cu12==12.5.8.93
    +nvidia-cusparselt-cu12==0.7.1
    +nvidia-cutlass-dsl==4.4.0
    +nvidia-cutlass-dsl-libs-base==4.4.0
    +nvidia-ml-py==13.590.48
    +nvidia-nccl-cu12==2.27.5
    +nvidia-nvjitlink-cu12==12.8.93
    +nvidia-nvshmem-cu12==3.3.20
    +nvidia-nvtx-cu12==12.8.90
    +openai==2.23.0
    +openai-harmony==0.0.8
    +opencv-python-headless==4.13.0.92
    +outlines_core==0.2.11
    +packaging==26.0
    +pandas==3.0.1
    +partial-json-parser==0.2.1.1.post7
    +pillow==12.1.1
    +prometheus-fastapi-instrumentator==7.1.0
    +prometheus_client==0.24.1
    +propcache==0.4.1
    +protobuf==6.33.5
    +psutil==7.2.2
    +py-cpuinfo==9.0.0
    +pyarrow==23.0.1
    +pybase64==1.4.3
    +pycountry==26.2.16
    +pycparser==3.0
    +pydantic==2.12.5
    +pydantic-extra-types==2.11.0
    +pydantic-settings==2.13.1
    +pydantic_core==2.41.5
    +Pygments==2.19.2
    +PyJWT==2.11.0
    +python-dateutil==2.9.0.post0
    +python-dotenv==1.2.1
    +python-json-logger==4.0.0
    +python-multipart==0.0.22
    +PyYAML==6.0.3
    +pyzmq==27.1.0
    +ray==2.54.0
    +referencing==0.37.0
    +regex==2026.2.19
    +requests==2.32.5
    +rich==14.3.3
    +rich-toolkit==0.19.4
    +rignore==0.7.6
    +rpds-py==0.30.0
    +safetensors==0.7.0
    +sentencepiece==0.2.1
    +sentry-sdk==2.53.0
    +setproctitle==1.3.7
    +setuptools==80.10.2
    +shellingham==1.5.4
    +six==1.17.0
    +sniffio==1.3.1
    +sse-starlette==3.2.0
    +starlette==0.52.1
    +supervisor==4.3.0
    +sympy==1.14.0
    +tabulate==0.9.0
    +tiktoken==0.12.0
    +tokenizers==0.22.2
    +torch==2.9.1
    +torchaudio==2.9.1
    +torchvision==0.24.1
    +tqdm==4.67.3
    +transformers==4.57.6
    +triton==3.5.1
    +typer==0.24.1
    +typer-slim==0.24.0
    +typing-inspection==0.4.2
    +typing_extensions==4.15.0
    +urllib3==2.6.3
    +uvicorn==0.41.0
    +uvloop==0.22.1
    +vllm==0.15.1
    +watchfiles==1.1.1
    +websockets==16.0
    +xgrammar==0.1.29
    +xxhash==3.6.0
    +yarl==1.22.0
    diff --git a/leaderboard/PLLuM-12B-chat/5fewshots/run.yaml b/leaderboard/PLLuM-12B-chat/5fewshots/run.yaml
    new file mode 100644
    index 0000000..6a2ab5f
    --- /dev/null
    +++ b/leaderboard/PLLuM-12B-chat/5fewshots/run.yaml
    @@ -0,0 +1,57 @@
    +date: 2026-02-24
    +# =====================
    +# Model Information
    +# =====================
    +model:
    +  name: CYFRAGOVPL/PLLuM-12B-chat
    +  revision: main
    +  commit_hash: 74d80ff96552d9555f6f6f28321433da3895d2ec
    +  parameter_count: 12B
    +  dtype: bfloat16
    +  thinking: false
    +
    +type: open-source  # open-source | proprietary
    +
    +# =====================
    +# Package Information
    +# =====================
    +llmsql:
    +  version: 0.1.15
    +  commit_hash: 79175212c90b1fc094abd2c9666c23d903060014
    +
    +# =====================
    +# Benchmark Information
    +# =====================
    +version: 2.0
    +
    +# =====================
    +# Environment Information
    +# =====================
    +os_name: Ubuntu 24.04.3 LTS
    +python_version: 3.12.12
    +pip_freeze: requirements.txt
    +device: 4xH200
    +
    +# =====================
    +# Function Inputs / Inference Backend
    +# =====================
    +inference:
    +  backend: vllm  # vllm | transformers
    +  arguments:
    +    batch_size: 20000
    +    tensor_parallel_size: 4
    +    do_sample: false
    +    max_new_tokens: 256
    +    temperature: 0.0
    +    num_fewshots: 5
    +    seed: 42
    +    llm_kwargs:
    +      dtype: bfloat16
    +
    +
    +# =====================
    +# Results
    +# =====================
    +results:
    +  execution_accuracy: 0.5224
    +  answers_path: https://huggingface.co/datasets/llmsql-bench/benchmark-evaluation-results/blob/main/PLLuM-12B-chat/5fewshots/PLLuM-12B-chat_outputs.jsonl
    diff --git a/leaderboard/PLLuM-12B-nc-chat/5fewshots/inference_script.py b/leaderboard/PLLuM-12B-nc-chat/5fewshots/inference_script.py
    new file mode 100644
    index 0000000..372e696
    --- /dev/null
    +++ b/leaderboard/PLLuM-12B-nc-chat/5fewshots/inference_script.py
    @@ -0,0 +1,25 @@
    +import os
    +
    +from dotenv import load_dotenv
    +
    +from llmsql import evaluate, inference_vllm
    +
    +load_dotenv()
    +
    +MODEL_NAME = "CYFRAGOVPL/PLLuM-12B-nc-chat"
    +
    +results = inference_vllm(
    +    model_name=MODEL_NAME,
    +    output_file=f"{MODEL_NAME}_outputs.jsonl",
    +    batch_size=20000,
    +    tensor_parallel_size=4,
    +    do_sample=False,
    +    hf_token=os.environ["HF_TOKEN"],
    +    max_new_tokens=256,
    +    temperature=0.0,
    +    num_fewshots=5,
    +    seed=42,
    +    llm_kwargs={"dtype": "bfloat16"},
    +)
    +
    +evaluate(results)
    diff --git a/leaderboard/PLLuM-12B-nc-chat/5fewshots/requirements.txt b/leaderboard/PLLuM-12B-nc-chat/5fewshots/requirements.txt
    new file mode 100644
    index 0000000..929f583
    --- /dev/null
    +++ b/leaderboard/PLLuM-12B-nc-chat/5fewshots/requirements.txt
    @@ -0,0 +1,172 @@
    +accelerate==1.12.0
    +aiohappyeyeballs==2.6.1
    +aiohttp==3.13.3
    +aiosignal==1.4.0
    +annotated-doc==0.0.4
    +annotated-types==0.7.0
    +anthropic==0.83.0
    +anyio==4.12.1
    +apache-tvm-ffi==0.1.8.post2
    +astor==0.8.1
    +attrs==25.4.0
    +blake3==1.0.8
    +cachetools==7.0.1
    +cbor2==5.8.0
    +certifi==2026.1.4
    +cffi==2.0.0
    +charset-normalizer==3.4.4
    +click==8.3.1
    +cloudpickle==3.1.2
    +compressed-tensors==0.13.0
    +cryptography==46.0.5
    +cuda-bindings==13.1.1
    +cuda-pathfinder==1.3.5
    +cuda-python==13.1.1
    +cupy-cuda12x==14.0.1
    +datasets==4.5.0
    +depyf==0.20.0
    +dill==0.4.0
    +diskcache==5.6.3
    +distro==1.9.0
    +dnspython==2.8.0
    +docstring_parser==0.17.0
    +einops==0.8.2
    +email-validator==2.3.0
    +fastapi==0.132.0
    +fastapi-cli==0.0.23
    +fastapi-cloud-cli==0.13.0
    +fastar==0.8.0
    +filelock==3.24.3
    +flashinfer-python==0.6.1
    +frozenlist==1.8.0
    +fsspec==2025.10.0
    +gguf==0.17.1
    +grpcio==1.78.1
    +grpcio-reflection==1.78.1
    +h11==0.16.0
    +hf-xet==1.3.0
    +httpcore==1.0.9
    +httptools==0.7.1
    +httpx==0.28.1
    +httpx-sse==0.4.3
    +huggingface_hub==0.36.2
    +idna==3.11
    +ijson==3.5.0
    +interegular==0.3.3
    +Jinja2==3.1.6
    +jiter==0.13.0
    +jmespath==1.1.0
    +jsonschema==4.26.0
    +jsonschema-specifications==2025.9.1
    +lark==1.2.2
    +llguidance==1.3.0
    +llmsql==0.1.15
    +llvmlite==0.44.0
    +lm-format-enforcer==0.11.3
    +loguru==0.7.3
    +markdown-it-py==4.0.0
    +MarkupSafe==3.0.3
    +mcp==1.26.0
    +mdurl==0.1.2
    +mistral_common==1.9.1
    +model-hosting-container-standards==0.1.13
    +mpmath==1.3.0
    +msgpack==1.1.2
    +msgspec==0.20.0
    +multidict==6.7.1
    +multiprocess==0.70.18
    +networkx==3.6.1
    +ninja==1.13.0
    +numba==0.61.2
    +numpy==2.2.6
    +nvidia-cublas-cu12==12.8.4.1
    +nvidia-cuda-cupti-cu12==12.8.90
    +nvidia-cuda-nvrtc-cu12==12.8.93
    +nvidia-cuda-runtime-cu12==12.8.90
    +nvidia-cudnn-cu12==9.10.2.21
    +nvidia-cudnn-frontend==1.18.0
    +nvidia-cufft-cu12==11.3.3.83
    +nvidia-cufile-cu12==1.13.1.3
    +nvidia-curand-cu12==10.3.9.90
    +nvidia-cusolver-cu12==11.7.3.90
    +nvidia-cusparse-cu12==12.5.8.93
    +nvidia-cusparselt-cu12==0.7.1
    +nvidia-cutlass-dsl==4.4.0
    +nvidia-cutlass-dsl-libs-base==4.4.0
    +nvidia-ml-py==13.590.48
    +nvidia-nccl-cu12==2.27.5
    +nvidia-nvjitlink-cu12==12.8.93
    +nvidia-nvshmem-cu12==3.3.20
    +nvidia-nvtx-cu12==12.8.90
    +openai==2.23.0
    +openai-harmony==0.0.8
    +opencv-python-headless==4.13.0.92
    +outlines_core==0.2.11
    +packaging==26.0
    +pandas==3.0.1
    +partial-json-parser==0.2.1.1.post7
    +pillow==12.1.1
    +prometheus-fastapi-instrumentator==7.1.0
    +prometheus_client==0.24.1
    +propcache==0.4.1
    +protobuf==6.33.5
    +psutil==7.2.2
    +py-cpuinfo==9.0.0
    +pyarrow==23.0.1
    +pybase64==1.4.3
    +pycountry==26.2.16
    +pycparser==3.0
    +pydantic==2.12.5
    +pydantic-extra-types==2.11.0
    +pydantic-settings==2.13.1
    +pydantic_core==2.41.5
    +Pygments==2.19.2
    +PyJWT==2.11.0
    +python-dateutil==2.9.0.post0
    +python-dotenv==1.2.1
    +python-json-logger==4.0.0
    +python-multipart==0.0.22
    +PyYAML==6.0.3
    +pyzmq==27.1.0
    +ray==2.54.0
    +referencing==0.37.0
    +regex==2026.2.19
    +requests==2.32.5
    +rich==14.3.3
    +rich-toolkit==0.19.4
    +rignore==0.7.6
    +rpds-py==0.30.0
    +safetensors==0.7.0
    +sentencepiece==0.2.1
    +sentry-sdk==2.53.0
    +setproctitle==1.3.7
    +setuptools==80.10.2
    +shellingham==1.5.4
    +six==1.17.0
    +sniffio==1.3.1
    +sse-starlette==3.2.0
    +starlette==0.52.1
    +supervisor==4.3.0
    +sympy==1.14.0
    +tabulate==0.9.0
    +tiktoken==0.12.0
    +tokenizers==0.22.2
    +torch==2.9.1
    +torchaudio==2.9.1
    +torchvision==0.24.1
    +tqdm==4.67.3
    +transformers==4.57.6
    +triton==3.5.1
    +typer==0.24.1
    +typer-slim==0.24.0
    +typing-inspection==0.4.2
    +typing_extensions==4.15.0
    +urllib3==2.6.3
    +uvicorn==0.41.0
    +uvloop==0.22.1
    +vllm==0.15.1
    +watchfiles==1.1.1
    +websockets==16.0
    +xgrammar==0.1.29
    +xxhash==3.6.0
    +yarl==1.22.0
    diff --git a/leaderboard/PLLuM-12B-nc-chat/5fewshots/run.yaml b/leaderboard/PLLuM-12B-nc-chat/5fewshots/run.yaml
    new file mode 100644
    index 0000000..819021e
    --- /dev/null
    +++ b/leaderboard/PLLuM-12B-nc-chat/5fewshots/run.yaml
    @@ -0,0 +1,57 @@
    +date: 2026-02-24
    +# =====================
    +# Model Information
    +# =====================
    +model:
    +  name: CYFRAGOVPL/PLLuM-12B-nc-chat
    +  revision: main
    +  commit_hash: 7089352cfc2efbd2d3c64cc8cd5c97cd2c4fc013
    +  parameter_count: 12B
    +  dtype: bfloat16
    +  thinking: false
    +
    +type: open-source  # open-source | proprietary
    +
    +# =====================
    +# Package Information
    +# =====================
    +llmsql:
    +  version: 0.1.15
    +  commit_hash: 79175212c90b1fc094abd2c9666c23d903060014
    +
    +# =====================
    +# Benchmark Information
    +# =====================
    +version: 2.0
    +
    +
    +# =====================
    +# Environment Information
    +# =====================
    +os_name: Ubuntu 24.04.3 LTS
    +python_version: 3.12.12
    +pip_freeze: requirements.txt
    +device: 4xH200
    +
    +# =====================
    +# Function Inputs / Inference Backend
    +# =====================
    +inference:
    +  backend: vllm  # vllm | transformers
    +  arguments:
    +    batch_size: 20000
    +    tensor_parallel_size: 4
    +    do_sample: false
    +    max_new_tokens: 256
    +    temperature: 0.0
    +    num_fewshots: 5
    +    seed: 42
    +    llm_kwargs:
    +      dtype: bfloat16
    +
    +# =====================
    +# Results
    +# =====================
    +results:
    +  execution_accuracy: 0.4044
    +  answers_path: https://huggingface.co/datasets/llmsql-bench/benchmark-evaluation-results/blob/main/PLLuM-12B-nc-chat/5fewshots/PLLuM-12B-nc-chat_outputs.jsonl
    diff --git a/leaderboard/Qwen2.5-1.5B-Instruct/5fewshots/inference_script.py b/leaderboard/Qwen2.5-1.5B-Instruct/5fewshots/inference_script.py
    new file mode 100644
    index 0000000..c8af571
    --- /dev/null
    +++ b/leaderboard/Qwen2.5-1.5B-Instruct/5fewshots/inference_script.py
    @@ -0,0 +1,25 @@
    +import os
    +
    +from dotenv import load_dotenv
    +
    +from llmsql import evaluate, inference_vllm
    +
    +load_dotenv()
    +
    +MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
    +
    +results = inference_vllm(
    +    model_name=MODEL_NAME,
    +    output_file=f"{MODEL_NAME}_outputs.jsonl",
    +    batch_size=20000,
    +    tensor_parallel_size=4,
    +    do_sample=False,
    +    hf_token=os.environ["HF_TOKEN"],
    +    max_new_tokens=256,
    +    temperature=0.0,
    +    num_fewshots=5,
    +    seed=42,
    +    llm_kwargs={"dtype": "bfloat16"},
    +)
    +
    +evaluate(results)
    diff --git a/leaderboard/Qwen2.5-1.5B-Instruct/5fewshots/requirements.txt b/leaderboard/Qwen2.5-1.5B-Instruct/5fewshots/requirements.txt
    new file mode 100644
    index 0000000..929f583
    --- /dev/null
    +++ b/leaderboard/Qwen2.5-1.5B-Instruct/5fewshots/requirements.txt
    @@ -0,0 +1,172 @@
    +accelerate==1.12.0
    +aiohappyeyeballs==2.6.1
    +aiohttp==3.13.3
    +aiosignal==1.4.0
    +annotated-doc==0.0.4
    +annotated-types==0.7.0
    +anthropic==0.83.0
    +anyio==4.12.1
    +apache-tvm-ffi==0.1.8.post2
    +astor==0.8.1
    +attrs==25.4.0
    +blake3==1.0.8
    +cachetools==7.0.1
    +cbor2==5.8.0
    +certifi==2026.1.4
    +cffi==2.0.0
    +charset-normalizer==3.4.4
    +click==8.3.1
    +cloudpickle==3.1.2
    +compressed-tensors==0.13.0
    +cryptography==46.0.5
    +cuda-bindings==13.1.1
    +cuda-pathfinder==1.3.5
    +cuda-python==13.1.1
    +cupy-cuda12x==14.0.1
    +datasets==4.5.0
    +depyf==0.20.0
    +dill==0.4.0
    +diskcache==5.6.3
    +distro==1.9.0
    +dnspython==2.8.0
    +docstring_parser==0.17.0
    +einops==0.8.2
    +email-validator==2.3.0
    +fastapi==0.132.0
    +fastapi-cli==0.0.23
    +fastapi-cloud-cli==0.13.0
    +fastar==0.8.0
    +filelock==3.24.3
    +flashinfer-python==0.6.1
    +frozenlist==1.8.0
    +fsspec==2025.10.0
    +gguf==0.17.1
    +grpcio==1.78.1
    +grpcio-reflection==1.78.1
    +h11==0.16.0
    +hf-xet==1.3.0
    +httpcore==1.0.9
    +httptools==0.7.1
    +httpx==0.28.1
    +httpx-sse==0.4.3
    +huggingface_hub==0.36.2
    +idna==3.11
    +ijson==3.5.0
    +interegular==0.3.3
    +Jinja2==3.1.6
    +jiter==0.13.0
    +jmespath==1.1.0
    +jsonschema==4.26.0
    +jsonschema-specifications==2025.9.1
    +lark==1.2.2
    +llguidance==1.3.0
    +llmsql==0.1.15
    +llvmlite==0.44.0
    +lm-format-enforcer==0.11.3
    +loguru==0.7.3
    +markdown-it-py==4.0.0
    +MarkupSafe==3.0.3
    +mcp==1.26.0
    +mdurl==0.1.2
    +mistral_common==1.9.1
    +model-hosting-container-standards==0.1.13
    +mpmath==1.3.0
    +msgpack==1.1.2
    +msgspec==0.20.0
    +multidict==6.7.1
    +multiprocess==0.70.18
    +networkx==3.6.1
    +ninja==1.13.0
    +numba==0.61.2
    +numpy==2.2.6
    +nvidia-cublas-cu12==12.8.4.1
    +nvidia-cuda-cupti-cu12==12.8.90
    +nvidia-cuda-nvrtc-cu12==12.8.93
    +nvidia-cuda-runtime-cu12==12.8.90
    +nvidia-cudnn-cu12==9.10.2.21
    +nvidia-cudnn-frontend==1.18.0
    +nvidia-cufft-cu12==11.3.3.83
    +nvidia-cufile-cu12==1.13.1.3
    +nvidia-curand-cu12==10.3.9.90
    +nvidia-cusolver-cu12==11.7.3.90
    +nvidia-cusparse-cu12==12.5.8.93
    +nvidia-cusparselt-cu12==0.7.1
    +nvidia-cutlass-dsl==4.4.0
    +nvidia-cutlass-dsl-libs-base==4.4.0
    +nvidia-ml-py==13.590.48
    +nvidia-nccl-cu12==2.27.5
    +nvidia-nvjitlink-cu12==12.8.93
    +nvidia-nvshmem-cu12==3.3.20
    +nvidia-nvtx-cu12==12.8.90
    +openai==2.23.0
    +openai-harmony==0.0.8
    +opencv-python-headless==4.13.0.92
    +outlines_core==0.2.11
    +packaging==26.0
    +pandas==3.0.1
    +partial-json-parser==0.2.1.1.post7
    +pillow==12.1.1
    +prometheus-fastapi-instrumentator==7.1.0
    +prometheus_client==0.24.1
    +propcache==0.4.1
    +protobuf==6.33.5
    +psutil==7.2.2
    +py-cpuinfo==9.0.0
    +pyarrow==23.0.1
    +pybase64==1.4.3
    +pycountry==26.2.16
    +pycparser==3.0
    +pydantic==2.12.5
    +pydantic-extra-types==2.11.0
    +pydantic-settings==2.13.1
    +pydantic_core==2.41.5
    +Pygments==2.19.2
    +PyJWT==2.11.0
    +python-dateutil==2.9.0.post0
    +python-dotenv==1.2.1
    +python-json-logger==4.0.0
    +python-multipart==0.0.22
    +PyYAML==6.0.3
    +pyzmq==27.1.0
    +ray==2.54.0
    +referencing==0.37.0
    +regex==2026.2.19
    +requests==2.32.5
    +rich==14.3.3
    +rich-toolkit==0.19.4
    +rignore==0.7.6
    +rpds-py==0.30.0
    +safetensors==0.7.0
    +sentencepiece==0.2.1
    +sentry-sdk==2.53.0
    +setproctitle==1.3.7
    +setuptools==80.10.2
    +shellingham==1.5.4
    +six==1.17.0
    +sniffio==1.3.1
    +sse-starlette==3.2.0
    +starlette==0.52.1
    +supervisor==4.3.0
    +sympy==1.14.0
    +tabulate==0.9.0
    +tiktoken==0.12.0
    +tokenizers==0.22.2
    +torch==2.9.1
    +torchaudio==2.9.1
    +torchvision==0.24.1
    +tqdm==4.67.3
    +transformers==4.57.6
    +triton==3.5.1
    +typer==0.24.1
    +typer-slim==0.24.0
    +typing-inspection==0.4.2
    +typing_extensions==4.15.0
    +urllib3==2.6.3
    +uvicorn==0.41.0
    +uvloop==0.22.1
    +vllm==0.15.1
    +watchfiles==1.1.1
    +websockets==16.0
    +xgrammar==0.1.29
    +xxhash==3.6.0
    +yarl==1.22.0
    diff --git a/leaderboard/Qwen2.5-1.5B-Instruct/5fewshots/run.yaml b/leaderboard/Qwen2.5-1.5B-Instruct/5fewshots/run.yaml
    new file mode 100644
    index 0000000..47c8f25
    --- /dev/null
    +++ b/leaderboard/Qwen2.5-1.5B-Instruct/5fewshots/run.yaml
    @@ -0,0 +1,57 @@
    +date: 2026-02-24
    +# =====================
    +# Model Information
    +# =====================
    +model:
    +  name: Qwen/Qwen2.5-1.5B-Instruct
    +  revision: main
    +  commit_hash: 989aa7980e4cf806f80c7fef2b1adb7bc71aa306
    +  parameter_count: 1.5B
    +  dtype: bfloat16
    +  thinking: false
    +
    +type: open-source  # open-source | proprietary
    +
    +# =====================
    +# Package Information
    +# =====================
    +llmsql:
    +  version: 0.1.15
    +  commit_hash: 79175212c90b1fc094abd2c9666c23d903060014
    +
    +# =====================
    +# Benchmark Information
    +# =====================
    +version: 2.0
    +
    +# =====================
    +# Environment Information
    +# =====================
    +os_name: Ubuntu 24.04.3 LTS
    +python_version: 3.12.12
    +pip_freeze: requirements.txt
    +device: 4xH200
    +
    +# =====================
    +# Function Inputs / Inference Backend
    +# =====================
    +inference:
    +  backend: vllm  # vllm | transformers
    +  arguments:
    +    batch_size: 20000
    +    tensor_parallel_size: 4
    +    do_sample: false
    +    max_new_tokens: 256
    +    temperature: 0.0
    +    num_fewshots: 5
    +    seed: 42
    +    llm_kwargs:
    +      dtype: bfloat16
    +
    +
    +# =====================
    +# Results
    +# =====================
    +results:
    +  execution_accuracy: 0.6401
    +  answers_path: https://huggingface.co/datasets/llmsql-bench/benchmark-evaluation-results/blob/main/Qwen2.5-1.5B-Instruct/5fewshots/Qwen2.5-1.5B-Instruct_outputs.jsonl
    diff --git a/leaderboard/Qwen2.5-7B-Instruct/5fewshots/inference_script.py b/leaderboard/Qwen2.5-7B-Instruct/5fewshots/inference_script.py
    new file mode 100644
    index 0000000..e463467
    --- /dev/null
    +++ b/leaderboard/Qwen2.5-7B-Instruct/5fewshots/inference_script.py
    @@ -0,0 +1,25 @@
    +import os
    +
    +from dotenv import load_dotenv
    +
    +from llmsql import evaluate, inference_vllm
    +
    +load_dotenv()
    +
    +MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"
    +
    +results = inference_vllm(
    +    model_name=MODEL_NAME,
    +    output_file=f"{MODEL_NAME}_outputs.jsonl",
    +    batch_size=20000,
    +    tensor_parallel_size=4,
    +    do_sample=False,
    +    hf_token=os.environ["HF_TOKEN"],
    +    max_new_tokens=256,
    +    temperature=0.0,
    +    num_fewshots=5,
    +    seed=42,
    +    llm_kwargs={"dtype": "bfloat16"},
    +)
    +
    +evaluate(results)
    diff --git a/leaderboard/Qwen2.5-7B-Instruct/5fewshots/requirements.txt b/leaderboard/Qwen2.5-7B-Instruct/5fewshots/requirements.txt
    new file mode 100644
    index 0000000..929f583
    --- /dev/null
    +++ b/leaderboard/Qwen2.5-7B-Instruct/5fewshots/requirements.txt
    @@ -0,0 +1,172 @@
    +accelerate==1.12.0
    +aiohappyeyeballs==2.6.1
    +aiohttp==3.13.3
    +aiosignal==1.4.0
    +annotated-doc==0.0.4
    +annotated-types==0.7.0
    +anthropic==0.83.0
    +anyio==4.12.1
    +apache-tvm-ffi==0.1.8.post2
    +astor==0.8.1
    +attrs==25.4.0
    +blake3==1.0.8
    +cachetools==7.0.1
    +cbor2==5.8.0
    +certifi==2026.1.4
    +cffi==2.0.0
    +charset-normalizer==3.4.4
    +click==8.3.1
    +cloudpickle==3.1.2
    +compressed-tensors==0.13.0
    +cryptography==46.0.5
    +cuda-bindings==13.1.1
    +cuda-pathfinder==1.3.5
    +cuda-python==13.1.1
    +cupy-cuda12x==14.0.1
    +datasets==4.5.0
    +depyf==0.20.0
    +dill==0.4.0
    +diskcache==5.6.3
    +distro==1.9.0
    +dnspython==2.8.0
    +docstring_parser==0.17.0
    +einops==0.8.2
    +email-validator==2.3.0
    +fastapi==0.132.0
    +fastapi-cli==0.0.23
    +fastapi-cloud-cli==0.13.0
    +fastar==0.8.0
    +filelock==3.24.3
    +flashinfer-python==0.6.1
    +frozenlist==1.8.0
    +fsspec==2025.10.0
    +gguf==0.17.1
    +grpcio==1.78.1
    +grpcio-reflection==1.78.1
    +h11==0.16.0
    +hf-xet==1.3.0
    +httpcore==1.0.9
    +httptools==0.7.1
    +httpx==0.28.1
    +httpx-sse==0.4.3
    +huggingface_hub==0.36.2
    +idna==3.11
    +ijson==3.5.0
    +interegular==0.3.3
    +Jinja2==3.1.6
    +jiter==0.13.0
    +jmespath==1.1.0
    +jsonschema==4.26.0
    +jsonschema-specifications==2025.9.1
    +lark==1.2.2
    +llguidance==1.3.0
    +llmsql==0.1.15
    +llvmlite==0.44.0
    +lm-format-enforcer==0.11.3
    +loguru==0.7.3
    +markdown-it-py==4.0.0
    +MarkupSafe==3.0.3
    +mcp==1.26.0
    +mdurl==0.1.2
    +mistral_common==1.9.1
    +model-hosting-container-standards==0.1.13
    +mpmath==1.3.0
    +msgpack==1.1.2
    +msgspec==0.20.0
    +multidict==6.7.1
    +multiprocess==0.70.18
    +networkx==3.6.1
    +ninja==1.13.0
    +numba==0.61.2
    +numpy==2.2.6
    +nvidia-cublas-cu12==12.8.4.1
    +nvidia-cuda-cupti-cu12==12.8.90
    +nvidia-cuda-nvrtc-cu12==12.8.93
    +nvidia-cuda-runtime-cu12==12.8.90
    +nvidia-cudnn-cu12==9.10.2.21
    +nvidia-cudnn-frontend==1.18.0
    +nvidia-cufft-cu12==11.3.3.83
    +nvidia-cufile-cu12==1.13.1.3
    +nvidia-curand-cu12==10.3.9.90
    +nvidia-cusolver-cu12==11.7.3.90
    +nvidia-cusparse-cu12==12.5.8.93
    +nvidia-cusparselt-cu12==0.7.1
    +nvidia-cutlass-dsl==4.4.0
    +nvidia-cutlass-dsl-libs-base==4.4.0
    +nvidia-ml-py==13.590.48
    +nvidia-nccl-cu12==2.27.5
    +nvidia-nvjitlink-cu12==12.8.93
    +nvidia-nvshmem-cu12==3.3.20
    +nvidia-nvtx-cu12==12.8.90
    +openai==2.23.0
    +openai-harmony==0.0.8
    +opencv-python-headless==4.13.0.92
    +outlines_core==0.2.11
    +packaging==26.0
    +pandas==3.0.1
    +partial-json-parser==0.2.1.1.post7
    +pillow==12.1.1
    +prometheus-fastapi-instrumentator==7.1.0
    +prometheus_client==0.24.1
    +propcache==0.4.1
    +protobuf==6.33.5
    +psutil==7.2.2
    +py-cpuinfo==9.0.0
    +pyarrow==23.0.1
    +pybase64==1.4.3
    +pycountry==26.2.16
    +pycparser==3.0
    +pydantic==2.12.5
    +pydantic-extra-types==2.11.0
    +pydantic-settings==2.13.1
    +pydantic_core==2.41.5
    +Pygments==2.19.2
    +PyJWT==2.11.0
    +python-dateutil==2.9.0.post0
    +python-dotenv==1.2.1
    +python-json-logger==4.0.0
    +python-multipart==0.0.22
    +PyYAML==6.0.3
    +pyzmq==27.1.0
    +ray==2.54.0
    +referencing==0.37.0
    +regex==2026.2.19
    +requests==2.32.5
    +rich==14.3.3
    +rich-toolkit==0.19.4
    +rignore==0.7.6
    +rpds-py==0.30.0
    +safetensors==0.7.0
    +sentencepiece==0.2.1
    +sentry-sdk==2.53.0
    +setproctitle==1.3.7
    +setuptools==80.10.2
    +shellingham==1.5.4
    +six==1.17.0
    +sniffio==1.3.1
    +sse-starlette==3.2.0
    +starlette==0.52.1
    +supervisor==4.3.0
    +sympy==1.14.0
    +tabulate==0.9.0
    +tiktoken==0.12.0
    +tokenizers==0.22.2
    +torch==2.9.1
    +torchaudio==2.9.1
    +torchvision==0.24.1
    +tqdm==4.67.3
    +transformers==4.57.6
    +triton==3.5.1
    +typer==0.24.1
    +typer-slim==0.24.0
    +typing-inspection==0.4.2
    +typing_extensions==4.15.0
    +urllib3==2.6.3
    +uvicorn==0.41.0
    +uvloop==0.22.1
    +vllm==0.15.1
    +watchfiles==1.1.1
    +websockets==16.0
    +xgrammar==0.1.29
    +xxhash==3.6.0
    +yarl==1.22.0
    diff --git a/leaderboard/Qwen2.5-7B-Instruct/5fewshots/run.yaml b/leaderboard/Qwen2.5-7B-Instruct/5fewshots/run.yaml
    new file mode 100644
    index 0000000..0492a50
    --- /dev/null
    +++ b/leaderboard/Qwen2.5-7B-Instruct/5fewshots/run.yaml
    @@ -0,0 +1,56 @@
    +date: 2026-02-24
    +# =====================
    +# Model Information
    +# =====================
    +model:
    +  name: Qwen/Qwen2.5-7B-Instruct
    +  revision: main
    +  commit_hash: a09a35458c702b33eeacc393d103063234e8bc28
    +  parameter_count: 7B
    +  dtype: bfloat16
    +  thinking: false
    +
    +type: open-source  # open-source | proprietary
    +
    +# =====================
    +# Package Information
    +# =====================
    +llmsql:
    +  version: 0.1.15
    +  commit_hash: 79175212c90b1fc094abd2c9666c23d903060014
    +
    +# =====================
    +# Benchmark Information
    +# =====================
    +version: 2.0
    +
    +# =====================
    +# Environment Information
    +# =====================
    +os_name: Ubuntu 24.04.3 LTS
    +python_version: 3.12.12
    +pip_freeze: requirements.txt
    +device: 4xH200
    +
    +# =====================
    +# Function Inputs / Inference Backend
    +# =====================
    +inference:
    +  backend: vllm  # vllm | transformers
    +  arguments:
    +    batch_size: 20000
    +    tensor_parallel_size: 4
    +    do_sample: false
    +    max_new_tokens: 256
    +    temperature: 0.0
    +    num_fewshots: 5
    +    seed: 42
    +    llm_kwargs:
    +      dtype: bfloat16
    +
    +# =====================
    +# Results
    +# =====================
    +results:
    +  execution_accuracy: 0.7940
    +  answers_path: https://huggingface.co/datasets/llmsql-bench/benchmark-evaluation-results/blob/main/Qwen2.5-7B-Instruct/5fewshots/Qwen2.5-7B-Instruct_outputs.jsonl
    diff --git a/leaderboard/Qwen3-0.6B/5fewshots/inference_script.py b/leaderboard/Qwen3-0.6B/5fewshots/inference_script.py
    new file mode 100644
    index 0000000..30b281f
    --- /dev/null
    +++ b/leaderboard/Qwen3-0.6B/5fewshots/inference_script.py
    @@ -0,0 +1,26 @@
    +import os
    +
    +from dotenv import load_dotenv
    +
    +from llmsql import evaluate, inference_vllm
    +
    +load_dotenv()
    +
    +MODEL_NAME = "Qwen/Qwen3-0.6B"
    +
    +results = inference_vllm(
    +    model_name=MODEL_NAME,
    +    output_file=f"{MODEL_NAME}_outputs.jsonl",
    +    batch_size=20000,
    +    tensor_parallel_size=4,
    +    do_sample=True,
    +    hf_token=os.environ["HF_TOKEN"],
    +    max_new_tokens=1024,
    +    temperature=0.6,
    +    sampling_kwargs={"top_p": 0.95, "top_k": 20, "min_p": 0},
    +    num_fewshots=5,
    +    seed=42,
    +    llm_kwargs={"dtype": "bfloat16"},
    +)
    +
    +evaluate(results)
    diff --git a/leaderboard/Qwen3-0.6B/5fewshots/requirements.txt b/leaderboard/Qwen3-0.6B/5fewshots/requirements.txt
    new file mode 100644
    index 0000000..929f583
    --- /dev/null
    +++ b/leaderboard/Qwen3-0.6B/5fewshots/requirements.txt
    @@ -0,0 +1,172 @@
    +accelerate==1.12.0
    +aiohappyeyeballs==2.6.1
    +aiohttp==3.13.3
    +aiosignal==1.4.0
    +annotated-doc==0.0.4
    +annotated-types==0.7.0
    +anthropic==0.83.0
    +anyio==4.12.1
    +apache-tvm-ffi==0.1.8.post2
    +astor==0.8.1
    +attrs==25.4.0
    +blake3==1.0.8
    +cachetools==7.0.1
    +cbor2==5.8.0
    +certifi==2026.1.4
    +cffi==2.0.0
    +charset-normalizer==3.4.4
    +click==8.3.1
    +cloudpickle==3.1.2
    +compressed-tensors==0.13.0
    +cryptography==46.0.5
    +cuda-bindings==13.1.1
    +cuda-pathfinder==1.3.5
    +cuda-python==13.1.1
    +cupy-cuda12x==14.0.1
    +datasets==4.5.0
    +depyf==0.20.0
    +dill==0.4.0
    +diskcache==5.6.3
    +distro==1.9.0
    +dnspython==2.8.0
    +docstring_parser==0.17.0
    +einops==0.8.2
    +email-validator==2.3.0
    +fastapi==0.132.0
    +fastapi-cli==0.0.23
    +fastapi-cloud-cli==0.13.0
    +fastar==0.8.0
    +filelock==3.24.3
    +flashinfer-python==0.6.1
    +frozenlist==1.8.0
    +fsspec==2025.10.0
    +gguf==0.17.1
    +grpcio==1.78.1
    +grpcio-reflection==1.78.1
    +h11==0.16.0
    +hf-xet==1.3.0
    +httpcore==1.0.9
    +httptools==0.7.1
    +httpx==0.28.1
    +httpx-sse==0.4.3
    +huggingface_hub==0.36.2
    +idna==3.11
    +ijson==3.5.0
    +interegular==0.3.3
    +Jinja2==3.1.6
    +jiter==0.13.0
    +jmespath==1.1.0
    +jsonschema==4.26.0
    +jsonschema-specifications==2025.9.1
    +lark==1.2.2
    +llguidance==1.3.0
    +llmsql==0.1.15
    +llvmlite==0.44.0
    +lm-format-enforcer==0.11.3
    +loguru==0.7.3
    +markdown-it-py==4.0.0
    +MarkupSafe==3.0.3
    +mcp==1.26.0
    +mdurl==0.1.2
    +mistral_common==1.9.1
    +model-hosting-container-standards==0.1.13
    +mpmath==1.3.0
    +msgpack==1.1.2
    +msgspec==0.20.0
    +multidict==6.7.1
    +multiprocess==0.70.18
    +networkx==3.6.1
    +ninja==1.13.0
    +numba==0.61.2
    +numpy==2.2.6
    +nvidia-cublas-cu12==12.8.4.1
    +nvidia-cuda-cupti-cu12==12.8.90
    +nvidia-cuda-nvrtc-cu12==12.8.93
    +nvidia-cuda-runtime-cu12==12.8.90
    +nvidia-cudnn-cu12==9.10.2.21
    +nvidia-cudnn-frontend==1.18.0
    +nvidia-cufft-cu12==11.3.3.83
    +nvidia-cufile-cu12==1.13.1.3
    +nvidia-curand-cu12==10.3.9.90
    +nvidia-cusolver-cu12==11.7.3.90
    +nvidia-cusparse-cu12==12.5.8.93
    +nvidia-cusparselt-cu12==0.7.1
    +nvidia-cutlass-dsl==4.4.0
    +nvidia-cutlass-dsl-libs-base==4.4.0
    +nvidia-ml-py==13.590.48
    +nvidia-nccl-cu12==2.27.5
    +nvidia-nvjitlink-cu12==12.8.93
    +nvidia-nvshmem-cu12==3.3.20
    +nvidia-nvtx-cu12==12.8.90
    +openai==2.23.0
    +openai-harmony==0.0.8
    +opencv-python-headless==4.13.0.92
    +outlines_core==0.2.11
    +packaging==26.0
    +pandas==3.0.1
    +partial-json-parser==0.2.1.1.post7
    +pillow==12.1.1
    +prometheus-fastapi-instrumentator==7.1.0
    +prometheus_client==0.24.1
    +propcache==0.4.1
    +protobuf==6.33.5
    +psutil==7.2.2
    +py-cpuinfo==9.0.0
    +pyarrow==23.0.1
    +pybase64==1.4.3
    +pycountry==26.2.16
    +pycparser==3.0
    +pydantic==2.12.5
    +pydantic-extra-types==2.11.0
    +pydantic-settings==2.13.1
    +pydantic_core==2.41.5
    +Pygments==2.19.2
    +PyJWT==2.11.0
    +python-dateutil==2.9.0.post0
    +python-dotenv==1.2.1
    +python-json-logger==4.0.0
    +python-multipart==0.0.22
    +PyYAML==6.0.3
    +pyzmq==27.1.0
    +ray==2.54.0
    +referencing==0.37.0
    +regex==2026.2.19
    +requests==2.32.5
    +rich==14.3.3
    +rich-toolkit==0.19.4
    +rignore==0.7.6
    +rpds-py==0.30.0
    +safetensors==0.7.0
    +sentencepiece==0.2.1
    +sentry-sdk==2.53.0
    +setproctitle==1.3.7
    +setuptools==80.10.2
    +shellingham==1.5.4
    +six==1.17.0
    +sniffio==1.3.1
    +sse-starlette==3.2.0
    +starlette==0.52.1
    +supervisor==4.3.0
    +sympy==1.14.0
    +tabulate==0.9.0
    +tiktoken==0.12.0
    +tokenizers==0.22.2
    +torch==2.9.1
    +torchaudio==2.9.1
    +torchvision==0.24.1
    +tqdm==4.67.3
    +transformers==4.57.6
    +triton==3.5.1
    +typer==0.24.1
    +typer-slim==0.24.0
    +typing-inspection==0.4.2
    +typing_extensions==4.15.0
    +urllib3==2.6.3
    +uvicorn==0.41.0
    +uvloop==0.22.1
    +vllm==0.15.1
    +watchfiles==1.1.1
    +websockets==16.0
    +xgrammar==0.1.29
    +xxhash==3.6.0
    +yarl==1.22.0
    diff --git a/leaderboard/Qwen3-0.6B/5fewshots/run.yaml b/leaderboard/Qwen3-0.6B/5fewshots/run.yaml
    new file mode 100644
    index 0000000..ba714f8
    --- /dev/null
    +++ b/leaderboard/Qwen3-0.6B/5fewshots/run.yaml
    @@ -0,0 +1,60 @@
    +date: 2026-02-24
    +# =====================
    +# Model Information
    +# =====================
    +model:
    +  name: Qwen/Qwen3-0.6B
    +  revision: main
    +  commit_hash: c1899de289a04d12100db370d81485cdf75e47ca
    +  parameter_count: 0.6B
    +  dtype: bfloat16
    +  thinking: true
    +
    +type: open-source  # open-source | proprietary
    +
    +# =====================
    +# Package Information
    +# =====================
    +llmsql:
    +  version: 0.1.15
    +  commit_hash: 79175212c90b1fc094abd2c9666c23d903060014
    +
    +# =====================
    +# Benchmark Information
    +# =====================
    +version: 2.0
    +
    +# =====================
    +# Environment Information
    +# =====================
    +os_name: Ubuntu 24.04.3 LTS
    +python_version: 3.12.12
    +pip_freeze: requirements.txt
    +device: 4xH200
    +
    +# =====================
    +# Function Inputs / Inference Backend
    +# =====================
    +inference:
    +  backend: vllm  # vllm | transformers
    +  arguments:
    +    batch_size: 20000
    +    tensor_parallel_size: 4
    +    do_sample: true
    +    max_new_tokens: 1024
    +    temperature: 0.6
    +    sampling_kwargs:
    +      top_p: 0.95
    +      top_k: 20
    +      min_p: 0
    +    num_fewshots: 5
    +    seed: 42
    +    llm_kwargs:
    +      dtype: bfloat16
    +
    +# =====================
    +# Results
    +# =====================
    +results:
    +  execution_accuracy: 0.4983
    +  answers_path: https://huggingface.co/datasets/llmsql-bench/benchmark-evaluation-results/blob/main/Qwen3-0.6B/5fewshots/Qwen3-0.6B_outputs.jsonl
    diff --git a/leaderboard/docs/_build/html/_static/leaderboard.json b/leaderboard/docs/_build/html/_static/leaderboard.json
    new file mode 100644
    index 0000000..c842948
    --- /dev/null
    +++ b/leaderboard/docs/_build/html/_static/leaderboard.json
    @@ -0,0 +1,106 @@
    +[
    +  {
    +    "model": "openai/gpt-oss-120b",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.9049,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "openai/gpt-oss-20b",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.8871,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "meta-llama/Llama-3.3-70B-Instruct",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.8607,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.8519,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "Qwen/Qwen2.5-7B-Instruct",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.794,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "mistralai/Mistral-Nemo-Instruct-2407",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.7599,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "Qwen/Qwen2.5-1.5B-Instruct",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.6401,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "meta-llama/Llama-3.2-3B-Instruct",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.5415,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "CYFRAGOVPL/PLLuM-12B-chat",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.5224,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "Qwen/Qwen3-0.6B",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.4983,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "CYFRAGOVPL/PLLuM-12B-nc-chat",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.4044,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "CYFRAGOVPL/pllum-12b-nc-chat-250715",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.3727,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "meta-llama/Llama-3.2-1B-Instruct",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.2678,
    +    "date": "2026-02-24"
    +  }
    +]
    \ No newline at end of file
    diff --git a/leaderboard/docs/_static/leaderboard.json b/leaderboard/docs/_static/leaderboard.json
    new file mode 100644
    index 0000000..c842948
    --- /dev/null
    +++ b/leaderboard/docs/_static/leaderboard.json
    @@ -0,0 +1,106 @@
    +[
    +  {
    +    "model": "openai/gpt-oss-120b",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.9049,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "openai/gpt-oss-20b",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.8871,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "meta-llama/Llama-3.3-70B-Instruct",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.8607,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.8519,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "Qwen/Qwen2.5-7B-Instruct",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.794,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "mistralai/Mistral-Nemo-Instruct-2407",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.7599,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "Qwen/Qwen2.5-1.5B-Instruct",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.6401,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "meta-llama/Llama-3.2-3B-Instruct",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.5415,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "CYFRAGOVPL/PLLuM-12B-chat",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.5224,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "Qwen/Qwen3-0.6B",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.4983,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "CYFRAGOVPL/PLLuM-12B-nc-chat",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.4044,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "CYFRAGOVPL/pllum-12b-nc-chat-250715",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.3727,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "meta-llama/Llama-3.2-1B-Instruct",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.2678,
    +    "date": "2026-02-24"
    +  }
    +]
    \ No newline at end of file
    diff --git a/leaderboard/generate_leaderboard.py b/leaderboard/generate_leaderboard.py
    new file mode 100644
    index 0000000..b795556
    --- /dev/null
    +++ b/leaderboard/generate_leaderboard.py
    @@ -0,0 +1,34 @@
    +import yaml
    +import json
    +from pathlib import Path
    +import shutil
    +
    +BASE_DIR = Path(__file__).parent
    +DOCS_DIR = Path(__file__).parent.parent / "docs/_static"
    +BUILD_DIR = DOCS_DIR.parent / "_build/html/_static"
    +
    +rows = []
    +
    +for path in BASE_DIR.rglob("run.yaml"):
    +    with open(path) as f:
    +        data = yaml.safe_load(f)
    +
    +    rows.append({
    +        "model": data["model"]["name"],
    +        "type": data.get("type", ""),
    +        "fewshots": data["inference"]["arguments"]["num_fewshots"],
    +        "backend": data["inference"]["backend"],
    +        "accuracy": data["results"]["execution_accuracy"],
    +        "date": str(data["date"]),
    +    })
    +
    +rows.sort(key=lambda x: x["accuracy"], reverse=True)
    +
    +json_file = DOCS_DIR / "leaderboard.json"
    +with open(json_file, "w") as f:
    +    json.dump(rows, f, indent=2)
    +
    +if BUILD_DIR.exists():
    +    shutil.copy(json_file, BUILD_DIR / "leaderboard.json")
    +
    +print(f"✅ leaderboard.json in {json_file}")
    \ No newline at end of file
    diff --git a/leaderboard/gpt-oss-120b/5fewshots/inference_script.py b/leaderboard/gpt-oss-120b/5fewshots/inference_script.py
    new file mode 100644
    index 0000000..e99d509
    --- /dev/null
    +++ b/leaderboard/gpt-oss-120b/5fewshots/inference_script.py
    @@ -0,0 +1,26 @@
    +import os
    +
    +from dotenv import load_dotenv
    +
    +from llmsql import evaluate, inference_vllm
    +
    +load_dotenv()
    +
    +MODEL_NAME = "openai/gpt-oss-120b"
    +
    +results = inference_vllm(
    +    model_name=MODEL_NAME,
    +    output_file=f"{MODEL_NAME}_outputs.jsonl",
    +    batch_size=20000,
    +    tensor_parallel_size=4,
    +    do_sample=True,
    +    hf_token=os.environ["HF_TOKEN"],
    +    max_new_tokens=1024,
    +    temperature=1.0,
    +    sampling_kwargs={"top_p": 0.95, "repetition_penalty": 1.0},
    +    num_fewshots=5,
    +    seed=42,
    +    llm_kwargs={"dtype": "bfloat16"},
    +)
    +
    +evaluate(results)
    diff --git a/leaderboard/gpt-oss-120b/5fewshots/requirements.txt b/leaderboard/gpt-oss-120b/5fewshots/requirements.txt
    new file mode 100644
    index 0000000..929f583
    --- /dev/null
    +++ b/leaderboard/gpt-oss-120b/5fewshots/requirements.txt
    @@ -0,0 +1,172 @@
    +accelerate==1.12.0
    +aiohappyeyeballs==2.6.1
    +aiohttp==3.13.3
    +aiosignal==1.4.0
    +annotated-doc==0.0.4
    +annotated-types==0.7.0
    +anthropic==0.83.0
    +anyio==4.12.1
    +apache-tvm-ffi==0.1.8.post2
    +astor==0.8.1
    +attrs==25.4.0
    +blake3==1.0.8
    +cachetools==7.0.1
    +cbor2==5.8.0
    +certifi==2026.1.4
    +cffi==2.0.0
    +charset-normalizer==3.4.4
    +click==8.3.1
    +cloudpickle==3.1.2
    +compressed-tensors==0.13.0
    +cryptography==46.0.5
    +cuda-bindings==13.1.1
    +cuda-pathfinder==1.3.5
    +cuda-python==13.1.1
    +cupy-cuda12x==14.0.1
    +datasets==4.5.0
    +depyf==0.20.0
    +dill==0.4.0
    +diskcache==5.6.3
    +distro==1.9.0
    +dnspython==2.8.0
    +docstring_parser==0.17.0
    +einops==0.8.2
    +email-validator==2.3.0
    +fastapi==0.132.0
    +fastapi-cli==0.0.23
    +fastapi-cloud-cli==0.13.0
    +fastar==0.8.0
    +filelock==3.24.3
    +flashinfer-python==0.6.1
    +frozenlist==1.8.0
    +fsspec==2025.10.0
    +gguf==0.17.1
    +grpcio==1.78.1
    +grpcio-reflection==1.78.1
    +h11==0.16.0
    +hf-xet==1.3.0
    +httpcore==1.0.9
    +httptools==0.7.1
    +httpx==0.28.1
    +httpx-sse==0.4.3
    +huggingface_hub==0.36.2
    +idna==3.11
    +ijson==3.5.0
    +interegular==0.3.3
    +Jinja2==3.1.6
    +jiter==0.13.0
    +jmespath==1.1.0
    +jsonschema==4.26.0
    +jsonschema-specifications==2025.9.1
    +lark==1.2.2
    +llguidance==1.3.0
    +llmsql==0.1.15
    +llvmlite==0.44.0
    +lm-format-enforcer==0.11.3
    +loguru==0.7.3
    +markdown-it-py==4.0.0
    +MarkupSafe==3.0.3
    +mcp==1.26.0
    +mdurl==0.1.2
    +mistral_common==1.9.1
    +model-hosting-container-standards==0.1.13
    +mpmath==1.3.0
    +msgpack==1.1.2
    +msgspec==0.20.0
    +multidict==6.7.1
    +multiprocess==0.70.18
    +networkx==3.6.1
    +ninja==1.13.0
    +numba==0.61.2
    +numpy==2.2.6
    +nvidia-cublas-cu12==12.8.4.1
    +nvidia-cuda-cupti-cu12==12.8.90
    +nvidia-cuda-nvrtc-cu12==12.8.93
    +nvidia-cuda-runtime-cu12==12.8.90
    +nvidia-cudnn-cu12==9.10.2.21
    +nvidia-cudnn-frontend==1.18.0
    +nvidia-cufft-cu12==11.3.3.83
    +nvidia-cufile-cu12==1.13.1.3
    +nvidia-curand-cu12==10.3.9.90
    +nvidia-cusolver-cu12==11.7.3.90
    +nvidia-cusparse-cu12==12.5.8.93
    +nvidia-cusparselt-cu12==0.7.1
    +nvidia-cutlass-dsl==4.4.0
    +nvidia-cutlass-dsl-libs-base==4.4.0
    +nvidia-ml-py==13.590.48
    +nvidia-nccl-cu12==2.27.5
    +nvidia-nvjitlink-cu12==12.8.93
    +nvidia-nvshmem-cu12==3.3.20
    +nvidia-nvtx-cu12==12.8.90
    +openai==2.23.0
    +openai-harmony==0.0.8
    +opencv-python-headless==4.13.0.92
    +outlines_core==0.2.11
    +packaging==26.0
    +pandas==3.0.1
    +partial-json-parser==0.2.1.1.post7
    +pillow==12.1.1
    +prometheus-fastapi-instrumentator==7.1.0
    +prometheus_client==0.24.1
    +propcache==0.4.1
    +protobuf==6.33.5
    +psutil==7.2.2
    +py-cpuinfo==9.0.0
    +pyarrow==23.0.1
    +pybase64==1.4.3
    +pycountry==26.2.16
    +pycparser==3.0
    +pydantic==2.12.5
    +pydantic-extra-types==2.11.0
    +pydantic-settings==2.13.1
    +pydantic_core==2.41.5
    +Pygments==2.19.2
    +PyJWT==2.11.0
    +python-dateutil==2.9.0.post0
    +python-dotenv==1.2.1
    +python-json-logger==4.0.0
    +python-multipart==0.0.22
    +PyYAML==6.0.3
    +pyzmq==27.1.0
    +ray==2.54.0
    +referencing==0.37.0
    +regex==2026.2.19
    +requests==2.32.5
    +rich==14.3.3
    +rich-toolkit==0.19.4
    +rignore==0.7.6
    +rpds-py==0.30.0
    +safetensors==0.7.0
    +sentencepiece==0.2.1
    +sentry-sdk==2.53.0
    +setproctitle==1.3.7
    +setuptools==80.10.2
    +shellingham==1.5.4
    +six==1.17.0
    +sniffio==1.3.1
    +sse-starlette==3.2.0
    +starlette==0.52.1
    +supervisor==4.3.0
    +sympy==1.14.0
    +tabulate==0.9.0
    +tiktoken==0.12.0
    +tokenizers==0.22.2
    +torch==2.9.1
    +torchaudio==2.9.1
    +torchvision==0.24.1
    +tqdm==4.67.3
    +transformers==4.57.6
    +triton==3.5.1
    +typer==0.24.1
    +typer-slim==0.24.0
    +typing-inspection==0.4.2
    +typing_extensions==4.15.0
    +urllib3==2.6.3
    +uvicorn==0.41.0
    +uvloop==0.22.1
    +vllm==0.15.1
    +watchfiles==1.1.1
    +websockets==16.0
    +xgrammar==0.1.29
    +xxhash==3.6.0
    +yarl==1.22.0
    diff --git a/leaderboard/gpt-oss-120b/5fewshots/run.yaml b/leaderboard/gpt-oss-120b/5fewshots/run.yaml
    new file mode 100644
    index 0000000..fe878e3
    --- /dev/null
    +++ b/leaderboard/gpt-oss-120b/5fewshots/run.yaml
    @@ -0,0 +1,60 @@
    +date: 2026-02-24
    +# =====================
    +# Model Information
    +# =====================
    +model:
    +  name: openai/gpt-oss-120b
    +  revision: main
    +  commit_hash: b5c939de8f754692c1647ca79fbf85e8c1e70f8a
    +  parameter_count: 120B
    +  dtype: bfloat16
    +  thinking: true
    +
    +type: open-source  # open-source | proprietary
    +
    +# =====================
    +# Package Information
    +# =====================
    +llmsql:
    +  version: 0.1.15
    +  commit_hash: 79175212c90b1fc094abd2c9666c23d903060014
    +
    +# =====================
    +# Benchmark Information
    +# =====================
    +version: 2.0
    +
    +# =====================
    +# Environment Information
    +# =====================
    +os_name: Ubuntu 24.04.3 LTS
    +python_version: 3.12.12
    +pip_freeze: requirements.txt
    +device: 4xH200
    +
    +# =====================
    +# Function Inputs / Inference Backend
    +# =====================
    +inference:
    +  backend: vllm  # vllm | transformers
    +  arguments:
    +    batch_size: 20000
    +    tensor_parallel_size: 4
    +    do_sample: true
    +    max_new_tokens: 1024
    +    temperature: 1.0
    +    sampling_kwargs:
    +      top_p: 0.95
    +      repetition_penalty: 1.0
    +    num_fewshots: 5
    +    seed: 42
    +    llm_kwargs:
    +      dtype: bfloat16
    +
    +
    +# =====================
    +# Results
    +# =====================
    +results:
    +  execution_accuracy: 0.9049
    +  answers_path: https://huggingface.co/datasets/llmsql-bench/benchmark-evaluation-results/blob/main/gpt-oss-120b/5fewshots/gpt-oss-120b_outputs.jsonl
    diff --git a/leaderboard/gpt-oss-20b/5fewshots/inference_script.py b/leaderboard/gpt-oss-20b/5fewshots/inference_script.py
    new file mode 100644
    index 0000000..3125855
    --- /dev/null
    +++ b/leaderboard/gpt-oss-20b/5fewshots/inference_script.py
    @@ -0,0 +1,26 @@
    +import os
    +
    +from dotenv import load_dotenv
    +
    +from llmsql import evaluate, inference_vllm
    +
    +load_dotenv()
    +
    +MODEL_NAME = "openai/gpt-oss-20b"
    +
    +results = inference_vllm(
    +    model_name=MODEL_NAME,
    +    output_file=f"{MODEL_NAME}_outputs.jsonl",
    +    batch_size=20000,
    +    tensor_parallel_size=4,
    +    do_sample=True,
    +    hf_token=os.environ["HF_TOKEN"],
    +    max_new_tokens=1024,
    +    temperature=1.0,
    +    sampling_kwargs={"top_p": 0.95, "repetition_penalty": 1.0},
    +    num_fewshots=5,
    +    seed=42,
    +    llm_kwargs={"dtype": "bfloat16"},
    +)
    +
    +evaluate(results)
    diff --git a/leaderboard/gpt-oss-20b/5fewshots/requirements.txt b/leaderboard/gpt-oss-20b/5fewshots/requirements.txt
    new file mode 100644
    index 0000000..929f583
    --- /dev/null
    +++ b/leaderboard/gpt-oss-20b/5fewshots/requirements.txt
    @@ -0,0 +1,172 @@
    +accelerate==1.12.0
    +aiohappyeyeballs==2.6.1
    +aiohttp==3.13.3
    +aiosignal==1.4.0
    +annotated-doc==0.0.4
    +annotated-types==0.7.0
    +anthropic==0.83.0
    +anyio==4.12.1
    +apache-tvm-ffi==0.1.8.post2
    +astor==0.8.1
    +attrs==25.4.0
    +blake3==1.0.8
    +cachetools==7.0.1
    +cbor2==5.8.0
    +certifi==2026.1.4
    +cffi==2.0.0
    +charset-normalizer==3.4.4
    +click==8.3.1
    +cloudpickle==3.1.2
    +compressed-tensors==0.13.0
    +cryptography==46.0.5
    +cuda-bindings==13.1.1
    +cuda-pathfinder==1.3.5
    +cuda-python==13.1.1
    +cupy-cuda12x==14.0.1
    +datasets==4.5.0
    +depyf==0.20.0
    +dill==0.4.0
    +diskcache==5.6.3
    +distro==1.9.0
    +dnspython==2.8.0
    +docstring_parser==0.17.0
    +einops==0.8.2
    +email-validator==2.3.0
    +fastapi==0.132.0
    +fastapi-cli==0.0.23
    +fastapi-cloud-cli==0.13.0
    +fastar==0.8.0
    +filelock==3.24.3
    +flashinfer-python==0.6.1
    +frozenlist==1.8.0
    +fsspec==2025.10.0
    +gguf==0.17.1
    +grpcio==1.78.1
    +grpcio-reflection==1.78.1
    +h11==0.16.0
    +hf-xet==1.3.0
    +httpcore==1.0.9
    +httptools==0.7.1
    +httpx==0.28.1
    +httpx-sse==0.4.3
    +huggingface_hub==0.36.2
    +idna==3.11
    +ijson==3.5.0
    +interegular==0.3.3
    +Jinja2==3.1.6
    +jiter==0.13.0
    +jmespath==1.1.0
    +jsonschema==4.26.0
    +jsonschema-specifications==2025.9.1
    +lark==1.2.2
    +llguidance==1.3.0
    +llmsql==0.1.15
    +llvmlite==0.44.0
    +lm-format-enforcer==0.11.3
    +loguru==0.7.3
    +markdown-it-py==4.0.0
    +MarkupSafe==3.0.3
    +mcp==1.26.0
    +mdurl==0.1.2
    +mistral_common==1.9.1
    +model-hosting-container-standards==0.1.13
    +mpmath==1.3.0
    +msgpack==1.1.2
    +msgspec==0.20.0
    +multidict==6.7.1
    +multiprocess==0.70.18
    +networkx==3.6.1
    +ninja==1.13.0
    +numba==0.61.2
    +numpy==2.2.6
    +nvidia-cublas-cu12==12.8.4.1
    +nvidia-cuda-cupti-cu12==12.8.90
    +nvidia-cuda-nvrtc-cu12==12.8.93
    +nvidia-cuda-runtime-cu12==12.8.90
    +nvidia-cudnn-cu12==9.10.2.21
    +nvidia-cudnn-frontend==1.18.0
    +nvidia-cufft-cu12==11.3.3.83
    +nvidia-cufile-cu12==1.13.1.3
    +nvidia-curand-cu12==10.3.9.90
    +nvidia-cusolver-cu12==11.7.3.90
    +nvidia-cusparse-cu12==12.5.8.93
    +nvidia-cusparselt-cu12==0.7.1
    +nvidia-cutlass-dsl==4.4.0
    +nvidia-cutlass-dsl-libs-base==4.4.0
    +nvidia-ml-py==13.590.48
    +nvidia-nccl-cu12==2.27.5
    +nvidia-nvjitlink-cu12==12.8.93
    +nvidia-nvshmem-cu12==3.3.20
    +nvidia-nvtx-cu12==12.8.90
    +openai==2.23.0
    +openai-harmony==0.0.8
    +opencv-python-headless==4.13.0.92
    +outlines_core==0.2.11
    +packaging==26.0
    +pandas==3.0.1
    +partial-json-parser==0.2.1.1.post7
    +pillow==12.1.1
    +prometheus-fastapi-instrumentator==7.1.0
    +prometheus_client==0.24.1
    +propcache==0.4.1
    +protobuf==6.33.5
    +psutil==7.2.2
    +py-cpuinfo==9.0.0
    +pyarrow==23.0.1
    +pybase64==1.4.3
    +pycountry==26.2.16
    +pycparser==3.0
    +pydantic==2.12.5
    +pydantic-extra-types==2.11.0
    +pydantic-settings==2.13.1
    +pydantic_core==2.41.5
    +Pygments==2.19.2
    +PyJWT==2.11.0
    +python-dateutil==2.9.0.post0
    +python-dotenv==1.2.1
    +python-json-logger==4.0.0
    +python-multipart==0.0.22
    +PyYAML==6.0.3
    +pyzmq==27.1.0
    +ray==2.54.0
    +referencing==0.37.0
    +regex==2026.2.19
    +requests==2.32.5
    +rich==14.3.3
    +rich-toolkit==0.19.4
    +rignore==0.7.6
    +rpds-py==0.30.0
    +safetensors==0.7.0
    +sentencepiece==0.2.1
    +sentry-sdk==2.53.0
    +setproctitle==1.3.7
    +setuptools==80.10.2
    +shellingham==1.5.4
    +six==1.17.0
    +sniffio==1.3.1
    +sse-starlette==3.2.0
    +starlette==0.52.1
    +supervisor==4.3.0
    +sympy==1.14.0
    +tabulate==0.9.0
    +tiktoken==0.12.0
    +tokenizers==0.22.2
    +torch==2.9.1
    +torchaudio==2.9.1
    +torchvision==0.24.1
    +tqdm==4.67.3
    +transformers==4.57.6
    +triton==3.5.1
    +typer==0.24.1
    +typer-slim==0.24.0
    +typing-inspection==0.4.2
    +typing_extensions==4.15.0
    +urllib3==2.6.3
    +uvicorn==0.41.0
    +uvloop==0.22.1
    +vllm==0.15.1
    +watchfiles==1.1.1
    +websockets==16.0
    +xgrammar==0.1.29
    +xxhash==3.6.0
    +yarl==1.22.0
    diff --git a/leaderboard/gpt-oss-20b/5fewshots/run.yaml b/leaderboard/gpt-oss-20b/5fewshots/run.yaml
    new file mode 100644
    index 0000000..74b6638
    --- /dev/null
    +++ b/leaderboard/gpt-oss-20b/5fewshots/run.yaml
    @@ -0,0 +1,59 @@
    +date: 2026-02-24
    +# =====================
    +# Model Information
    +# =====================
    +model:
    +  name: openai/gpt-oss-20b
    +  revision: main
    +  commit_hash: 6cee5e81ee83917806bbde320786a8fb61efebee
    +  parameter_count: 20B
    +  dtype: bfloat16
    +  thinking: true
    +
    +type: open-source  # open-source | proprietary
    +
    +# =====================
    +# Package Information
    +# =====================
    +llmsql:
    +  version: 0.1.15
    +  commit_hash: 79175212c90b1fc094abd2c9666c23d903060014
    +
    +# =====================
    +# Benchmark Information
    +# =====================
    +version: 2.0
    +
    +# =====================
    +# Environment Information
    +# =====================
    +os_name: Ubuntu 24.04.3 LTS
    +python_version: 3.12.12
    +pip_freeze: requirements.txt
    +device: 4xH200
    +
    +# =====================
    +# Function Inputs / Inference Backend
    +# =====================
    +inference:
    +  backend: vllm  # vllm | transformers
    +  arguments:
    +    batch_size: 20000
    +    tensor_parallel_size: 4
    +    do_sample: true
    +    max_new_tokens: 1024
    +    temperature: 1.0
    +    sampling_kwargs:
    +      top_p: 0.95
    +      repetition_penalty: 1.0
    +    num_fewshots: 5
    +    seed: 42
    +    llm_kwargs:
    +      dtype: bfloat16
    +
    +# =====================
    +# Results
    +# =====================
    +results:
    +  execution_accuracy: 0.8871
    +  answers_path: https://huggingface.co/datasets/llmsql-bench/benchmark-evaluation-results/blob/main/gpt-oss-20b/5fewshots/gpt-oss-20b_outputs.jsonl
    diff --git a/leaderboard/leaderboard.json b/leaderboard/leaderboard.json
    new file mode 100644
    index 0000000..c842948
    --- /dev/null
    +++ b/leaderboard/leaderboard.json
    @@ -0,0 +1,106 @@
    +[
    +  {
    +    "model": "openai/gpt-oss-120b",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.9049,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "openai/gpt-oss-20b",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.8871,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "meta-llama/Llama-3.3-70B-Instruct",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.8607,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.8519,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "Qwen/Qwen2.5-7B-Instruct",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.794,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "mistralai/Mistral-Nemo-Instruct-2407",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.7599,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "Qwen/Qwen2.5-1.5B-Instruct",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.6401,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "meta-llama/Llama-3.2-3B-Instruct",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.5415,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "CYFRAGOVPL/PLLuM-12B-chat",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.5224,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "Qwen/Qwen3-0.6B",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.4983,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "CYFRAGOVPL/PLLuM-12B-nc-chat",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.4044,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "CYFRAGOVPL/pllum-12b-nc-chat-250715",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.3727,
    +    "date": "2026-02-24"
    +  },
    +  {
    +    "model": "meta-llama/Llama-3.2-1B-Instruct",
    +    "type": "open-source",
    +    "fewshots": 5,
    +    "backend": "vllm",
    +    "accuracy": 0.2678,
    +    "date": "2026-02-24"
    +  }
    +]
    \ No newline at end of file
    diff --git a/leaderboard/pllum-12b-nc-chat-250715/5fewshots/inference_script.py b/leaderboard/pllum-12b-nc-chat-250715/5fewshots/inference_script.py
    new file mode 100644
    index 0000000..c2e8457
    --- /dev/null
    +++ b/leaderboard/pllum-12b-nc-chat-250715/5fewshots/inference_script.py
    @@ -0,0 +1,25 @@
    +import os
    +
    +from dotenv import load_dotenv
    +
    +from llmsql import evaluate, inference_vllm
    +
    +load_dotenv()
    +
    +MODEL_NAME = "CYFRAGOVPL/pllum-12b-nc-chat-250715"
    +
    +results = inference_vllm(
    +    model_name=MODEL_NAME,
    +    output_file=f"{MODEL_NAME}_outputs.jsonl",
    +    batch_size=20000,
    +    tensor_parallel_size=4,
    +    do_sample=False,
    +    hf_token=os.environ["HF_TOKEN"],
    +    max_new_tokens=256,
    +    temperature=0.0,
    +    num_fewshots=5,
    +    seed=42,
    +    llm_kwargs={"dtype": "bfloat16"},
    +)
    +
    +evaluate(results)
    diff --git a/leaderboard/pllum-12b-nc-chat-250715/5fewshots/requirements.txt b/leaderboard/pllum-12b-nc-chat-250715/5fewshots/requirements.txt
    new file mode 100644
    index 0000000..929f583
    --- /dev/null
    +++ b/leaderboard/pllum-12b-nc-chat-250715/5fewshots/requirements.txt
    @@ -0,0 +1,172 @@
    +accelerate==1.12.0
    +aiohappyeyeballs==2.6.1
    +aiohttp==3.13.3
    +aiosignal==1.4.0
    +annotated-doc==0.0.4
    +annotated-types==0.7.0
    +anthropic==0.83.0
    +anyio==4.12.1
    +apache-tvm-ffi==0.1.8.post2
    +astor==0.8.1
    +attrs==25.4.0
    +blake3==1.0.8
    +cachetools==7.0.1
    +cbor2==5.8.0
    +certifi==2026.1.4
    +cffi==2.0.0
    +charset-normalizer==3.4.4
    +click==8.3.1
    +cloudpickle==3.1.2
    +compressed-tensors==0.13.0
    +cryptography==46.0.5
    +cuda-bindings==13.1.1
    +cuda-pathfinder==1.3.5
    +cuda-python==13.1.1
    +cupy-cuda12x==14.0.1
    +datasets==4.5.0
    +depyf==0.20.0
    +dill==0.4.0
    +diskcache==5.6.3
    +distro==1.9.0
    +dnspython==2.8.0
    +docstring_parser==0.17.0
    +einops==0.8.2
    +email-validator==2.3.0
    +fastapi==0.132.0
    +fastapi-cli==0.0.23
    +fastapi-cloud-cli==0.13.0
    +fastar==0.8.0
    +filelock==3.24.3
    +flashinfer-python==0.6.1
    +frozenlist==1.8.0
    +fsspec==2025.10.0
    +gguf==0.17.1
    +grpcio==1.78.1
    +grpcio-reflection==1.78.1
    +h11==0.16.0
    +hf-xet==1.3.0
    +httpcore==1.0.9
    +httptools==0.7.1
    +httpx==0.28.1
    +httpx-sse==0.4.3
    +huggingface_hub==0.36.2
    +idna==3.11
    +ijson==3.5.0
    +interegular==0.3.3
    +Jinja2==3.1.6
    +jiter==0.13.0
    +jmespath==1.1.0
    +jsonschema==4.26.0
    +jsonschema-specifications==2025.9.1
    +lark==1.2.2
    +llguidance==1.3.0
    +llmsql==0.1.15
    +llvmlite==0.44.0
    +lm-format-enforcer==0.11.3
    +loguru==0.7.3
    +markdown-it-py==4.0.0
    +MarkupSafe==3.0.3
    +mcp==1.26.0
    +mdurl==0.1.2
    +mistral_common==1.9.1
    +model-hosting-container-standards==0.1.13
    +mpmath==1.3.0
    +msgpack==1.1.2
    +msgspec==0.20.0
    +multidict==6.7.1
    +multiprocess==0.70.18
    +networkx==3.6.1
    +ninja==1.13.0
    +numba==0.61.2
    +numpy==2.2.6
    +nvidia-cublas-cu12==12.8.4.1
    +nvidia-cuda-cupti-cu12==12.8.90
    +nvidia-cuda-nvrtc-cu12==12.8.93
    +nvidia-cuda-runtime-cu12==12.8.90
    +nvidia-cudnn-cu12==9.10.2.21
    +nvidia-cudnn-frontend==1.18.0
    +nvidia-cufft-cu12==11.3.3.83
    +nvidia-cufile-cu12==1.13.1.3
    +nvidia-curand-cu12==10.3.9.90
    +nvidia-cusolver-cu12==11.7.3.90
    +nvidia-cusparse-cu12==12.5.8.93
    +nvidia-cusparselt-cu12==0.7.1
    +nvidia-cutlass-dsl==4.4.0
    +nvidia-cutlass-dsl-libs-base==4.4.0
    +nvidia-ml-py==13.590.48
    +nvidia-nccl-cu12==2.27.5
    +nvidia-nvjitlink-cu12==12.8.93
    +nvidia-nvshmem-cu12==3.3.20
    +nvidia-nvtx-cu12==12.8.90
    +openai==2.23.0
    +openai-harmony==0.0.8
    +opencv-python-headless==4.13.0.92
    +outlines_core==0.2.11
    +packaging==26.0
    +pandas==3.0.1
    +partial-json-parser==0.2.1.1.post7
    +pillow==12.1.1
    +prometheus-fastapi-instrumentator==7.1.0
    +prometheus_client==0.24.1
    +propcache==0.4.1
    +protobuf==6.33.5
    +psutil==7.2.2
    +py-cpuinfo==9.0.0
    +pyarrow==23.0.1
    +pybase64==1.4.3
    +pycountry==26.2.16
    +pycparser==3.0
    +pydantic==2.12.5
    +pydantic-extra-types==2.11.0
    +pydantic-settings==2.13.1
    +pydantic_core==2.41.5
    +Pygments==2.19.2
    +PyJWT==2.11.0
    +python-dateutil==2.9.0.post0
    +python-dotenv==1.2.1
    +python-json-logger==4.0.0
    +python-multipart==0.0.22
    +PyYAML==6.0.3
    +pyzmq==27.1.0
    +ray==2.54.0
    +referencing==0.37.0
    +regex==2026.2.19
    +requests==2.32.5
    +rich==14.3.3
    +rich-toolkit==0.19.4
    +rignore==0.7.6
    +rpds-py==0.30.0
    +safetensors==0.7.0
    +sentencepiece==0.2.1
    +sentry-sdk==2.53.0
    +setproctitle==1.3.7
    +setuptools==80.10.2
    +shellingham==1.5.4
    +six==1.17.0
    +sniffio==1.3.1
    +sse-starlette==3.2.0
    +starlette==0.52.1
    +supervisor==4.3.0
    +sympy==1.14.0
    +tabulate==0.9.0
    +tiktoken==0.12.0
    +tokenizers==0.22.2
    +torch==2.9.1
    +torchaudio==2.9.1
    +torchvision==0.24.1
    +tqdm==4.67.3
    +transformers==4.57.6
    +triton==3.5.1
    +typer==0.24.1
    +typer-slim==0.24.0
    +typing-inspection==0.4.2
    +typing_extensions==4.15.0
    +urllib3==2.6.3
    +uvicorn==0.41.0
    +uvloop==0.22.1
    +vllm==0.15.1
    +watchfiles==1.1.1
    +websockets==16.0
    +xgrammar==0.1.29
    +xxhash==3.6.0
    +yarl==1.22.0
    diff --git a/leaderboard/pllum-12b-nc-chat-250715/5fewshots/run.yaml b/leaderboard/pllum-12b-nc-chat-250715/5fewshots/run.yaml
    new file mode 100644
    index 0000000..6b48d39
    --- /dev/null
    +++ b/leaderboard/pllum-12b-nc-chat-250715/5fewshots/run.yaml
    @@ -0,0 +1,56 @@
    +date: 2026-02-24
    +# =====================
    +# Model Information
    +# =====================
    +model:
    +  name: CYFRAGOVPL/pllum-12b-nc-chat-250715
    +  revision: main
    +  commit_hash: 025e26b3fc5ac1fa8714298e671a6cf2418123d7
    +  parameter_count: 12B
    +  dtype: bfloat16
    +  thinking: false
    +
    +type: open-source  # open-source | proprietary
    +
    +# =====================
    +# Package Information
    +# =====================
    +llmsql:
    +  version: 0.1.15
    +  commit_hash: 79175212c90b1fc094abd2c9666c23d903060014
    +
    +# =====================
    +# Benchmark Information
    +# =====================
    +version: 2.0
    +
    +# =====================
    +# Environment Information
    +# =====================
    +os_name: Ubuntu 24.04.3 LTS
    +python_version: 3.12.12
    +pip_freeze: requirements.txt
    +device: 4xH200
    +
    +# =====================
    +# Function Inputs / Inference Backend
    +# =====================
    +inference:
    +  backend: vllm  # vllm | transformers
    +  arguments:
    +    batch_size: 20000
    +    tensor_parallel_size: 4
    +    do_sample: false
    +    max_new_tokens: 256
    +    temperature: 0.0
    +    num_fewshots: 5
    +    seed: 42
    +    llm_kwargs:
    +      dtype: bfloat16
    +
    +# =====================
    +# Results
    +# =====================
    +results:
    +  execution_accuracy: 0.3727
    +  answers_path: https://huggingface.co/datasets/llmsql-bench/benchmark-evaluation-results/blob/main/pllum-12b-nc-chat-250715/5fewshots/pllum-12b-nc-chat-250715_outputs.jsonl