From d5fdd4acfd2935178112d41f54f19f626be4b55e Mon Sep 17 00:00:00 2001 From: Magnus0100 Date: Mon, 15 Dec 2025 01:32:31 +0800 Subject: [PATCH 1/6] [Feature] Add BERT SWAG Multiple Choice application (Ref: #ID9IPX) --- nlp/README.md | 6 +- .../swag_bert_run_result.ipynb | 909 ++++++++++++++++++ 2 files changed, 912 insertions(+), 3 deletions(-) create mode 100644 nlp/multiple_choice/swag_bert_run_result.ipynb diff --git a/nlp/README.md b/nlp/README.md index a2c9dae..a96be8f 100644 --- a/nlp/README.md +++ b/nlp/README.md @@ -4,9 +4,9 @@ This directory contains ready-to-use Natural Language Processing application not ## Application List -| No. | Model | Description | -| :-- | :---- | :------------------------------ | -| 1 | / | This section is empty for now — feel free to contribute your first application! | +| No. | Model | Description | +| :-- | :---- | :---------- | +| 1 | [BERT (SWAG)](./multiple_choice/swag_bert_run_result.ipynb) | Fine-tuning BERT on SWAG dataset for Multiple Choice tasks using MindSpore NLP. | ## Contributing New NLP Applications diff --git a/nlp/multiple_choice/swag_bert_run_result.ipynb b/nlp/multiple_choice/swag_bert_run_result.ipynb new file mode 100644 index 0000000..f2b7fd1 --- /dev/null +++ b/nlp/multiple_choice/swag_bert_run_result.ipynb @@ -0,0 +1,909 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# SWAG Multiple Choice Finetune + Inference (MindSpore + MindHF)\n", + "\n", + "**特性:**\n", + "\n", + "- 流程:训练 -> 评估 -> 保存 -> 推理演示\n", + "\n", + "本案例基于 MindSpore 2.7.1 和 MindHF 0.6.0 实现 BERT 模型在 SWAG 数据集上的多选任务微调。\n", + "## 1. 
导入依赖与环境配置\n", + "\n", + "导入必要的库,并设置 HuggingFace 镜像加速下载。\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2025-12-14T16:21:14.050863Z", + "iopub.status.busy": "2025-12-14T16:21:14.050528Z", + "iopub.status.idle": "2025-12-14T16:21:21.091146Z", + "shell.execute_reply": "2025-12-14T16:21:21.089643Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/root/.conda/envs/ms_py311/lib/python3.11/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for type is zero.\n", + " setattr(self, word, getattr(machar, word).flat[0])\n", + "/root/.conda/envs/ms_py311/lib/python3.11/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero.\n", + " return self._float_to_str(self.smallest_subnormal)\n", + "/root/.conda/envs/ms_py311/lib/python3.11/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for type is zero.\n", + " setattr(self, word, getattr(machar, word).flat[0])\n", + "/root/.conda/envs/ms_py311/lib/python3.11/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero.\n", + " return self._float_to_str(self.smallest_subnormal)\n" + ] + }, + { + "data": { + "text/plain": [ + "'false'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os\n", + "from dataclasses import dataclass\n", + "from typing import Any, Dict, List, Optional, Union\n", + "\n", + "import numpy as np\n", + "import mindspore as ms\n", + "\n", + "# ----------------------------\n", + "# Environment (HF-Mirror)\n", + "# ----------------------------\n", + "os.environ.setdefault(\"HF_ENDPOINT\", \"https://hf-mirror.com\")\n", + "os.environ.setdefault(\"TOKENIZERS_PARALLELISM\", 
\"false\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. 定义上下文设置函数\n", + "\n", + "定义设置 MindSpore 运行环境(Ascend/PYNATIVE)和导入 MindHF 组件的辅助函数。\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-14T16:21:21.094517Z", + "iopub.status.busy": "2025-12-14T16:21:21.094070Z", + "iopub.status.idle": "2025-12-14T16:21:21.102099Z", + "shell.execute_reply": "2025-12-14T16:21:21.100812Z" + } + }, + "outputs": [], + "source": [ + "# ----------------------------\n", + "# MindSpore context\n", + "# ----------------------------\n", + "def set_ms_context():\n", + " ms.set_seed(42)\n", + " ms.set_context(mode=ms.PYNATIVE_MODE)\n", + " try:\n", + " ms.set_device(\"Ascend\", 0)\n", + " except AttributeError:\n", + " ms.set_context(device_target=\"Ascend\", device_id=0)\n", + "\n", + "\n", + "def to_numpy(x) -> np.ndarray:\n", + " if isinstance(x, np.ndarray):\n", + " return x\n", + " if hasattr(x, \"asnumpy\"):\n", + " return x.asnumpy()\n", + " return np.asarray(x)\n", + "\n", + "\n", + "# ----------------------------\n", + "# Import mindhf.transformers\n", + "# ----------------------------\n", + "def import_mindhf_transformers():\n", + " from mindhf.transformers import (\n", + " AutoTokenizer,\n", + " AutoModelForMultipleChoice,\n", + " Trainer,\n", + " TrainingArguments,\n", + " )\n", + " return AutoTokenizer, AutoModelForMultipleChoice, Trainer, TrainingArguments\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. 
初始化与加载数据集\n", + "\n", + "初始化运行环境,加载 SWAG 数据集,并根据配置截取训练集和验证集。\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-14T16:21:21.104832Z", + "iopub.status.busy": "2025-12-14T16:21:21.104529Z", + "iopub.status.idle": "2025-12-14T16:21:31.986237Z", + "shell.execute_reply": "2025-12-14T16:21:31.984267Z" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/root/.conda/envs/ms_py311/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">>> MindSpore: 2.7.1\n", + ">>> Device: Ascend:0 | Mode: PYNATIVE\n", + ">>> Model: google-bert/bert-base-uncased\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">>> Data: Train=2000, Valid=1000\n" + ] + } + ], + "source": [ + "# 运行环境初始化\n", + "set_ms_context()\n", + "AutoTokenizer, AutoModelForMultipleChoice, Trainer, TrainingArguments = import_mindhf_transformers()\n", + "\n", + "# 配置\n", + "model_checkpoint = \"google-bert/bert-base-uncased\"\n", + "output_dir = \"./my_awesome_swag_model_ms\"\n", + "\n", + "# 样本量\n", + "max_train_samples = 2000\n", + "max_eval_samples = 1000\n", + "\n", + "print(f\">>> MindSpore: {ms.__version__}\")\n", + "print(f\">>> Device: Ascend:0 | Mode: PYNATIVE\")\n", + "print(f\">>> Model: {model_checkpoint}\")\n", + "\n", + "# 1. 
Dataset\n", + "from datasets import load_dataset\n", + "raw = load_dataset(\"swag\", \"regular\")\n", + "raw[\"train\"] = raw[\"train\"].select(range(min(max_train_samples, len(raw[\"train\"]))))\n", + "raw[\"validation\"] = raw[\"validation\"].select(range(min(max_eval_samples, len(raw[\"validation\"]))))\n", + "print(f\">>> Data: Train={len(raw['train'])}, Valid={len(raw['validation'])}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. 数据预处理\n", + "\n", + "加载 Tokenizer,将 SWAG 数据集的上下文与选项拼接,并进行编码(Tokenize)。\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-14T16:21:31.990076Z", + "iopub.status.busy": "2025-12-14T16:21:31.989390Z", + "iopub.status.idle": "2025-12-14T16:21:49.785728Z", + "shell.execute_reply": "2025-12-14T16:21:49.784341Z" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r\n", + "Map: 0%| | 0/2000 [00:00 Dict[str, Any]:\n", + " label_key = \"labels\" if \"labels\" in features[0] else (\"label\" if \"label\" in features[0] else None)\n", + " labels = [f.pop(label_key) for f in features] if label_key else None\n", + "\n", + " batch_size = len(features)\n", + " num_choices = len(features[0][\"input_ids\"])\n", + "\n", + " flattened = []\n", + " for feat in features:\n", + " for i in range(num_choices):\n", + " flattened.append({k: v[i] for k, v in feat.items()})\n", + "\n", + " # Try returning MindSpore tensors directly\n", + " try:\n", + " batch = self.tokenizer.pad(\n", + " flattened,\n", + " padding=self.padding,\n", + " pad_to_multiple_of=self.pad_to_multiple_of,\n", + " return_tensors=\"ms\",\n", + " )\n", + " out = {k: v.reshape((batch_size, num_choices, -1)) for k, v in batch.items()}\n", + " if labels is not None:\n", + " out[\"labels\"] = ms.Tensor(np.asarray(labels, dtype=np.int32), dtype=self.label_dtype)\n", + " return out\n", + " except Exception:\n", + " # Fallback to numpy then 
convert\n", + " batch_np = self.tokenizer.pad(\n", + " flattened,\n", + " padding=self.padding,\n", + " pad_to_multiple_of=self.pad_to_multiple_of,\n", + " return_tensors=\"np\",\n", + " )\n", + " out = {}\n", + " for k, v in batch_np.items():\n", + " arr = np.asarray(v).reshape((batch_size, num_choices, -1))\n", + " if k in (\"input_ids\", \"attention_mask\", \"token_type_ids\"):\n", + " arr = arr.astype(np.int32, copy=False)\n", + " out[k] = ms.Tensor(arr)\n", + " if labels is not None:\n", + " out[\"labels\"] = ms.Tensor(np.asarray(labels, dtype=np.int32), dtype=self.label_dtype)\n", + " return out\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. 模型构建与训练配置\n", + "\n", + "加载 `BertForMultipleChoice` 模型,配置 `TrainingArguments`,并初始化 `Trainer`。\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-14T16:21:49.805891Z", + "iopub.status.busy": "2025-12-14T16:21:49.805554Z", + "iopub.status.idle": "2025-12-14T16:21:52.499460Z", + "shell.execute_reply": "2025-12-14T16:21:52.498081Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[MS_ALLOC_CONF] config: enable_vmm:True vmm_align_size:2MB\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Some weights of BertForMultipleChoice were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_37813/2626949888.py:34: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n", + " trainer = Trainer(\n", + "The model is already on multiple devices. 
Skipping the move to device specified in `args`.\n" + ] + } + ], + "source": [ + "# 4. Model\n", + "model = AutoModelForMultipleChoice.from_pretrained(model_checkpoint)\n", + "\n", + "# 5. Metrics\n", + "def compute_metrics(eval_predictions):\n", + " if hasattr(eval_predictions, \"predictions\"):\n", + " logits = eval_predictions.predictions\n", + " labels = eval_predictions.label_ids\n", + " else:\n", + " logits, labels = eval_predictions\n", + " preds = np.argmax(to_numpy(logits), axis=1)\n", + " labels = to_numpy(labels)\n", + " return {\"accuracy\": float((preds == labels).mean())}\n", + "\n", + "# 6. TrainingArguments (Fixed for current environment)\n", + "\n", + "train_args = TrainingArguments(\n", + " output_dir=output_dir,\n", + " learning_rate=5e-5,\n", + " per_device_train_batch_size=8,\n", + " per_device_eval_batch_size=8,\n", + " num_train_epochs=3,\n", + " weight_decay=0.01,\n", + " eval_strategy=\"epoch\", \n", + " save_strategy=\"epoch\",\n", + " save_total_limit=1,\n", + " logging_steps=50,\n", + " push_to_hub=False,\n", + " remove_unused_columns=False,\n", + " report_to=[],\n", + ")\n", + "\n", + "# 7. Trainer\n", + "trainer = Trainer(\n", + " model=model,\n", + " args=train_args,\n", + " train_dataset=encoded[\"train\"],\n", + " eval_dataset=encoded[\"validation\"],\n", + " tokenizer=tokenizer,\n", + " data_collator=DataCollatorForMultipleChoice(tokenizer),\n", + " compute_metrics=compute_metrics,\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 7. 
执行训练与保存\n", + "\n", + "启动训练流程,完成后进行评估并保存模型权重。\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-14T16:21:52.502318Z", + "iopub.status.busy": "2025-12-14T16:21:52.501983Z", + "iopub.status.idle": "2025-12-14T16:25:25.956052Z", + "shell.execute_reply": "2025-12-14T16:25:25.953760Z" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + ">>> Starting training...\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " [750/750 03:19, Epoch 3/3]\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EpochTraining LossValidation LossAccuracy
10.9603000.8831730.670000
20.3726000.9570890.679000
30.0976001.1632500.681000

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + ">>> Starting evaluation...\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "

\n", + " \n", + " \n", + " [125/125 00:02]\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">>> Eval metrics: {'eval_loss': 1.1632496118545532, 'eval_accuracy': 0.681, 'eval_runtime': 2.83, 'eval_samples_per_second': 353.355, 'eval_steps_per_second': 44.169, 'epoch': 3.0}\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">>> Model saved to: ./my_awesome_swag_model_ms\n" + ] + } + ], + "source": [ + "# 8. Run\n", + "print(\"\\n>>> Starting training...\")\n", + "trainer.train()\n", + "\n", + "print(\"\\n>>> Starting evaluation...\")\n", + "metrics = trainer.evaluate()\n", + "print(f\">>> Eval metrics: {metrics}\")\n", + "\n", + "# 9. Save\n", + "os.makedirs(output_dir, exist_ok=True)\n", + "trainer.save_model(output_dir)\n", + "try:\n", + " tokenizer.save_pretrained(output_dir)\n", + "except Exception:\n", + " pass\n", + "print(f\">>> Model saved to: {output_dir}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 8. 
推理演示\n", + "\n", + "从验证集中抽取样本,进行端到端的推理预测演示。\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-14T16:25:25.962026Z", + "iopub.status.busy": "2025-12-14T16:25:25.961579Z", + "iopub.status.idle": "2025-12-14T16:25:26.000492Z", + "shell.execute_reply": "2025-12-14T16:25:25.999049Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + ">>> Running Inference Demo...\n", + "--------------------------------------------------\n", + "Context: Students lower their eyes nervously.\n", + "Header : She\n", + " [ ] pats her shoulder, then saunters toward someone.\n", + " [ ] turns with two students.\n", + " [x] walks slowly towards someone.\n", + " [ ] wheels around as her dog thunders out.\n", + "--------------------------------------------------\n", + "Result: CORRECT (Pred: 2, Gold: 2)\n" + ] + } + ], + "source": [ + "# 10. Inference Demo\n", + "print(\"\\n>>> Running Inference Demo...\")\n", + "sample = raw[\"validation\"][0]\n", + "context = sample[\"sent1\"]\n", + "header = sample[\"sent2\"]\n", + "choices = [sample[e] for e in ending_names]\n", + "\n", + "first = [context] * 4\n", + "second = [f\"{header} {c}\" for c in choices]\n", + "\n", + "tok = tokenizer(first, second, truncation=True, padding=True, return_tensors=\"ms\")\n", + "inputs = {k: v.reshape((1, 4, -1)) for k, v in tok.items()}\n", + "\n", + "outputs = model(**inputs)\n", + "logits = outputs[\"logits\"] if isinstance(outputs, dict) else outputs.logits\n", + "pred = int(np.argmax(logits.asnumpy(), axis=1)[0])\n", + "\n", + "print(\"-\" * 50)\n", + "print(f\"Context: {context}\")\n", + "print(f\"Header : {header}\")\n", + "for i, c in enumerate(choices):\n", + " mark = \"[x]\" if i == pred else \"[ ]\"\n", + " print(f\" {mark} {c}\")\n", + "print(\"-\" * 50)\n", + "\n", + "gold = int(sample[\"label\"])\n", + "if pred == gold:\n", + " print(f\"Result: CORRECT (Pred: {pred}, Gold: 
{gold})\")\n", + "else:\n", + " print(f\"Result: INCORRECT (Pred: {pred}, Gold: {gold})\")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ms_py311", + "language": "python", + "name": "ms_py311" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.14" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 92387420d72234400768e8cff8cc496f26e3eaab Mon Sep 17 00:00:00 2001 From: Magnus0100 Date: Sun, 21 Dec 2025 02:27:30 +0800 Subject: [PATCH 2/6] fixbug --- .DS_Store | Bin 0 -> 6148 bytes nlp/.DS_Store | Bin 0 -> 6148 bytes nlp/README.md | 2 +- nlp/multiple_choice/.DS_Store | Bin 0 -> 6148 bytes .../SWAG_Multiple_Choice.ipynb | 813 ++++++++++++++++ .../swag_bert_run_result.ipynb | 909 ------------------ 6 files changed, 814 insertions(+), 910 deletions(-) create mode 100644 .DS_Store create mode 100644 nlp/.DS_Store create mode 100644 nlp/multiple_choice/.DS_Store create mode 100644 nlp/multiple_choice/SWAG_Multiple_Choice.ipynb delete mode 100644 nlp/multiple_choice/swag_bert_run_result.ipynb diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..225deb74a558d957ae8b21cc9eff906e52d07799 GIT binary patch literal 6148 zcmeHKU60a06ukqi6j6hTCi`O2#Mc#JU89LFEDMR7s1J+L#E)Q0VQnle(z2Kk5}x%x z_!oTjm-t_N(sO5qq%HWW#LOi#XF7B4?exqSLqwuI^O{68BC?PeOP7&tF>dFyVFg`t z9Vldsz^l$RnSk)JkXO zw;Z!T1G>m^#I0?dkrxl#C<-`!MTQ>H2rr5GB^2gDe946t~KtrYCetC)A~9 z)T3@TX5l=?E>eKsILne!d`(GFr7?OLP=bEmK{-O-9SU+@EKAy}2$2JtWLZ*}Ei88) zPxNN`-JJw}oHQCgM4?#PyHvKyR@HjjdmqhviIdFYwljUrPtPHxyDmHK3x7E8*KV9d zVdD7Va4H>qZwShpmwxC)^L8`~y;%D8RD)HqD*f92V$nQ2Jg|?Bn#%)w@$f+d<$L#+ z%ZhdL_T5Kk{SU!7j6UQ0U;^8}w5uA|z~iOAc|Fif)m0b0X&6Ky4}hwj@=VN$!y(e* zROk!?c}yL83amqVFY6%(H0^OE3VrgKICqrksVfcFO*iLs?|rBH4i$mA6OSVFZll=<%f``8*= 
z8dnO@0u#y=s9c4f1|4IQ->hwBoe3Cv}H$INfS{vyd5*zbY3Z)A&eH_aIAI1Af b($Hpe0oc;GQivLu`6D1@Fqu){uPX2x5fAnS literal 0 HcmV?d00001 diff --git a/nlp/.DS_Store b/nlp/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..fa701c2d7ccacf251c5f37c3558b0bc6fdb8145f GIT binary patch literal 6148 zcmeHKK~LK-6n-uR5+Sr6n8Y#Cu86e^G$bxrR}MSuLeO?#Q)!xPL<@1%r0b!ol=Jpu zcHJ-8-`T!r8w7My4@hW2?@7WR?*G%yC9AFkkU7~9>%$jk;$LQEBNkjV~Ccr z<7qi^?4R){&&sOZ{-qi#&DFJaZ{6GQK1LUM8dZa8Qg#R9Gp?O#oyS-4Abyvn(|&8~ zwJxebR-|J?$dVK(AKqt0qNiOwDU#A~BirHmp5Je6&t{#yy`5lxzcb$nW`{4^h+iDc z=f3y!+4G~5{+Il+(BI5FNZ}hcw(0N+z9U$o(Wj&w#&~NUVoB=PXk6sF_>4@>_UVL< zA!?7_0(Y?#$sNIeAnT^!sEUaKqJSuHKLy-==WX0?7-5S7qQE0mfcJ+0&KNqZEt;(Z zg}DL%n`kzMn12$P6F3YV))vtNQ_i_ib#f8YOalcXjJhyssF0oCk9y)LHY&eny=@mU+d&*5yG nS6jTMATUQUa``CUh8shBzyUCHSX)F3OnwB63{r>!530Z)BL#RA literal 0 HcmV?d00001 diff --git a/nlp/README.md b/nlp/README.md index a96be8f..8345145 100644 --- a/nlp/README.md +++ b/nlp/README.md @@ -6,7 +6,7 @@ This directory contains ready-to-use Natural Language Processing application not | No. | Model | Description | | :-- | :---- | :---------- | -| 1 | [BERT (SWAG)](./multiple_choice/swag_bert_run_result.ipynb) | Fine-tuning BERT on SWAG dataset for Multiple Choice tasks using MindSpore NLP. | +| 1 | [BERT (SWAG_Multiple_Choice)](./multiple_choice/SWAG_Multiple_Choice.ipynb) | Fine-tuning BERT on SWAG dataset for Multiple Choice tasks using Mind NLP. 
| ## Contributing New NLP Applications diff --git a/nlp/multiple_choice/.DS_Store b/nlp/multiple_choice/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..2a48537ade332bec09c484a35dcc1fa53beca53b GIT binary patch literal 6148 zcmeH~Jx;?w5QX1Dibxcsq}&rk;s#5U6x195Fd&2!kqC$oojZ;VZ+<{nhJuCyy^&_$ z?s&#)e}%^jpe%OR$G{B0lx~Vw8^&hW%}2JEkwT^O9_Q$Ag9kieKFa<+V6}&M#4|2& zJE*_I7B9F*ce>tOb?Z&XOnb}|`uxp}Gnoj8fCz|y2#CP%2w3l>O%Fmfihu}+z$XFw zK2*ACOYKAb)4{=y0MrS?Zk+pBf?6#=ZK-`IBeW8w)F>@^#3)hDc+I?)+J{Ctito(( z= 2.7.0 | >= 0.5.1 |\n", + "\n", + "首先导入必要的依赖库,并设置环境变量以使用 HF-Mirror 国内镜像加速下载。\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "49e9cf92", + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-20T18:14:27.279544Z", + "iopub.status.busy": "2025-12-20T18:14:27.279122Z", + "iopub.status.idle": "2025-12-20T18:14:52.133383Z", + "shell.execute_reply": "2025-12-20T18:14:52.131939Z" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/root/.conda/envs/py311/lib/python3.11/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for type is zero.\n", + " setattr(self, word, getattr(machar, word).flat[0])\n", + "/root/.conda/envs/py311/lib/python3.11/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero.\n", + " return self._float_to_str(self.smallest_subnormal)\n", + "/root/.conda/envs/py311/lib/python3.11/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for type is zero.\n", + " setattr(self, word, getattr(machar, word).flat[0])\n", + "/root/.conda/envs/py311/lib/python3.11/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero.\n", + " return self._float_to_str(self.smallest_subnormal)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/root/.conda/envs/py311/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Modular Diffusers is currently an experimental feature under active development. The API is subject to breaking changes in future releases.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">>> MindSpore Version: 2.7.0\n" + ] + } + ], + "source": [ + "import os\n", + "import mindspore as ms\n", + "import numpy as np\n", + "from dataclasses import dataclass\n", + "from typing import Any, Dict, List, Optional, Union\n", + "\n", + "# ----------------------------\n", + "# Environment (HF-Mirror)\n", + "# ----------------------------\n", + "os.environ.setdefault(\"HF_ENDPOINT\", \"https://hf-mirror.com\")\n", + "os.environ.setdefault(\"TOKENIZERS_PARALLELISM\", \"false\")\n", + "\n", + "# ----------------------------\n", + "# Import mindnlp.transformers\n", + "# ----------------------------\n", + "import mindnlp # noqa: F401\n", + "\n", + "from mindnlp.transformers import (\n", + " AutoTokenizer,\n", + " AutoModelForMultipleChoice,\n", + " Trainer,\n", + " TrainingArguments,\n", + ")\n", + "\n", + "print(f\">>> MindSpore Version: {ms.__version__}\")" + ] + }, + { + "cell_type": "markdown", + "id": "4d355e84", + "metadata": {}, + "source": [ + "#### **定义辅助函数**\n", + "\n", + "为了确保代码的健壮性以及在不同硬件(Ascend/GPU/CPU)上的兼容性,我们定义以下工具函数:\n", + "- `set_ms_context`: 设置 MindSpore 运行模式(PYNATIVE)。\n", + "- `to_numpy`: 鲁棒的 Tensor 转 Numpy 函数,兼容 MindSpore Tensor 和 PyTorch Tensor。\n", + "- `move_inputs_to_device`: 确保推理时输入数据与模型在同一设备上。\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36eb9465", + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-20T18:14:52.136850Z", + "iopub.status.busy": 
"2025-12-20T18:14:52.136224Z", + "iopub.status.idle": "2025-12-20T18:14:52.153736Z", + "shell.execute_reply": "2025-12-20T18:14:52.152509Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">>> Context set to PYNATIVE | Ascend:0\n" + ] + } + ], + "source": [ + "# ----------------------------\n", + "# MindSpore context\n", + "# ----------------------------\n", + "def set_ms_context():\n", + " ms.set_seed(42)\n", + " ms.set_context(mode=ms.PYNATIVE_MODE)\n", + " try:\n", + " ms.set_device(\"Ascend\", 0)\n", + " except AttributeError:\n", + " ms.set_context(device_target=\"Ascend\", device_id=0)\n", + "\n", + "def to_numpy(x) -> np.ndarray:\n", + " if x is None:\n", + " return None\n", + " if isinstance(x, np.ndarray):\n", + " return x\n", + " if isinstance(x, (list, tuple)):\n", + " return np.asarray(x)\n", + "\n", + " # MindSpore Tensor\n", + " if hasattr(x, \"asnumpy\"):\n", + " try:\n", + " return x.asnumpy()\n", + " except Exception:\n", + " pass\n", + "\n", + " # (mind)torch Tensor\n", + " if hasattr(x, \"detach\"):\n", + " try:\n", + " x = x.detach()\n", + " except Exception:\n", + " pass\n", + " if hasattr(x, \"cpu\"):\n", + " try:\n", + " x = x.cpu()\n", + " except Exception:\n", + " pass\n", + " if hasattr(x, \"numpy\"):\n", + " try:\n", + " return x.numpy()\n", + " except Exception:\n", + " pass\n", + "\n", + " return np.asarray(x)\n", + "\n", + "def get_model_device(model):\n", + " \"\"\"从参数上取 device(适配 mindtorch/torch 风格模型)。\"\"\"\n", + " try:\n", + " for p in model.parameters():\n", + " return p.device\n", + " except Exception:\n", + " return None\n", + " return None\n", + "\n", + "def move_inputs_to_device(inputs: Dict[str, Any], device):\n", + " \"\"\"把 Batch inputs 全部移动到同一 device(仅对有 .to 的张量生效)。\"\"\"\n", + " if device is None:\n", + " return inputs\n", + " out = {}\n", + " for k, v in inputs.items():\n", + " if hasattr(v, \"to\"):\n", + " out[k] = v.to(device)\n", + " else:\n", + " out[k] = v\n", + " return 
out\n", + "\n", + "# 初始化上下文\n", + "set_ms_context()\n", + "print(\">>> Context set to PYNATIVE | Ascend:0\")" + ] + }, + { + "cell_type": "markdown", + "id": "e6557024", + "metadata": {}, + "source": [ + "## 数据加载与预处理\n", + "\n", + "我们使用 HuggingFace `datasets` 库加载 SWAG 数据集。\n", + "为了演示效率,我们从原始训练集和验证集中截取部分数据进行训练。\n", + "\n", + "- **model_checkpoint**: 使用 `google-bert/bert-base-uncased`。\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "39916ecd", + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-20T18:14:52.156477Z", + "iopub.status.busy": "2025-12-20T18:14:52.156148Z", + "iopub.status.idle": "2025-12-20T18:14:56.105448Z", + "shell.execute_reply": "2025-12-20T18:14:56.104088Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">>> Model: google-bert/bert-base-uncased\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">>> Data: Train=2000, Valid=1000\n" + ] + } + ], + "source": [ + "# 配置\n", + "model_checkpoint = \"google-bert/bert-base-uncased\"\n", + "output_dir = \"./my_awesome_swag_model_ms\"\n", + "\n", + "# 样本量\n", + "max_train_samples = 2000\n", + "max_eval_samples = 1000\n", + "\n", + "print(f\">>> Model: {model_checkpoint}\")\n", + "\n", + "# 1. Dataset\n", + "from datasets import load_dataset\n", + "raw = load_dataset(\"swag\", \"regular\")\n", + "\n", + "# 截取子集\n", + "raw[\"train\"] = raw[\"train\"].select(range(min(max_train_samples, len(raw[\"train\"]))))\n", + "raw[\"validation\"] = raw[\"validation\"].select(range(min(max_eval_samples, len(raw[\"validation\"]))))\n", + "\n", + "print(f\">>> Data: Train={len(raw['train'])}, Valid={len(raw['validation'])}\")" + ] + }, + { + "cell_type": "markdown", + "id": "cd438d15", + "metadata": {}, + "source": [ + "#### **数据预处理 (Tokenization)**\n", + "\n", + "多选任务的数据预处理稍显特殊。我们需要将 **Context (sent1)** 与 **Header (sent2)** 结合,分别与 4 个 **Ending** 选项拼接,形成 4 个独立的输入序列。\n", + "\n", + "1. 
**Flatten**: 将 `(Batch, 4)` 的结构展平为 `(Batch * 4)` 进行 Tokenize。\n", + "2. **Tokenize**: 使用 BERT Tokenizer 进行编码。\n", + "3. **Un-flatten**: 将编码后的结果重新 reshape 回 `(Batch, 4, Seq_Len)`。\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "f14c6fcc", + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-20T18:14:56.108448Z", + "iopub.status.busy": "2025-12-20T18:14:56.108110Z", + "iopub.status.idle": "2025-12-20T18:14:56.945719Z", + "shell.execute_reply": "2025-12-20T18:14:56.944331Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Data preprocessing completed.\n" + ] + } + ], + "source": [ + "# 2. Tokenizer\n", + "tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)\n", + "\n", + "# 3. Preprocess\n", + "ending_names = [\"ending0\", \"ending1\", \"ending2\", \"ending3\"]\n", + "\n", + "def preprocess_function(examples):\n", + " first_sentences = [[c] * 4 for c in examples[\"sent1\"]]\n", + " question_headers = examples[\"sent2\"]\n", + " second_sentences = [\n", + " [f\"{h} {examples[end][i]}\" for end in ending_names]\n", + " for i, h in enumerate(question_headers)\n", + " ]\n", + "\n", + " # Flatten\n", + " first_sentences = sum(first_sentences, [])\n", + " second_sentences = sum(second_sentences, [])\n", + "\n", + " tokenized = tokenizer(first_sentences, second_sentences, truncation=True)\n", + "\n", + " # Un-flatten\n", + " result = {k: [v[i:i + 4] for i in range(0, len(v), 4)] for k, v in tokenized.items()}\n", + " result[\"labels\"] = examples[\"label\"]\n", + " return result\n", + "\n", + "# 执行 Map 操作\n", + "encoded = raw.map(preprocess_function, batched=True, remove_columns=raw[\"train\"].column_names)\n", + "print(\"Data preprocessing completed.\")" + ] + }, + { + "cell_type": "markdown", + "id": "8f739f70", + "metadata": {}, + "source": [ + "#### **定义 DataCollator**\n", + "\n", + "定义数据整理器,负责在 Batch 层面进行 **Dynamic Padding**,并将数据转换为 MindSpore Tensor。\n" + ] + }, + { + 
"cell_type": "code", + "execution_count": 5, + "id": "f6808579", + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-20T18:14:56.948719Z", + "iopub.status.busy": "2025-12-20T18:14:56.948398Z", + "iopub.status.idle": "2025-12-20T18:14:56.963209Z", + "shell.execute_reply": "2025-12-20T18:14:56.962011Z" + } + }, + "outputs": [], + "source": [ + "# ----------------------------\n", + "# DataCollator\n", + "# ----------------------------\n", + "@dataclass\n", + "class DataCollatorForMultipleChoice:\n", + " tokenizer: Any\n", + " padding: Union[bool, str] = \"longest\"\n", + " pad_to_multiple_of: Optional[int] = None\n", + " label_dtype: ms.dtype = ms.int32\n", + "\n", + " def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]:\n", + " label_key = \"labels\" if \"labels\" in features[0] else (\"label\" if \"label\" in features[0] else None)\n", + " labels = [f.pop(label_key) for f in features] if label_key else None\n", + "\n", + " batch_size = len(features)\n", + " num_choices = len(features[0][\"input_ids\"])\n", + "\n", + " flattened = []\n", + " for feat in features:\n", + " for i in range(num_choices):\n", + " flattened.append({k: v[i] for k, v in feat.items()})\n", + "\n", + " # 训练阶段:优先返回 MindSpore 张量\n", + " try:\n", + " batch = self.tokenizer.pad(\n", + " flattened,\n", + " padding=self.padding,\n", + " pad_to_multiple_of=self.pad_to_multiple_of,\n", + " return_tensors=\"ms\",\n", + " )\n", + " out = {k: v.reshape((batch_size, num_choices, -1)) for k, v in batch.items()}\n", + " if labels is not None:\n", + " out[\"labels\"] = ms.Tensor(np.asarray(labels, dtype=np.int32), dtype=self.label_dtype)\n", + " return out\n", + " except Exception:\n", + " # Fallback\n", + " batch_np = self.tokenizer.pad(\n", + " flattened,\n", + " padding=self.padding,\n", + " pad_to_multiple_of=self.pad_to_multiple_of,\n", + " return_tensors=\"np\",\n", + " )\n", + " out = {}\n", + " for k, v in batch_np.items():\n", + " arr = 
np.asarray(v).reshape((batch_size, num_choices, -1))\n", + " if k in (\"input_ids\", \"attention_mask\", \"token_type_ids\"):\n", + " arr = arr.astype(np.int32, copy=False)\n", + " out[k] = ms.Tensor(arr)\n", + " if labels is not None:\n", + " out[\"labels\"] = ms.Tensor(np.asarray(labels, dtype=np.int32), dtype=self.label_dtype)\n", + " return out" + ] + }, + { + "cell_type": "markdown", + "id": "368fbebe", + "metadata": {}, + "source": [ + "## 模型构建\n", + "\n", + "使用 `AutoModelForMultipleChoice` 加载预训练模型。\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "41a6399a", + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-20T18:14:56.966036Z", + "iopub.status.busy": "2025-12-20T18:14:56.965545Z", + "iopub.status.idle": "2025-12-20T18:14:58.261180Z", + "shell.execute_reply": "2025-12-20T18:14:58.260039Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[MS_ALLOC_CONF]Runtime config: enable_vmm:True vmm_align_size:2MB\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Some weights of BertForMultipleChoice were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model loaded.\n" + ] + } + ], + "source": [ + "# 4. 
Model\n", + "model = AutoModelForMultipleChoice.from_pretrained(model_checkpoint)\n", + "print(\"Model loaded.\")" + ] + }, + { + "cell_type": "markdown", + "id": "16ac1dcf", + "metadata": {}, + "source": [ + "## 模型训练\n", + "\n", + "配置 `TrainingArguments` 并初始化 `Trainer`。\n", + "我们定义 `compute_metrics` 函数来计算准确率 (Accuracy)。\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "f6188891", + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-20T18:14:58.264194Z", + "iopub.status.busy": "2025-12-20T18:14:58.263795Z", + "iopub.status.idle": "2025-12-20T18:15:00.066353Z", + "shell.execute_reply": "2025-12-20T18:15:00.064796Z" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_54703/2519269147.py:30: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n", + " trainer = Trainer(\n" + ] + } + ], + "source": [ + "# 5. Metrics\n", + "def compute_metrics(eval_predictions):\n", + " if hasattr(eval_predictions, \"predictions\"):\n", + " logits = eval_predictions.predictions\n", + " labels = eval_predictions.label_ids\n", + " else:\n", + " logits, labels = eval_predictions\n", + " preds = np.argmax(to_numpy(logits), axis=1)\n", + " labels = to_numpy(labels)\n", + " return {\"accuracy\": float((preds == labels).mean())}\n", + "\n", + "# 6. TrainingArguments\n", + "train_args = TrainingArguments(\n", + " output_dir=output_dir,\n", + " learning_rate=5e-5,\n", + " per_device_train_batch_size=8,\n", + " per_device_eval_batch_size=8,\n", + " num_train_epochs=3,\n", + " weight_decay=0.01,\n", + " eval_strategy=\"epoch\",\n", + " save_strategy=\"epoch\",\n", + " save_total_limit=1,\n", + " logging_steps=50,\n", + " push_to_hub=False,\n", + " remove_unused_columns=False,\n", + " report_to=[],\n", + ")\n", + "\n", + "# 7. 
Trainer\n", + "trainer = Trainer(\n", + " model=model,\n", + " args=train_args,\n", + " train_dataset=encoded[\"train\"],\n", + " eval_dataset=encoded[\"validation\"],\n", + " tokenizer=tokenizer,\n", + " data_collator=DataCollatorForMultipleChoice(tokenizer),\n", + " compute_metrics=compute_metrics,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "4bf8bd26", + "metadata": {}, + "source": [ + "#### **执行训练与保存**\n", + "\n", + "调用 `trainer.train()` 开始训练,训练结束后保存模型和 Tokenizer。\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "9611e086", + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-20T18:15:00.069802Z", + "iopub.status.busy": "2025-12-20T18:15:00.069470Z", + "iopub.status.idle": "2025-12-20T18:22:24.765471Z", + "shell.execute_reply": "2025-12-20T18:22:24.763076Z" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + ">>> Starting training...\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " [750/750 07:10, Epoch 3/3]\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EpochTraining LossValidation LossAccuracy
10.9305000.8971920.650000
20.4527000.9427330.666000
30.1942001.1431440.681000

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + ">>> Starting evaluation...\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "

\n", + " \n", + " \n", + " [125/125 00:05]\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">>> Eval metrics: {'eval_loss': 1.1431440114974976, 'eval_accuracy': 0.681, 'eval_runtime': 5.1806, 'eval_samples_per_second': 193.028, 'eval_steps_per_second': 24.129, 'epoch': 3.0}\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">>> Model saved to: ./my_awesome_swag_model_ms\n" + ] + } + ], + "source": [ + "# 8. Run\n", + "print(\"\\n>>> Starting training...\")\n", + "trainer.train()\n", + "\n", + "print(\"\\n>>> Starting evaluation...\")\n", + "metrics = trainer.evaluate()\n", + "print(f\">>> Eval metrics: {metrics}\")\n", + "\n", + "# 9. Save\n", + "os.makedirs(output_dir, exist_ok=True)\n", + "trainer.save_model(output_dir)\n", + "try:\n", + " tokenizer.save_pretrained(output_dir)\n", + "except Exception:\n", + " pass\n", + "print(f\">>> Model saved to: {output_dir}\")" + ] + }, + { + "cell_type": "markdown", + "id": "e52e851f", + "metadata": {}, + "source": [ + "## 模型推理\n", + "\n", + "为了验证模型效果,我们进行一次端到端的推理演示。\n", + "\n", + "**注意(关键修复):**\n", + "在 MindNLP 环境下,为了确保推理的稳定性和跨后端兼容性,我们采取以下策略:\n", + "1. **`return_tensors=\"pt\"`**: 使用 PyTorch 兼容的 Tensor 格式(MindNLP 会自动代理到 MindTorch)。\n", + "2. 
**`move_inputs_to_device`**: 显式将输入数据移动到模型参数所在的设备,避免 \"All tensor arguments must be on the same device\" 错误。\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "01860b80", + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-20T18:22:24.770484Z", + "iopub.status.busy": "2025-12-20T18:22:24.770015Z", + "iopub.status.idle": "2025-12-20T18:22:24.834243Z", + "shell.execute_reply": "2025-12-20T18:22:24.832738Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + ">>> Running Inference Demo...\n", + ">>> Inference model device: device(type=npu, index=0)\n", + "--------------------------------------------------\n", + "Context: Students lower their eyes nervously.\n", + "Header : She\n", + " [ ] pats her shoulder, then saunters toward someone.\n", + " [ ] turns with two students.\n", + " [x] walks slowly towards someone.\n", + " [ ] wheels around as her dog thunders out.\n", + "--------------------------------------------------\n", + "Result: CORRECT (Pred: 2, Gold: 2)\n" + ] + } + ], + "source": [ + "# 10. Inference Demo (FIXED)\n", + "print(\"\\n>>> Running Inference Demo...\")\n", + "\n", + "# 推理阶段需要 torch/no_grad;在 mindnlp 环境下 torch 会被代理到 mindtorch\n", + "import torch\n", + "\n", + "model.eval()\n", + "device = get_model_device(model)\n", + "print(f\">>> Inference model device: {device}\")\n", + "\n", + "# 1. 准备单条样本\n", + "sample = raw[\"validation\"][0]\n", + "context = sample[\"sent1\"]\n", + "header = sample[\"sent2\"]\n", + "choices = [sample[e] for e in ending_names]\n", + "\n", + "first = [context] * 4\n", + "second = [f\"{header} {c}\" for c in choices]\n", + "\n", + "# 2. Tokenize\n", + "tok = tokenizer(first, second, truncation=True, padding=True, return_tensors=\"pt\")\n", + "inputs = {k: v.reshape((1, 4, -1)) for k, v in tok.items()}\n", + "inputs = move_inputs_to_device(inputs, device)\n", + "\n", + "# 3. 
执行前向计算 (No Grad)\n", + "with torch.no_grad():\n", + " outputs = model(**inputs)\n", + "\n", + "logits = outputs[\"logits\"] if isinstance(outputs, dict) else outputs.logits\n", + "pred = int(np.argmax(to_numpy(logits), axis=1)[0])\n", + "\n", + "# 4. 打印结果\n", + "print(\"-\" * 50)\n", + "print(f\"Context: {context}\")\n", + "print(f\"Header : {header}\")\n", + "for i, c in enumerate(choices):\n", + " mark = \"[x]\" if i == pred else \"[ ]\"\n", + " print(f\" {mark} {c}\")\n", + "print(\"-\" * 50)\n", + "\n", + "gold = int(sample[\"label\"])\n", + "if pred == gold:\n", + " print(f\"Result: CORRECT (Pred: {pred}, Gold: {gold})\")\n", + "else:\n", + " print(f\"Result: INCORRECT (Pred: {pred}, Gold: {gold})\")" + ] + } + ], + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/nlp/multiple_choice/swag_bert_run_result.ipynb b/nlp/multiple_choice/swag_bert_run_result.ipynb deleted file mode 100644 index f2b7fd1..0000000 --- a/nlp/multiple_choice/swag_bert_run_result.ipynb +++ /dev/null @@ -1,909 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# SWAG Multiple Choice Finetune + Inference (MindSpore + MindHF)\n", - "\n", - "**特性:**\n", - "\n", - "- 流程:训练 -> 评估 -> 保存 -> 推理演示\n", - "\n", - "本案例基于 MindSpore 2.7.1 和 MindHF 0.6.0 实现 BERT 模型在 SWAG 数据集上的多选任务微调。\n", - "## 1. 
导入依赖与环境配置\n", - "\n", - "导入必要的库,并设置 HuggingFace 镜像加速下载。\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "editable": true, - "execution": { - "iopub.execute_input": "2025-12-14T16:21:14.050863Z", - "iopub.status.busy": "2025-12-14T16:21:14.050528Z", - "iopub.status.idle": "2025-12-14T16:21:21.091146Z", - "shell.execute_reply": "2025-12-14T16:21:21.089643Z" - }, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/root/.conda/envs/ms_py311/lib/python3.11/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for type is zero.\n", - " setattr(self, word, getattr(machar, word).flat[0])\n", - "/root/.conda/envs/ms_py311/lib/python3.11/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero.\n", - " return self._float_to_str(self.smallest_subnormal)\n", - "/root/.conda/envs/ms_py311/lib/python3.11/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for type is zero.\n", - " setattr(self, word, getattr(machar, word).flat[0])\n", - "/root/.conda/envs/ms_py311/lib/python3.11/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero.\n", - " return self._float_to_str(self.smallest_subnormal)\n" - ] - }, - { - "data": { - "text/plain": [ - "'false'" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import os\n", - "from dataclasses import dataclass\n", - "from typing import Any, Dict, List, Optional, Union\n", - "\n", - "import numpy as np\n", - "import mindspore as ms\n", - "\n", - "# ----------------------------\n", - "# Environment (HF-Mirror)\n", - "# ----------------------------\n", - "os.environ.setdefault(\"HF_ENDPOINT\", \"https://hf-mirror.com\")\n", - "os.environ.setdefault(\"TOKENIZERS_PARALLELISM\", 
\"false\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. 定义上下文设置函数\n", - "\n", - "定义设置 MindSpore 运行环境(Ascend/PYNATIVE)和导入 MindHF 组件的辅助函数。\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-14T16:21:21.094517Z", - "iopub.status.busy": "2025-12-14T16:21:21.094070Z", - "iopub.status.idle": "2025-12-14T16:21:21.102099Z", - "shell.execute_reply": "2025-12-14T16:21:21.100812Z" - } - }, - "outputs": [], - "source": [ - "# ----------------------------\n", - "# MindSpore context\n", - "# ----------------------------\n", - "def set_ms_context():\n", - " ms.set_seed(42)\n", - " ms.set_context(mode=ms.PYNATIVE_MODE)\n", - " try:\n", - " ms.set_device(\"Ascend\", 0)\n", - " except AttributeError:\n", - " ms.set_context(device_target=\"Ascend\", device_id=0)\n", - "\n", - "\n", - "def to_numpy(x) -> np.ndarray:\n", - " if isinstance(x, np.ndarray):\n", - " return x\n", - " if hasattr(x, \"asnumpy\"):\n", - " return x.asnumpy()\n", - " return np.asarray(x)\n", - "\n", - "\n", - "# ----------------------------\n", - "# Import mindhf.transformers\n", - "# ----------------------------\n", - "def import_mindhf_transformers():\n", - " from mindhf.transformers import (\n", - " AutoTokenizer,\n", - " AutoModelForMultipleChoice,\n", - " Trainer,\n", - " TrainingArguments,\n", - " )\n", - " return AutoTokenizer, AutoModelForMultipleChoice, Trainer, TrainingArguments\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. 
初始化与加载数据集\n", - "\n", - "初始化运行环境,加载 SWAG 数据集,并根据配置截取训练集和验证集。\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-14T16:21:21.104832Z", - "iopub.status.busy": "2025-12-14T16:21:21.104529Z", - "iopub.status.idle": "2025-12-14T16:21:31.986237Z", - "shell.execute_reply": "2025-12-14T16:21:31.984267Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/root/.conda/envs/ms_py311/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - ">>> MindSpore: 2.7.1\n", - ">>> Device: Ascend:0 | Mode: PYNATIVE\n", - ">>> Model: google-bert/bert-base-uncased\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - ">>> Data: Train=2000, Valid=1000\n" - ] - } - ], - "source": [ - "# 运行环境初始化\n", - "set_ms_context()\n", - "AutoTokenizer, AutoModelForMultipleChoice, Trainer, TrainingArguments = import_mindhf_transformers()\n", - "\n", - "# 配置\n", - "model_checkpoint = \"google-bert/bert-base-uncased\"\n", - "output_dir = \"./my_awesome_swag_model_ms\"\n", - "\n", - "# 样本量\n", - "max_train_samples = 2000\n", - "max_eval_samples = 1000\n", - "\n", - "print(f\">>> MindSpore: {ms.__version__}\")\n", - "print(f\">>> Device: Ascend:0 | Mode: PYNATIVE\")\n", - "print(f\">>> Model: {model_checkpoint}\")\n", - "\n", - "# 1. 
Dataset\n", - "from datasets import load_dataset\n", - "raw = load_dataset(\"swag\", \"regular\")\n", - "raw[\"train\"] = raw[\"train\"].select(range(min(max_train_samples, len(raw[\"train\"]))))\n", - "raw[\"validation\"] = raw[\"validation\"].select(range(min(max_eval_samples, len(raw[\"validation\"]))))\n", - "print(f\">>> Data: Train={len(raw['train'])}, Valid={len(raw['validation'])}\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. 数据预处理\n", - "\n", - "加载 Tokenizer,将 SWAG 数据集的上下文与选项拼接,并进行编码(Tokenize)。\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-14T16:21:31.990076Z", - "iopub.status.busy": "2025-12-14T16:21:31.989390Z", - "iopub.status.idle": "2025-12-14T16:21:49.785728Z", - "shell.execute_reply": "2025-12-14T16:21:49.784341Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\r\n", - "Map: 0%| | 0/2000 [00:00 Dict[str, Any]:\n", - " label_key = \"labels\" if \"labels\" in features[0] else (\"label\" if \"label\" in features[0] else None)\n", - " labels = [f.pop(label_key) for f in features] if label_key else None\n", - "\n", - " batch_size = len(features)\n", - " num_choices = len(features[0][\"input_ids\"])\n", - "\n", - " flattened = []\n", - " for feat in features:\n", - " for i in range(num_choices):\n", - " flattened.append({k: v[i] for k, v in feat.items()})\n", - "\n", - " # Try returning MindSpore tensors directly\n", - " try:\n", - " batch = self.tokenizer.pad(\n", - " flattened,\n", - " padding=self.padding,\n", - " pad_to_multiple_of=self.pad_to_multiple_of,\n", - " return_tensors=\"ms\",\n", - " )\n", - " out = {k: v.reshape((batch_size, num_choices, -1)) for k, v in batch.items()}\n", - " if labels is not None:\n", - " out[\"labels\"] = ms.Tensor(np.asarray(labels, dtype=np.int32), dtype=self.label_dtype)\n", - " return out\n", - " except Exception:\n", - " # Fallback to numpy then 
convert\n", - " batch_np = self.tokenizer.pad(\n", - " flattened,\n", - " padding=self.padding,\n", - " pad_to_multiple_of=self.pad_to_multiple_of,\n", - " return_tensors=\"np\",\n", - " )\n", - " out = {}\n", - " for k, v in batch_np.items():\n", - " arr = np.asarray(v).reshape((batch_size, num_choices, -1))\n", - " if k in (\"input_ids\", \"attention_mask\", \"token_type_ids\"):\n", - " arr = arr.astype(np.int32, copy=False)\n", - " out[k] = ms.Tensor(arr)\n", - " if labels is not None:\n", - " out[\"labels\"] = ms.Tensor(np.asarray(labels, dtype=np.int32), dtype=self.label_dtype)\n", - " return out\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 6. 模型构建与训练配置\n", - "\n", - "加载 `BertForMultipleChoice` 模型,配置 `TrainingArguments`,并初始化 `Trainer`。\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-14T16:21:49.805891Z", - "iopub.status.busy": "2025-12-14T16:21:49.805554Z", - "iopub.status.idle": "2025-12-14T16:21:52.499460Z", - "shell.execute_reply": "2025-12-14T16:21:52.498081Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[MS_ALLOC_CONF] config: enable_vmm:True vmm_align_size:2MB\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Some weights of BertForMultipleChoice were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", - "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_37813/2626949888.py:34: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n", - " trainer = Trainer(\n", - "The model is already on multiple devices. 
Skipping the move to device specified in `args`.\n" - ] - } - ], - "source": [ - "# 4. Model\n", - "model = AutoModelForMultipleChoice.from_pretrained(model_checkpoint)\n", - "\n", - "# 5. Metrics\n", - "def compute_metrics(eval_predictions):\n", - " if hasattr(eval_predictions, \"predictions\"):\n", - " logits = eval_predictions.predictions\n", - " labels = eval_predictions.label_ids\n", - " else:\n", - " logits, labels = eval_predictions\n", - " preds = np.argmax(to_numpy(logits), axis=1)\n", - " labels = to_numpy(labels)\n", - " return {\"accuracy\": float((preds == labels).mean())}\n", - "\n", - "# 6. TrainingArguments (Fixed for current environment)\n", - "\n", - "train_args = TrainingArguments(\n", - " output_dir=output_dir,\n", - " learning_rate=5e-5,\n", - " per_device_train_batch_size=8,\n", - " per_device_eval_batch_size=8,\n", - " num_train_epochs=3,\n", - " weight_decay=0.01,\n", - " eval_strategy=\"epoch\", \n", - " save_strategy=\"epoch\",\n", - " save_total_limit=1,\n", - " logging_steps=50,\n", - " push_to_hub=False,\n", - " remove_unused_columns=False,\n", - " report_to=[],\n", - ")\n", - "\n", - "# 7. Trainer\n", - "trainer = Trainer(\n", - " model=model,\n", - " args=train_args,\n", - " train_dataset=encoded[\"train\"],\n", - " eval_dataset=encoded[\"validation\"],\n", - " tokenizer=tokenizer,\n", - " data_collator=DataCollatorForMultipleChoice(tokenizer),\n", - " compute_metrics=compute_metrics,\n", - ")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 7. 
执行训练与保存\n", - "\n", - "启动训练流程,完成后进行评估并保存模型权重。\n" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-14T16:21:52.502318Z", - "iopub.status.busy": "2025-12-14T16:21:52.501983Z", - "iopub.status.idle": "2025-12-14T16:25:25.956052Z", - "shell.execute_reply": "2025-12-14T16:25:25.953760Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - ">>> Starting training...\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "
\n", - " \n", - " \n", - " [750/750 03:19, Epoch 3/3]\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
EpochTraining LossValidation LossAccuracy
10.9603000.8831730.670000
20.3726000.9570890.679000
30.0976001.1632500.681000

" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - ">>> Starting evaluation...\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "

\n", - " \n", - " \n", - " [125/125 00:02]\n", - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - ">>> Eval metrics: {'eval_loss': 1.1632496118545532, 'eval_accuracy': 0.681, 'eval_runtime': 2.83, 'eval_samples_per_second': 353.355, 'eval_steps_per_second': 44.169, 'epoch': 3.0}\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - ">>> Model saved to: ./my_awesome_swag_model_ms\n" - ] - } - ], - "source": [ - "# 8. Run\n", - "print(\"\\n>>> Starting training...\")\n", - "trainer.train()\n", - "\n", - "print(\"\\n>>> Starting evaluation...\")\n", - "metrics = trainer.evaluate()\n", - "print(f\">>> Eval metrics: {metrics}\")\n", - "\n", - "# 9. Save\n", - "os.makedirs(output_dir, exist_ok=True)\n", - "trainer.save_model(output_dir)\n", - "try:\n", - " tokenizer.save_pretrained(output_dir)\n", - "except Exception:\n", - " pass\n", - "print(f\">>> Model saved to: {output_dir}\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 8. 
推理演示\n", - "\n", - "从验证集中抽取样本,进行端到端的推理预测演示。\n" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-14T16:25:25.962026Z", - "iopub.status.busy": "2025-12-14T16:25:25.961579Z", - "iopub.status.idle": "2025-12-14T16:25:26.000492Z", - "shell.execute_reply": "2025-12-14T16:25:25.999049Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - ">>> Running Inference Demo...\n", - "--------------------------------------------------\n", - "Context: Students lower their eyes nervously.\n", - "Header : She\n", - " [ ] pats her shoulder, then saunters toward someone.\n", - " [ ] turns with two students.\n", - " [x] walks slowly towards someone.\n", - " [ ] wheels around as her dog thunders out.\n", - "--------------------------------------------------\n", - "Result: CORRECT (Pred: 2, Gold: 2)\n" - ] - } - ], - "source": [ - "# 10. Inference Demo\n", - "print(\"\\n>>> Running Inference Demo...\")\n", - "sample = raw[\"validation\"][0]\n", - "context = sample[\"sent1\"]\n", - "header = sample[\"sent2\"]\n", - "choices = [sample[e] for e in ending_names]\n", - "\n", - "first = [context] * 4\n", - "second = [f\"{header} {c}\" for c in choices]\n", - "\n", - "tok = tokenizer(first, second, truncation=True, padding=True, return_tensors=\"ms\")\n", - "inputs = {k: v.reshape((1, 4, -1)) for k, v in tok.items()}\n", - "\n", - "outputs = model(**inputs)\n", - "logits = outputs[\"logits\"] if isinstance(outputs, dict) else outputs.logits\n", - "pred = int(np.argmax(logits.asnumpy(), axis=1)[0])\n", - "\n", - "print(\"-\" * 50)\n", - "print(f\"Context: {context}\")\n", - "print(f\"Header : {header}\")\n", - "for i, c in enumerate(choices):\n", - " mark = \"[x]\" if i == pred else \"[ ]\"\n", - " print(f\" {mark} {c}\")\n", - "print(\"-\" * 50)\n", - "\n", - "gold = int(sample[\"label\"])\n", - "if pred == gold:\n", - " print(f\"Result: CORRECT (Pred: {pred}, Gold: 
{gold})\")\n", - "else:\n", - " print(f\"Result: INCORRECT (Pred: {pred}, Gold: {gold})\")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "ms_py311", - "language": "python", - "name": "ms_py311" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.14" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} From 0ef39c43c7ffff3e284d1948f3cf91ca931e3dff Mon Sep 17 00:00:00 2001 From: Magnus0100 Date: Sun, 21 Dec 2025 02:31:51 +0800 Subject: [PATCH 3/6] 1 --- nlp/multiple_choice/SWAG_Multiple_Choice.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nlp/multiple_choice/SWAG_Multiple_Choice.ipynb b/nlp/multiple_choice/SWAG_Multiple_Choice.ipynb index 19c4fde..9beed3c 100644 --- a/nlp/multiple_choice/SWAG_Multiple_Choice.ipynb +++ b/nlp/multiple_choice/SWAG_Multiple_Choice.ipynb @@ -18,7 +18,7 @@ "2. **数据处理**:加载 SWAG 数据集,进行 Tokenization、Flatten 处理及动态 Padding。\n", "3. **模型构建**:加载 BERT 预训练权重,构建多选分类网络。\n", "4. **模型训练**:定义损失函数与优化器,执行微调。\n", - "5. **模型推理**:加载微调后的模型,演示端到端推理(包含针对 MindNLP 跨框架特性的适配)。\n" + "5. **模型推理**:加载微调后的模型,演示端到端推理。\n" ] }, { From 1595147a65d32f76853b080b777f42149193555d Mon Sep 17 00:00:00 2001 From: Magnus0100 Date: Tue, 30 Dec 2025 15:31:03 +0800 Subject: [PATCH 4/6] [Docs] clean and rename SWAG multiple choice notebook --- nlp/README.md | 2 +- ...nb => finetune_bert_multiple_choice.ipynb} | 371 ++---------------- 2 files changed, 44 insertions(+), 329 deletions(-) rename nlp/multiple_choice/{SWAG_Multiple_Choice.ipynb => finetune_bert_multiple_choice.ipynb} (62%) diff --git a/nlp/README.md b/nlp/README.md index 8345145..6c5b393 100644 --- a/nlp/README.md +++ b/nlp/README.md @@ -6,7 +6,7 @@ This directory contains ready-to-use Natural Language Processing application not | No. 
| Model | Description |
| :-- | :---- | :---------- |
-| 1 | [BERT (SWAG_Multiple_Choice)](./multiple_choice/SWAG_Multiple_Choice.ipynb) | Fine-tuning BERT on SWAG dataset for Multiple Choice tasks using Mind NLP. |
+| 1 | [BERT (SWAG Multiple Choice)](./multiple_choice/finetune_bert_multiple_choice.ipynb) | Fine-tuning BERT on SWAG dataset for Multiple Choice tasks using MindNLP. |
 
 ## Contributing New NLP Applications
 
diff --git a/nlp/multiple_choice/SWAG_Multiple_Choice.ipynb b/nlp/multiple_choice/finetune_bert_multiple_choice.ipynb
similarity index 62%
rename from nlp/multiple_choice/SWAG_Multiple_Choice.ipynb
rename to nlp/multiple_choice/finetune_bert_multiple_choice.ipynb
index 9beed3c..1f72324 100644
--- a/nlp/multiple_choice/SWAG_Multiple_Choice.ipynb
+++ b/nlp/multiple_choice/finetune_bert_multiple_choice.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "id": "bc899378",
+   "id": "0",
    "metadata": {},
    "source": [
     "# 基于 MindSpore 的 BERT 模型 SWAG 多选阅读理解任务\n",
@@ -23,7 +23,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "fffcc076",
+   "id": "1",
    "metadata": {},
    "source": [
     "## 环境准备\n",
@@ -39,54 +39,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
-   "id": "49e9cf92",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2025-12-20T18:14:27.279544Z",
-     "iopub.status.busy": "2025-12-20T18:14:27.279122Z",
-     "iopub.status.idle": "2025-12-20T18:14:52.133383Z",
-     "shell.execute_reply": "2025-12-20T18:14:52.131939Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      
"/root/.conda/envs/py311/lib/python3.11/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for type is zero.\n", - " setattr(self, word, getattr(machar, word).flat[0])\n", - "/root/.conda/envs/py311/lib/python3.11/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero.\n", - " return self._float_to_str(self.smallest_subnormal)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/root/.conda/envs/py311/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Modular Diffusers is currently an experimental feature under active development. The API is subject to breaking changes in future releases.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - ">>> MindSpore Version: 2.7.0\n" - ] - } - ], + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], "source": [ "import os\n", "import mindspore as ms\n", @@ -117,7 +73,7 @@ }, { "cell_type": "markdown", - "id": "4d355e84", + "id": "3", "metadata": {}, "source": [ "#### **定义辅助函数**\n", @@ -131,24 +87,9 @@ { "cell_type": "code", "execution_count": null, - "id": "36eb9465", - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-20T18:14:52.136850Z", - "iopub.status.busy": "2025-12-20T18:14:52.136224Z", - "iopub.status.idle": "2025-12-20T18:14:52.153736Z", - "shell.execute_reply": "2025-12-20T18:14:52.152509Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ">>> Context set to PYNATIVE | Ascend:0\n" - ] - } - ], + "id": "4", + "metadata": {}, + "outputs": [], "source": [ "# ----------------------------\n", "# MindSpore context\n", @@ -223,7 +164,7 @@ }, { 
"cell_type": "markdown", - "id": "e6557024", + "id": "5", "metadata": {}, "source": [ "## 数据加载与预处理\n", @@ -236,32 +177,10 @@ }, { "cell_type": "code", - "execution_count": 3, - "id": "39916ecd", - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-20T18:14:52.156477Z", - "iopub.status.busy": "2025-12-20T18:14:52.156148Z", - "iopub.status.idle": "2025-12-20T18:14:56.105448Z", - "shell.execute_reply": "2025-12-20T18:14:56.104088Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ">>> Model: google-bert/bert-base-uncased\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - ">>> Data: Train=2000, Valid=1000\n" - ] - } - ], + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], "source": [ "# 配置\n", "model_checkpoint = \"google-bert/bert-base-uncased\"\n", @@ -286,7 +205,7 @@ }, { "cell_type": "markdown", - "id": "cd438d15", + "id": "7", "metadata": {}, "source": [ "#### **数据预处理 (Tokenization)**\n", @@ -300,25 +219,10 @@ }, { "cell_type": "code", - "execution_count": 4, - "id": "f14c6fcc", - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-20T18:14:56.108448Z", - "iopub.status.busy": "2025-12-20T18:14:56.108110Z", - "iopub.status.idle": "2025-12-20T18:14:56.945719Z", - "shell.execute_reply": "2025-12-20T18:14:56.944331Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Data preprocessing completed.\n" - ] - } - ], + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], "source": [ "# 2. 
Tokenizer\n", "tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)\n", @@ -352,7 +256,7 @@ }, { "cell_type": "markdown", - "id": "8f739f70", + "id": "9", "metadata": {}, "source": [ "#### **定义 DataCollator**\n", @@ -362,16 +266,9 @@ }, { "cell_type": "code", - "execution_count": 5, - "id": "f6808579", - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-20T18:14:56.948719Z", - "iopub.status.busy": "2025-12-20T18:14:56.948398Z", - "iopub.status.idle": "2025-12-20T18:14:56.963209Z", - "shell.execute_reply": "2025-12-20T18:14:56.962011Z" - } - }, + "execution_count": null, + "id": "10", + "metadata": {}, "outputs": [], "source": [ "# ----------------------------\n", @@ -429,7 +326,7 @@ }, { "cell_type": "markdown", - "id": "368fbebe", + "id": "11", "metadata": {}, "source": [ "## 模型构建\n", @@ -439,40 +336,10 @@ }, { "cell_type": "code", - "execution_count": 6, - "id": "41a6399a", - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-20T18:14:56.966036Z", - "iopub.status.busy": "2025-12-20T18:14:56.965545Z", - "iopub.status.idle": "2025-12-20T18:14:58.261180Z", - "shell.execute_reply": "2025-12-20T18:14:58.260039Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[MS_ALLOC_CONF]Runtime config: enable_vmm:True vmm_align_size:2MB\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Some weights of BertForMultipleChoice were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", - "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model loaded.\n" - ] - } - ], + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], "source": [ "# 4. 
Model\n", "model = AutoModelForMultipleChoice.from_pretrained(model_checkpoint)\n", @@ -481,7 +348,7 @@ }, { "cell_type": "markdown", - "id": "16ac1dcf", + "id": "13", "metadata": {}, "source": [ "## 模型训练\n", @@ -492,26 +359,10 @@ }, { "cell_type": "code", - "execution_count": 7, - "id": "f6188891", - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-20T18:14:58.264194Z", - "iopub.status.busy": "2025-12-20T18:14:58.263795Z", - "iopub.status.idle": "2025-12-20T18:15:00.066353Z", - "shell.execute_reply": "2025-12-20T18:15:00.064796Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_54703/2519269147.py:30: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n", - " trainer = Trainer(\n" - ] - } - ], + "execution_count": null, + "id": "14", + "metadata": {}, + "outputs": [], "source": [ "# 5. Metrics\n", "def compute_metrics(eval_predictions):\n", @@ -555,7 +406,7 @@ }, { "cell_type": "markdown", - "id": "4bf8bd26", + "id": "15", "metadata": {}, "source": [ "#### **执行训练与保存**\n", @@ -565,120 +416,10 @@ }, { "cell_type": "code", - "execution_count": 8, - "id": "9611e086", - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-20T18:15:00.069802Z", - "iopub.status.busy": "2025-12-20T18:15:00.069470Z", - "iopub.status.idle": "2025-12-20T18:22:24.765471Z", - "shell.execute_reply": "2025-12-20T18:22:24.763076Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - ">>> Starting training...\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "
\n", - " \n", - " \n", - " [750/750 07:10, Epoch 3/3]\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
EpochTraining LossValidation LossAccuracy
10.9305000.8971920.650000
20.4527000.9427330.666000
30.1942001.1431440.681000

" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - ">>> Starting evaluation...\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "

\n", - " \n", - " \n", - " [125/125 00:05]\n", - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - ">>> Eval metrics: {'eval_loss': 1.1431440114974976, 'eval_accuracy': 0.681, 'eval_runtime': 5.1806, 'eval_samples_per_second': 193.028, 'eval_steps_per_second': 24.129, 'epoch': 3.0}\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - ">>> Model saved to: ./my_awesome_swag_model_ms\n" - ] - } - ], + "execution_count": null, + "id": "16", + "metadata": {}, + "outputs": [], "source": [ "# 8. Run\n", "print(\"\\n>>> Starting training...\")\n", @@ -700,7 +441,7 @@ }, { "cell_type": "markdown", - "id": "e52e851f", + "id": "17", "metadata": {}, "source": [ "## 模型推理\n", @@ -716,35 +457,9 @@ { "cell_type": "code", "execution_count": null, - "id": "01860b80", - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-20T18:22:24.770484Z", - "iopub.status.busy": "2025-12-20T18:22:24.770015Z", - "iopub.status.idle": "2025-12-20T18:22:24.834243Z", - "shell.execute_reply": "2025-12-20T18:22:24.832738Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - ">>> Running Inference Demo...\n", - ">>> Inference model device: device(type=npu, index=0)\n", - "--------------------------------------------------\n", - "Context: Students lower their eyes nervously.\n", - "Header : She\n", - " [ ] pats her shoulder, then saunters toward someone.\n", - " [ ] turns with two students.\n", - " [x] walks slowly towards someone.\n", - " [ ] wheels around as her dog thunders out.\n", - "--------------------------------------------------\n", - "Result: CORRECT (Pred: 2, Gold: 2)\n" - ] - } - ], + "id": "18", + "metadata": {}, + "outputs": [], "source": [ "# 10. 
Inference Demo (FIXED)\n", "print(\"\\n>>> Running Inference Demo...\")\n", From b75c3a390b4419f9f11b3c7f21198d94686df77f Mon Sep 17 00:00:00 2001 From: Magnus0100 Date: Wed, 31 Dec 2025 12:57:08 +0800 Subject: [PATCH 5/6] Move BERT SWAG notebook to llm --- .DS_Store | Bin 6148 -> 0 bytes llm/README.md | 1 + .../bert}/finetune_bert_multiple_choice.ipynb | 12 ++++++------ nlp/.DS_Store | Bin 6148 -> 0 bytes nlp/README.md | 4 +--- nlp/multiple_choice/.DS_Store | Bin 6148 -> 0 bytes 6 files changed, 8 insertions(+), 9 deletions(-) delete mode 100644 .DS_Store rename {nlp/multiple_choice => llm/bert}/finetune_bert_multiple_choice.ipynb (96%) delete mode 100644 nlp/.DS_Store delete mode 100644 nlp/multiple_choice/.DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 225deb74a558d957ae8b21cc9eff906e52d07799..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKU60a06ukqi6j6hTCi`O2#Mc#JU89LFEDMR7s1J+L#E)Q0VQnle(z2Kk5}x%x z_!oTjm-t_N(sO5qq%HWW#LOi#XF7B4?exqSLqwuI^O{68BC?PeOP7&tF>dFyVFg`t z9Vldsz^l$RnSk)JkXO zw;Z!T1G>m^#I0?dkrxl#C<-`!MTQ>H2rr5GB^2gDe946t~KtrYCetC)A~9 z)T3@TX5l=?E>eKsILne!d`(GFr7?OLP=bEmK{-O-9SU+@EKAy}2$2JtWLZ*}Ei88) zPxNN`-JJw}oHQCgM4?#PyHvKyR@HjjdmqhviIdFYwljUrPtPHxyDmHK3x7E8*KV9d zVdD7Va4H>qZwShpmwxC)^L8`~y;%D8RD)HqD*f92V$nQ2Jg|?Bn#%)w@$f+d<$L#+ z%ZhdL_T5Kk{SU!7j6UQ0U;^8}w5uA|z~iOAc|Fif)m0b0X&6Ky4}hwj@=VN$!y(e* zROk!?c}yL83amqVFY6%(H0^OE3VrgKICqrksVfcFO*iLs?|rBH4i$mA6OSVFZll=<%f``8*= z8dnO@0u#y=s9c4f1|4IQ->hwBoe3Cv}H$INfS{vyd5*zbY3Z)A&eH_aIAI1Af b($Hpe0oc;GQivLu`6D1@Fqu){uPX2x5fAnS diff --git a/llm/README.md b/llm/README.md index 679a681..351b1e9 100644 --- a/llm/README.md +++ b/llm/README.md @@ -11,6 +11,7 @@ The following notebooks are actively maintained in sync with MindSpore and MindS | No. 
| Model | Description | | :-- | :---- | :----------------------- | | 1 | [t5](./t5/) | Includes notebooks for T5 finetuning and inference on tasks such as email summarization | +| 2 | [BERT (SWAG Multiple Choice)](./bert/finetune_bert_multiple_choice.ipynb) | Fine-tuning BERT on SWAG dataset for Multiple Choice tasks using MindSpore NLP | ### Community-Driven / Legacy Applications diff --git a/nlp/multiple_choice/finetune_bert_multiple_choice.ipynb b/llm/bert/finetune_bert_multiple_choice.ipynb similarity index 96% rename from nlp/multiple_choice/finetune_bert_multiple_choice.ipynb rename to llm/bert/finetune_bert_multiple_choice.ipynb index 1f72324..161d2b2 100644 --- a/nlp/multiple_choice/finetune_bert_multiple_choice.ipynb +++ b/llm/bert/finetune_bert_multiple_choice.ipynb @@ -11,7 +11,7 @@ "\n", "**SWAG** (Situations With Adversarial Generations) 是一个大规模的对抗性数据集,用于基于常识的自然语言推理 (NLI)。给定一个部分描述的事件作为上下文,任务是从四个选项中选择最合理的结尾。\n", "\n", - "本案例基于 **MindSpore** 框架和 **MindNLP** 套件,使用 **BERT** (Bidirectional Encoder Representations from Transformers) 预训练模型在 SWAG 数据集上进行微调 (Fine-tune),实现多项选择任务的自动推理。\n", + "本案例基于 **MindSpore** 框架和 **MindSpore NLP** 套件,使用 **BERT** (Bidirectional Encoder Representations from Transformers) 预训练模型在 SWAG 数据集上进行微调 (Fine-tune),实现多项选择任务的自动推理。\n", "\n", "**核心流程:**\n", "1. **环境准备**:配置 MindSpore 运行环境及 HF-Mirror 镜像加速。\n", @@ -30,7 +30,7 @@ "\n", "本案例运行环境要求如下:\n", "\n", - "| Python | MindSpore | MindNLP |\n", + "| Python | MindSpore | MindSpore NLP |\n", "| :----- | :-------- | :------ |\n", "| 3.9+ | >= 2.7.0 | >= 0.5.1 |\n", "\n", @@ -57,7 +57,7 @@ "os.environ.setdefault(\"TOKENIZERS_PARALLELISM\", \"false\")\n", "\n", "# ----------------------------\n", - "# Import mindnlp.transformers\n", + "# Import MindSpore NLP transformers\n", "# ----------------------------\n", "import mindnlp # noqa: F401\n", "\n", @@ -449,8 +449,8 @@ "为了验证模型效果,我们进行一次端到端的推理演示。\n", "\n", "**注意(关键修复):**\n", - "在 MindNLP 环境下,为了确保推理的稳定性和跨后端兼容性,我们采取以下策略:\n", - "1. 
**`return_tensors=\"pt\"`**: 使用 PyTorch 兼容的 Tensor 格式(MindNLP 会自动代理到 MindTorch)。\n", + "在 MindSpore NLP 环境下,为了确保推理的稳定性和跨后端兼容性,我们采取以下策略:\n", + "1. **`return_tensors=\"pt\"`**: 使用 PyTorch 兼容的 Tensor 格式(MindSpore NLP 会自动代理到 MindTorch)。\n", "2. **`move_inputs_to_device`**: 显式将输入数据移动到模型参数所在的设备,避免 \"All tensor arguments must be on the same device\" 错误。\n" ] }, @@ -464,7 +464,7 @@ "# 10. Inference Demo (FIXED)\n", "print(\"\\n>>> Running Inference Demo...\")\n", "\n", - "# 推理阶段需要 torch/no_grad;在 mindnlp 环境下 torch 会被代理到 mindtorch\n", + "# 推理阶段需要 torch/no_grad;在 MindSpore NLP 环境下 torch 会被代理到 mindtorch\n", "import torch\n", "\n", "model.eval()\n", diff --git a/nlp/.DS_Store b/nlp/.DS_Store deleted file mode 100644 index fa701c2d7ccacf251c5f37c3558b0bc6fdb8145f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKK~LK-6n-uR5+Sr6n8Y#Cu86e^G$bxrR}MSuLeO?#Q)!xPL<@1%r0b!ol=Jpu zcHJ-8-`T!r8w7My4@hW2?@7WR?*G%yC9AFkkU7~9>%$jk;$LQEBNkjV~Ccr z<7qi^?4R){&&sOZ{-qi#&DFJaZ{6GQK1LUM8dZa8Qg#R9Gp?O#oyS-4Abyvn(|&8~ zwJxebR-|J?$dVK(AKqt0qNiOwDU#A~BirHmp5Je6&t{#yy`5lxzcb$nW`{4^h+iDc z=f3y!+4G~5{+Il+(BI5FNZ}hcw(0N+z9U$o(Wj&w#&~NUVoB=PXk6sF_>4@>_UVL< zA!?7_0(Y?#$sNIeAnT^!sEUaKqJSuHKLy-==WX0?7-5S7qQE0mfcJ+0&KNqZEt;(Z zg}DL%n`kzMn12$P6F3YV))vtNQ_i_ib#f8YOalcXjJhyssF0oCk9y)LHY&eny=@mU+d&*5yG nS6jTMATUQUa``CUh8shBzyUCHSX)F3OnwB63{r>!530Z)BL#RA diff --git a/nlp/README.md b/nlp/README.md index 6c5b393..203c638 100644 --- a/nlp/README.md +++ b/nlp/README.md @@ -4,9 +4,7 @@ This directory contains ready-to-use Natural Language Processing application not ## Application List -| No. | Model | Description | -| :-- | :---- | :---------- | -| 1 | [BERT (SWAG Multiple Choice)](./multiple_choice/finetune_bert_multiple_choice.ipynb) | Fine-tuning BERT on SWAG dataset for Multiple Choice tasks using Mind NLP. | +Currently no notebooks are listed in this directory. 
## Contributing New NLP Applications diff --git a/nlp/multiple_choice/.DS_Store b/nlp/multiple_choice/.DS_Store deleted file mode 100644 index 2a48537ade332bec09c484a35dcc1fa53beca53b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeH~Jx;?w5QX1Dibxcsq}&rk;s#5U6x195Fd&2!kqC$oojZ;VZ+<{nhJuCyy^&_$ z?s&#)e}%^jpe%OR$G{B0lx~Vw8^&hW%}2JEkwT^O9_Q$Ag9kieKFa<+V6}&M#4|2& zJE*_I7B9F*ce>tOb?Z&XOnb}|`uxp}Gnoj8fCz|y2#CP%2w3l>O%Fmfihu}+z$XFw zK2*ACOYKAb)4{=y0MrS?Zk+pBf?6#=ZK-`IBeW8w)F>@^#3)hDc+I?)+J{Ctito(( z Date: Wed, 31 Dec 2025 13:02:32 +0800 Subject: [PATCH 6/6] Restore nlp README --- nlp/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nlp/README.md b/nlp/README.md index 203c638..a2c9dae 100644 --- a/nlp/README.md +++ b/nlp/README.md @@ -4,7 +4,9 @@ This directory contains ready-to-use Natural Language Processing application not ## Application List -Currently no notebooks are listed in this directory. +| No. | Model | Description | +| :-- | :---- | :------------------------------ | +| 1 | / | This section is empty for now — feel free to contribute your first application! | ## Contributing New NLP Applications