diff --git a/.github/workflows/eval-functions.yml b/.github/workflows/eval-functions.yml index 796b66c..4fe99a2 100644 --- a/.github/workflows/eval-functions.yml +++ b/.github/workflows/eval-functions.yml @@ -104,12 +104,12 @@ jobs: mkdir -p eval-results EVAL_ERRORS=0 - echo "Running explainText evaluation..." - genkit eval:flow explainText \ - --input datasets/explain-chinese.json \ - --evaluators=custom/chineseTextPresent,custom/validPinyinFormat,custom/outputStructureValid,custom/grammarExplanationQuality \ - --batchSize 10 \ - --output eval-results/explain-chinese-results.json || { echo "⚠️ explainText evaluation had errors"; EVAL_ERRORS=$((EVAL_ERRORS+1)); } + # echo "Running explainText evaluation..." + # genkit eval:flow explainText \ + # --input datasets/explain-chinese.json \ + # --evaluators=custom/chineseTextPresent,custom/validPinyinFormat,custom/outputStructureValid,custom/grammarExplanationQuality \ + # --batchSize 10 \ + # --output eval-results/explain-chinese-results.json || { echo "⚠️ explainText evaluation had errors"; EVAL_ERRORS=$((EVAL_ERRORS+1)); } echo "Running explainEnglish evaluation..." genkit eval:flow explainEnglish \ @@ -118,26 +118,26 @@ jobs: --batchSize 10 \ --output eval-results/explain-english-results.json || { echo "⚠️ explainEnglish evaluation had errors"; EVAL_ERRORS=$((EVAL_ERRORS+1)); } - echo "Running generateChineseSentences evaluation..." - genkit eval:flow generateChineseSentences \ - --input datasets/generate-chinese-sentences.json \ - --evaluators=custom/chineseTextPresent,custom/validPinyinFormat,custom/outputStructureValid,custom/sentenceGenerationQuality \ - --batchSize 10 \ - --output eval-results/generate-sentences-results.json || { echo "⚠️ generateChineseSentences evaluation had errors"; EVAL_ERRORS=$((EVAL_ERRORS+1)); } - - echo "Running analyzeCollocation evaluation..." 
- genkit eval:flow analyzeCollocation \ - --input datasets/analyze-collocation.json \ - --evaluators=custom/chineseTextPresent,custom/englishTranslationPresent,custom/outputStructureValid \ - --batchSize 10 \ - --output eval-results/collocation-results.json || { echo "⚠️ analyzeCollocation evaluation had errors"; EVAL_ERRORS=$((EVAL_ERRORS+1)); } - - echo "Running explainWordInContext evaluation..." - genkit eval:flow explainWordInContext \ - --input datasets/explain-word-in-context.json \ - --evaluators=custom/chineseTextPresent,custom/englishTranslationPresent,custom/outputStructureValid \ - --batchSize 10 \ - --output eval-results/word-context-results.json || { echo "⚠️ explainWordInContext evaluation had errors"; EVAL_ERRORS=$((EVAL_ERRORS+1)); } + # echo "Running generateChineseSentences evaluation..." + # genkit eval:flow generateChineseSentences \ + # --input datasets/generate-chinese-sentences.json \ + # --evaluators=custom/chineseTextPresent,custom/validPinyinFormat,custom/outputStructureValid,custom/sentenceGenerationQuality \ + # --batchSize 10 \ + # --output eval-results/generate-sentences-results.json || { echo "⚠️ generateChineseSentences evaluation had errors"; EVAL_ERRORS=$((EVAL_ERRORS+1)); } + + # echo "Running analyzeCollocation evaluation..." + # genkit eval:flow analyzeCollocation \ + # --input datasets/analyze-collocation.json \ + # --evaluators=custom/chineseTextPresent,custom/englishTranslationPresent,custom/outputStructureValid \ + # --batchSize 10 \ + # --output eval-results/collocation-results.json || { echo "⚠️ analyzeCollocation evaluation had errors"; EVAL_ERRORS=$((EVAL_ERRORS+1)); } + + # echo "Running explainWordInContext evaluation..." 
+ # genkit eval:flow explainWordInContext \ + # --input datasets/explain-word-in-context.json \ + # --evaluators=custom/chineseTextPresent,custom/englishTranslationPresent,custom/outputStructureValid \ + # --batchSize 10 \ + # --output eval-results/word-context-results.json || { echo "⚠️ explainWordInContext evaluation had errors"; EVAL_ERRORS=$((EVAL_ERRORS+1)); } if [ $EVAL_ERRORS -gt 0 ]; then echo "⚠️ $EVAL_ERRORS evaluation(s) had errors - check results for details" diff --git a/functions/prompts/explain-english.prompt b/functions/prompts/explain-english.prompt index f92836b..dad0fe2 100644 --- a/functions/prompts/explain-english.prompt +++ b/functions/prompts/explain-english.prompt @@ -7,9 +7,56 @@ output: schema: EnglishExplanationSchema --- {{role "system"}} -You are a helpful Chinese teacher for speakers of English who want to learn Chinese. You provide clear, concise explanations that help learners understand Chinese. +You are an expert Chinese language tutor. You have thoroughly studied the Chinese Grammar Wiki and HSK Standard Course textbooks, and you use their terminology and teaching approaches. + +Prioritize accuracy over comprehensiveness — only explain what you are confident about. +Keep your explanations focused and practical for a language learner. + +CRITICAL REQUIREMENTS: +1. Your pinyin, vocabulary breakdown, and grammar explanations must exactly match the Chinese characters in your translation. Do not explain words or characters that are not present in your translation. +2. When describing grammar structures in your explanation, verify they match the actual translation you provided. Do not claim you used a structure (like 比 or 把) if your translation uses a different one. +3. For stative sentences (describing states like "the door is open"), remember to use 着 or other appropriate aspect markers. + {{role "user"}} Translate the English text input by the user into Chinese, and explain the translation. 
-Here's the user's English input: +Here are some examples: + +**Example 1:** +Input: "This is the tallest building I've ever seen" +Translation: 这是我见过的最高的建筑。 +Pinyin: Zhè shì wǒ jiàn guo de zuì gāo de jiànzhù. +Explanation: This sentence uses 是 as a simple copula (meaning "is"), NOT the emphatic 是...的 construction. Both 的 here are attributive particles: the first connects the modifier "我见过" to the noun phrase, and the second connects "最高" to the noun 建筑. The 过 indicates past experience ("have seen"). + +**Example 2:** +Input: "I waited for her for two hours" +Translation: 我等了她两个小时。 +Pinyin: Wǒ děng le tā liǎng gè xiǎoshí. +Explanation: When the object is a pronoun (她), it comes directly after the verb+了, and the duration (两个小时) goes at the end: Verb + 了 + Pronoun + Duration. + +**Example 3:** +Input: "Rather than watching TV, it's better to read a book" +Translation: 与其看电视,不如看书。 +Pinyin: Yǔqí kàn diànshì, bùrú kàn shū. +Explanation: The 与其...不如... structure means "rather than X, it's better to Y." 与其 introduces the less preferred option, 不如 introduces the preferred one. This is different from using 比较好. + +**Example 4:** +Input: "Only she understands" +Translation: 只有她懂。 +Pinyin: Zhǐyǒu tā dǒng. +Explanation: 只有 means "only." This sentence does NOT contain 的 — do not explain 的 when it's not present. The structure is simply: 只有 + subject + verb. + +**Example 5:** +Input: "I just got home" +Translation: 我刚到家。 +Pinyin: Wǒ gāng dào jiā. +Explanation: 刚 means "just (now)." Note: this translation does NOT use 了 — the 刚 alone conveys the recent completion. Do not explain 了 when it's not in the sentence. + +**Example 6:** +Input: "Even teachers make mistakes" +Translation: 连老师都会犯错。 +Pinyin: Lián lǎoshī dōu huì fàn cuò. +Explanation: 连...都... means "even..." for emphasis. This sentence does NOT use 是...的 — it's a simple statement with the 连...都 pattern. Do not mention 是...的 when it's not present. 
+ +Now translate and explain this input: {{text}} diff --git a/functions/src/genkit-eval.ts b/functions/src/genkit-eval.ts index 80a2f91..5f78695 100644 --- a/functions/src/genkit-eval.ts +++ b/functions/src/genkit-eval.ts @@ -38,7 +38,7 @@ const ai = genkit({ projectId: process.env.GCLOUD_PROJECT || 'hanzigraph', }), ], - model: vertexAI.model('gemini-3-flash-preview'), + model: vertexAI.model('gemini-3-pro-preview'), }); // Register schemas - MUST be done before loading prompts