From c00bd53c2a2bb7fc1bedb73eedf8ad0f225fdabd Mon Sep 17 00:00:00 2001
From: bgo-eiu <100172442+bgo-eiu@users.noreply.github.com>
Date: Sun, 28 Aug 2022 18:06:56 -0400
Subject: [PATCH] addressed overapplication of virama, normalized nukta
 characters

Consonant keys (including v/w and y) now insert the bare consonant
instead of consonant + virama; typing '~' after a consonant adds the
virama explicitly. 'q', 'G', 'J', 'K' and 'L' produce nukta letters,
Udaat moves from 'q' to 'Q', and 'Z' types a literal full stop.

addresses https://phabricator.wikimedia.org/T91159
---
 rules/pa/pa-transliteration.js | 121 +++++++++++++++++----------------
 1 file changed, 64 insertions(+), 57 deletions(-)

diff --git a/rules/pa/pa-transliteration.js b/rules/pa/pa-transliteration.js
index 33bbf4f13..8aaae73b7 100644
--- a/rules/pa/pa-transliteration.js
+++ b/rules/pa/pa-transliteration.js
@@ -5,11 +5,11 @@
 		id: 'pa-transliteration',
 		name: 'Punjabi Transliteration',
 		description: 'Punjabi transliteration',
-		date: '2012-10-16',
+		date: '2022-08-28',
 		URL: 'http://github.com/wikimedia/jquery.ime',
-		author: 'Amir E. Aharoni, inputs from Saurabh Choudhary and Surinder Wadhawan',
+		author: 'Amir E. Aharoni, inputs from Saurabh Choudhary, Surinder Wadhawan, bgo_eiu',
 		license: 'GPLv3',
-		version: '1.0',
+		version: '2.0',
 		contextLength: 2,
 		maxKeyLength: 4,
 		/* Semi-automatically created from the Hindi transliteration mapping using
@@ -17,23 +17,23 @@
 		 * s{(?[ऀ-ॿ])}{chr(ord($+{deva_letter}) + 0x100)}xmsge;
 		 */
 		patterns: [
-			[ 'ਕ੍h', 'c', 'ਚ੍' ],
+			[ 'ਕh', 'c', 'ਚ' ],
 			[ '\\\\([A-Za-z\\>_~\\.0-9])', '\\\\', '$1' ],
 
 			// ਕ-ਹ is the main range of Indic letters.
-			// ੜ is an additional unique Gurmukhi letter.
-			[ '([ਕ-ਹੜ]਼?)੍a', '$1' ], // Short [a] after a consonant with virama removes the virama
-			[ '([ਕ-ਹੜ]਼?)੍A', '$1ਾ' ], // Long [a] after a consonant with virama removes the virama and adds long [a]
-			[ '([ਕ-ਹੜ]਼?)a', '$1ਾ' ], // 'aa' gives long [a] - short [a] after a consonant without virama adds long [a]
-			[ '([ਕ-ਹੜ]਼?)੍i', '$1ਿ' ],
-			[ '([ਕ-ਹੜ]਼?)(ਿi|੍I|ੇe)', '$1ੀ' ], // 'ii', 'I' and 'ee' give long [i].
-			[ '([ਕ-ਹੜ]਼?)੍u', '$1ੁ' ],
-			[ '([ਕ-ਹੜ]਼?)(ੁu|੍U|ੋo)', '$1ੂ' ], // 'uu', 'U' and 'oo' give long [u].
-			[ '([ਕ-ਹੜ]਼?)੍e', '$1ੇ' ],
-			[ '([ਕ-ਹੜ]਼?)(i|੍E)', '$1ੈ' ], // 'i' after a consonant without virama or 'E' after a consonant with Virama gives "ai"
-			[ '([ਕ-ਹੜ]਼?)੍[oO]', '$1ੋ' ],
-			[ '([ਕ-ਹੜ]਼?)u', '$1ੌ' ], // 'u' after a consonant without virama gives "au"
-			[ '([ਕ-ਹੜ])੍\\`', '$1਼੍' ], // '`' (backtick) after a consonant with virama adds a nukta before the virama
+			// ਖ਼, ਗ਼, ਜ਼, ੜ, ਫ਼ are additional Gurmukhi letters.
+			[ '([ਕ-ਹਖ਼ਗ਼ਜ਼ੜਫ਼]਼?)', '$1' ], // NOTE(review): replaces a consonant with itself, looks like a no-op - confirm it is intentional
+			[ '([ਕ-ਹਖ਼ਗ਼ਜ਼ੜਫ਼]਼?)a', '$1ਾ' ], // 'a' after a consonant adds long [a]
+			[ '([ਕ-ਹਖ਼ਗ਼ਜ਼ੜਫ਼]਼?)i', '$1ਿ' ],
+			[ '([ਕ-ਹਖ਼ਗ਼ਜ਼ੜਫ਼]਼?)(ਿi|I|ੇe)', '$1ੀ' ], // 'ii', 'I' and 'ee' give long [i].
+			[ '([ਕ-ਹਖ਼ਗ਼ਜ਼ੜਫ਼]਼?)u', '$1ੁ' ],
+			[ '([ਕ-ਹਖ਼ਗ਼ਜ਼ੜਫ਼]਼?)(ੁu|U|ੋo)', '$1ੂ' ], // 'uu', 'U' and 'oo' give long [u].
+			[ '([ਕ-ਹਖ਼ਗ਼ਜ਼ੜਫ਼]਼?)e', '$1ੇ' ],
+			[ '([ਕ-ਹਖ਼ਗ਼ਜ਼ੜਫ਼]਼?)E', '$1ੈ' ], // 'E' after a consonant gives "ai"
+			[ '([ਕ-ਹਖ਼ਗ਼ਜ਼ੜਫ਼]਼?)o', '$1ੋ' ],
+			[ '([ਕ-ਹਖ਼ਗ਼ਜ਼ੜਫ਼]਼?)O', '$1ੌ' ], // 'O' after a consonant gives "au"
+			[ '([ਕ-ਹਖ਼ਗ਼ਜ਼ੜਫ਼])~', '$1੍' ], // '~' after a consonant adds virama
+			[ '([ਕ-ਹਖ਼ਗ਼ਜ਼ੜਫ਼])੍\\`', '$1਼੍' ], // '`' (backtick) after a consonant with virama adds a nukta before the virama
 
 			[ 'ਅa', 'ਆ' ], // aa
 			[ '(ਓo|ਉu)', 'ਊ' ], // oo, uu
@@ -43,61 +43,67 @@
 			[ '(ਏe|ਇi)', 'ਈ' ], // ee, ii
 			[ 'ਅu', 'ਔ' ], // au
 			[ 'ਂ[Mm^]', 'ਁ' ], // bindi + 'm', 'M', or '^' -> Adak bindi
-			[ 'ਣ੍N', 'ੰ' ], // Tippi - nasalization
+			[ 'ਣN', 'ੰ' ], // Tippi - nasalization
 
-			[ 'ਕ੍h', 'ਖ੍' ], // kh
-			[ 'ਗ੍h', 'ਘ੍' ], // gh
-			[ 'ਨ੍g', 'ਙ੍' ], // ng
-			[ 'ਚ੍h', 'ਛ੍' ], // ch
-			[ 'ਜ੍h', 'ਝ੍' ], // jh
-			[ 'ਨ੍j', 'ਞ੍' ], // nj
-			[ 'ਟ੍h', 'ਠ੍' ], // Th
-			[ 'ਡ੍h', 'ਢ੍' ], // Dh
-			[ 'ਤ੍h', 'ਥ੍' ], // th
-			[ 'ਦ੍h', 'ਧ੍' ], // dh
-			[ 'ਪ੍h', 'ਫ੍' ], // ph
-			[ 'ਬ੍h', 'ਭ੍' ], // bh
+			[ 'ਕh', 'ਖ' ], // kh
+			[ 'ਗh', 'ਘ' ], // gh
+			[ 'ਨg', 'ਙ' ], // ng
+			[ 'ਚh', 'ਛ' ], // ch
+			[ 'ਜh', 'ਝ' ], // jh
+			[ 'ਨj', 'ਞ' ], // nj
+			[ 'ਟh', 'ਠ' ], // Th
+			[ 'ਡh', 'ਢ' ], // Dh
+			[ 'ਤh', 'ਥ' ], // th
+			[ 'ਦh', 'ਧ' ], // dh
+			[ 'ਪh', 'ਫ' ], // ph
+			[ 'ਬh', 'ਭ' ], // bh
 
-			[ 'ਸ੍h', 'ਸ਼੍' ], // sh
-			[ 'ਕ਼੍h', 'ਖ਼੍' ], // k + nukta + h
+			[ 'ਸh', 'ਸ਼' ], // sh
+			[ 'ਕ਼h', 'ਖ਼' ], // k + nukta + h
 
 			[ 'a', 'ਅ' ],
-			[ 'b', 'ਬ੍' ],
-			[ 'c', 'ਚ੍' ],
-			[ 'd', 'ਦ੍' ],
+			[ 'b', 'ਬ' ],
+			[ 'c', 'ਚ' ],
+			[ 'd', 'ਦ' ],
 			[ 'e', 'ਏ' ],
-			[ 'f', 'ਫ੍' ],
-			[ 'F', 'ਫ਼੍' ], // With nukta
-			[ 'g', 'ਗ੍' ],
-			[ 'h', 'ਹ੍' ],
+			[ 'f', 'ਫ' ],
+			[ 'F', 'ਫ਼' ],
+			[ 'g', 'ਗ' ],
+			[ 'h', 'ਹ' ],
 			[ 'i', 'ਇ' ],
-			[ 'j', 'ਜ੍' ],
-			[ 'k', 'ਕ੍' ],
-			[ 'l', 'ਲ੍' ],
-			[ 'm', 'ਮ੍' ],
-			[ 'n', 'ਨ੍' ],
+			[ 'j', 'ਜ' ],
+			[ 'k', 'ਕ' ],
+			[ 'l', 'ਲ' ],
+			[ 'm', 'ਮ' ],
+			[ 'n', 'ਨ' ],
 			[ 'o', 'ਓ' ],
-			[ 'p', 'ਪ੍' ],
-			[ 'q', 'ੑ' ], // Udaat
-			[ 'r', 'ਰ੍' ],
-			[ 's', 'ਸ੍' ],
-			[ 't', 'ਤ੍' ],
+			[ 'p', 'ਪ' ],
+			[ 'q', 'ਕ਼' ],
+			[ 'r', 'ਰ' ],
+			[ 's', 'ਸ' ],
+			[ 't', 'ਤ' ],
 			[ 'u', 'ਉ' ],
-			[ '(v|w)', 'ਵ੍' ],
-			[ 'y', 'ਯ੍' ],
+			[ '(v|w)', 'ਵ' ],
+			[ 'y', 'ਯ' ],
 			[ 'z', 'ੱ' ], // Addak - gemination
 			[ 'A', 'ਆ' ],
-			[ 'D', 'ਡ੍' ],
+			[ 'D', 'ਡ' ],
+			[ 'G', 'ਗ਼' ],
 			[ 'H', 'ਃ' ], // Visarga
 			[ 'I', 'ਈ' ],
+			[ 'J', 'ਜ਼' ],
+			[ 'K', 'ਖ਼' ],
+			[ 'L', 'ਲ਼' ],
 			[ 'M', 'ਂ' ], // Bindi
-			[ 'N', 'ਣ੍' ],
-			[ 'R', 'ੜ੍' ], // Rra
-			[ 'S', 'ਸ਼੍' ],
-			[ 'T', 'ਟ੍' ],
+			[ 'N', 'ਣ' ],
+			[ 'Q', 'ੑ' ], // Udaat
+			[ 'R', 'ੜ' ], // Rra
+			[ 'S', 'ਸ਼' ],
+			[ 'T', 'ਟ' ],
 			[ 'U', 'ਊ' ],
 			[ 'X', 'ੴ' ], // Ek onkar
 			[ 'Y', 'ੵ' ], // Yakash
+			[ 'Z', '.' ],
 			[ '0', '੦' ],
 			[ '1', '੧' ],
 			[ '2', '੨' ],
@@ -112,7 +118,8 @@
 
 			[ '\\`', '਼' ], // Nukta
 			[ '।\\.', '॥' ], // Double danda, must be before single danda
-			[ '\\.', '।' ] ] // Danda
+			[ '\\.', '।' ] // Danda
+		]
 	};
 
 	$.ime.register( paTransliteration );