Skip to content

Commit 42f3494

Browse files
authored
Merge branch 'main' into staging_hi_itn
Signed-off-by: Mariana <47233618+mgrafu@users.noreply.github.com>
2 parents 2e14f68 + 4844e1f commit 42f3494

File tree

245 files changed

+9430
-562
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

245 files changed

+9430
-562
lines changed

.pre-commit-config.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,30 +22,30 @@ ci:
2222

2323
repos:
2424
- repo: https://github.com/pre-commit/pre-commit-hooks
25-
rev: v5.0.0
25+
rev: v6.0.0
2626
hooks:
2727
- id: check-yaml
2828
- id: check-case-conflict
2929
- id: detect-private-key
3030
- id: requirements-txt-fixer
3131

3232
- repo: https://github.com/PyCQA/flake8
33-
rev: 7.2.0
33+
rev: 7.3.0
3434
hooks:
3535
- id: flake8
3636
args:
3737
- --select=W605
3838

3939
- repo: https://github.com/PyCQA/isort
40-
rev: 6.0.1
40+
rev: 6.1.0
4141
hooks:
4242
- id: isort
4343
name: Format imports
4444
args: [ --multi-line=3, --trailing-comma, --force-grid-wrap=0, --use-parentheses, --line-width=119, -rc, -ws ]
4545
exclude: docs/
4646

47-
- repo: https://github.com/psf/black
48-
rev: 25.1.0
47+
- repo: https://github.com/psf/black-pre-commit-mirror
48+
rev: 25.9.0
4949
hooks:
5050
- id: black
5151
name: Format code

Jenkinsfile

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ pipeline {
22
agent {
33
docker {
44
image 'tnitn_ci_py310:24.07'
5-
args '-v /mnt/jenkins/jenkinsci:/home/jenkins -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""'
5+
args '-v /mnt/jenkins/jenkinsci/TestData:/home/jenkins/TestData -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""'
66
}
77
}
88
options {
@@ -19,10 +19,11 @@ pipeline {
1919
HU_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/07-16-24-0'
2020
PT_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
2121
RU_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
22-
VI_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
22+
VI_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/10-29-25-0'
2323
SV_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
2424
ZH_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/11-13-24-0'
2525
IT_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/08-22-24-0'
26+
HE_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/09-24-25-0'
2627
HY_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/03-12-24-0'
2728
MR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/03-12-24-1'
2829
JA_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/10-17-24-1'
@@ -170,7 +171,7 @@ pipeline {
170171
}
171172
}
172173

173-
stage('L0: Create FR TN/ITN & VI ITN & HU TN & IT TN') {
174+
stage('L0: Create FR TN/ITN & VI TN/ITN & HU TN & IT TN') {
174175
when {
175176
anyOf {
176177
branch 'main'
@@ -196,6 +197,11 @@ pipeline {
196197
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=vi --text="một ngàn " --cache_dir ${VI_TN_CACHE}'
197198
}
198199
}
200+
stage('L0: VI TN grammars') {
201+
steps {
202+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=vi --text="100" --cache_dir ${VI_TN_CACHE}'
203+
}
204+
}
199205
stage('L0: HU TN grammars') {
200206
steps {
201207
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=hu --text="100" --cache_dir ${HU_TN_CACHE}'
@@ -252,7 +258,24 @@ pipeline {
252258
}
253259
}
254260
}
255-
261+
stage('L0: Create He TN/ITN Grammars & MR') {
262+
when {
263+
anyOf {
264+
branch 'main'
265+
branch 'staging/**'
266+
branch 'staging_*'
267+
changeRequest target: 'main'
268+
}
269+
}
270+
failFast true
271+
parallel {
272+
stage('L0: HE ITN grammars') {
273+
steps {
274+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=he --text="ת " --cache_dir ${HE_TN_CACHE}'
275+
}
276+
}
277+
}
278+
}
256279
stage('L0: Create HY TN/ITN Grammars & MR') {
257280
when {
258281
anyOf {
@@ -412,6 +435,11 @@ pipeline {
412435
sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/hy/ -m "not pleasefixme" --cpu --tn_cache_dir ${HY_TN_CACHE}'
413436
}
414437
}
438+
stage('L1: Run all HE TN/ITN tests (restore grammars from cache)') {
439+
steps {
440+
sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/he/ -m "not pleasefixme" --cpu --tn_cache_dir ${HE_TN_CACHE}'
441+
}
442+
}
415443
}
416444
}
417445

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
חצי
2+
רבע
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
°F פרנהייט
2+
°C צלסיוס
3+
° מעלות
4+
°F מעלות פרנהייט
5+
°C מעלות צלסיוס
6+
K קלווין
7+
% אחוז
8+
% אחוזים
9+
Hz הרץ
10+
kW קילוואט
11+
kW קילו ואט
12+
kW קילו וואט
13+
kWh קילו ואט לשעה
14+
kWh קילוואט לשעה
15+
Wh ואט לשעה
16+
W ואט
17+
ghz ג׳יגה הרץ
18+
ghz גיגה הרץ
19+
khz קילו הרץ
20+
mhz מגה הרץ
21+
v וולט
22+
nm ננומטר
23+
mA מילי אמפר
24+
tW טרה ואט
25+
mv מילי וולט
26+
mW מגה ואט
27+
μm מיקרומטר
28+
" אינץ׳
29+
cc סי סי
30+
ω אוהם
31+
db דציבל
32+
db דציבלים
33+
kb קילו ביט
34+
mb מגה ביט
35+
gb ג׳יגה ביט
36+
gb גיגה ביט
37+
tb טרה ביט
38+
pb פטה ביט
39+
mb מגה בייט
40+
kb קילו בייט
41+
gb ג׳יגה בייט
42+
gb גיגה בייט
43+
tb טרה בייט
44+
pb פטה בייט
45+
A אמפר
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
ינואר
2+
פברואר
3+
מרץ
4+
מרס
5+
אפריל
6+
מאי
7+
יוני
8+
יולי
9+
אוגוסט
10+
ספטמבר
11+
אוקטובר
12+
נובמבר
13+
דצמבר
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
ינואר 1
2+
פברואר 2
3+
מרץ 3
4+
אפריל 4
5+
מאי 5
6+
יוני 6
7+
יולי 7
8+
אוגוסט 8
9+
ספטמבר 9
10+
אוקטובר 10
11+
נובמבר 11
12+
דצמבר 12
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
ראשון 1
2+
שני 2
3+
שלישי 3
4+
רביעי 4
5+
חמישי 5
6+
שישי 6
7+
שביעי 7
8+
שמיני 8
9+
תשיעי 9
10+
עשירי 10
11+
אחת עשרה 11
12+
שתיים עשרה 12

0 commit comments

Comments
 (0)