From eeea41d92d50ef7ad111323176ae64377304a934 Mon Sep 17 00:00:00 2001 From: Amit Moryossef Date: Tue, 28 Apr 2026 09:21:02 +0000 Subject: [PATCH 1/2] Add STK LSF motion capture dataset (Reverdy et al., 2024) Reference the SignLang 2024 paper introducing STK LSF, a ~1h bilingual French/LSF MoCap corpus for the SignToKids project, in the continuous sign corpora section. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/index.md | 1 + src/references.bib | 105 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+) diff --git a/src/index.md b/src/index.md index 62a7598..1411426 100644 --- a/src/index.md +++ b/src/index.md @@ -1173,6 +1173,7 @@ contain parallel sequences of signs and spoken language. Available continuous sign corpora are extremely limited, containing 4-6 orders of magnitude fewer sentence pairs than similar corpora for spoken language machine translation [@arivazhagan2019massively]. Moreover, while automatic speech recognition (ASR) datasets contain up to 50,000 hours of recordings [@pratap2020mls], the most extensive continuous sign language corpus contains only 1,150 hours, and only 50 of them are publicly available [@dataset:hanke-etal-2020-extending]. These datasets are usually synthesized [@dataset:databases2007volumes;@dataset:Crasborn2008TheCN;@dataset:ko2019neural;@dataset:hanke-etal-2020-extending] or recorded in studio conditions [@dataset:forster2014extensions;@cihan2018neural], which does not account for noise in real-life conditions. Moreover, some contain signed interpretations of spoken language rather than naturally-produced signs, which may not accurately represent native signing since translation is now a part of the discourse event. 
+@dataset:reverdy-etal-2024-stk introduce STK LSF, a bilingual French/French Sign Language (LSF) motion capture corpus of approximately one hour signed by a deaf signer covering targeted grammatical phenomena and three children's tales, recorded with a 240 Hz Optitrack system and used to drive a signing avatar for educational tools in the SignToKids project. ###### Availability {-} diff --git a/src/references.bib b/src/references.bib index 226750a..4371d2c 100644 --- a/src/references.bib +++ b/src/references.bib @@ -4711,3 +4711,23 @@ @inproceedings{schulder-etal-2024-signs url = {https://aclanthology.org/2024.signlang-1.38}, year = {2024} } + +@inproceedings{dataset:reverdy-etal-2024-stk, + title = "{STK} {LSF}: A Motion Capture Dataset in {LSF} for {S}ign{T}o{K}ids", + author = "Reverdy, Cl{\'e}ment and + Gibet, Sylvie and + Le Naour, Thibaut", + editor = "Efthimiou, Eleni and + Fotinea, Stavroula-Evita and + Hanke, Thomas and + Hochgesang, Julie A. and + Mesch, Johanna and + Schulder, Marc", + booktitle = "Proceedings of the LREC-COLING 2024 11th Workshop on the Representation and Processing of Sign Languages: Evaluation of Sign Language Resources", + month = may, + year = "2024", + address = "Torino, Italia", + publisher = "ELRA and ICCL", + url = "https://aclanthology.org/2024.signlang-1.35/", + pages = "315--322" +} 
From ff882da3e9f883e61390a27b247175448f9037a6 Mon Sep 17 00:00:00 2001 From: AmitMY Date: Tue, 28 Apr 2026 09:57:06 +0000 Subject: [PATCH 2/2] Move and trim reverdy-etal STK LSF entry (review pattern) --- src/index.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/index.md b/src/index.md index 1411426..f0f8d3a 100644 --- a/src/index.md +++ b/src/index.md @@ -1154,6 +1154,8 @@ The Public DGS Corpus also saw multiple SignLang 2024 contributions: @konrad-eta @ranum-etal-2024-3d 
introduce 3D-LEX v1.0, a 3D-motion-capture lexicon of 1,000 isolated signs each in ASL and NGT (combining Vicon body pose, StretchSense glove handshapes, and Live Link Face features) and use it to derive semi-automatic handshape annotations that match expert labels on a WLASL ISLR benchmark. +@dataset:reverdy-etal-2024-stk introduce STK LSF, a one-hour bilingual French / French Sign Language (LSF) motion-capture corpus signed by a deaf signer covering targeted grammatical phenomena and three children's tales, used to drive a signing avatar for the SignToKids project. + @@ -1173,7 +1175,6 @@ contain parallel sequences of signs and spoken language. Available continuous sign corpora are extremely limited, containing 4-6 orders of magnitude fewer sentence pairs than similar corpora for spoken language machine translation [@arivazhagan2019massively]. Moreover, while automatic speech recognition (ASR) datasets contain up to 50,000 hours of recordings [@pratap2020mls], the most extensive continuous sign language corpus contains only 1,150 hours, and only 50 of them are publicly available [@dataset:hanke-etal-2020-extending]. These datasets are usually synthesized [@dataset:databases2007volumes;@dataset:Crasborn2008TheCN;@dataset:ko2019neural;@dataset:hanke-etal-2020-extending] or recorded in studio conditions [@dataset:forster2014extensions;@cihan2018neural], which does not account for noise in real-life conditions. Moreover, some contain signed interpretations of spoken language rather than naturally-produced signs, which may not accurately represent native signing since translation is now a part of the discourse event. 
-@dataset:reverdy-etal-2024-stk introduce STK LSF, a bilingual French/French Sign Language (LSF) motion capture corpus of approximately one hour signed by a deaf signer covering targeted grammatical phenomena and three children's tales, recorded with a 240 Hz Optitrack system and used to drive a signing avatar for educational tools in the SignToKids project. ###### Availability {-}