Title |
Construction and Analysis of Word-level Time-aligned Simultaneous Interpretation Corpus |
Authors |
Takahiro Ono, Hitomi Tohyama and Shigeki Matsubara |
Abstract |
In this paper, quantitative analyses of the delay in Japanese-to-English (J-E) and English-to-Japanese (E-J) interpretations are described. The Simultaneous Interpretation Database of Nagoya University (SIDB) was used for the analyses. Beginning time and end time of each word were provided to the corpus using HMM-based phoneme segmentation, and the time lag between the corresponding words was calculated as the word-level delay. Word-level delay was calculated for 3,722 pairs and 4,932 pairs of words for J-E and E-J interpretations, respectively. The analyses revealed that J-E interpretation has much larger delay than E-J interpretation and that the difference of word order between Japanese and English affects the degree of delay. |
Language |
Multiple languages |
Topics |
Corpus (creation, annotation, etc.), Multimedia annotation and processing, Speech resource/database |
Full paper |
Construction and Analysis of Word-level Time-aligned Simultaneous Interpretation Corpus |
Slides |
- |
Bibtex |
@comment{Citation record for the paper described above. Author and editor
names are written in Last, First form and joined with the keyword and, so that
each person is parsed as a separate name. The conference-chair role that was
previously embedded in the editor list is preserved in the annote field.}
@InProceedings{ONO08.860,
  author    = {Ono, Takahiro and Tohyama, Hitomi and Matsubara, Shigeki},
  title     = {Construction and Analysis of Word-level Time-aligned Simultaneous Interpretation Corpus},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)},
  year      = {2008},
  month     = may,
  eventdate = {2008-05-28/2008-05-30},
  address   = {Marrakech, Morocco},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios and Tapias, Daniel},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {2-9517408-4-0},
  url       = {http://www.lrec-conf.org/proceedings/lrec2008/},
  annote    = {Nicoletta Calzolari served as Conference Chair.},
  language  = {english}
} |