Title |
Creation of Learner Corpus and Its Application to Speech Recognition |
Authors |
Hiroki Yamazaki, Keisuke Kitamura, Takashi Harada and Seiichi Yamamoto |
Abstract |
Major languages such as English are spoken by many people whose mother tongue is a different language. Their second-language speech often has not only a distinct accent but also different lexical and syntactic characteristics. Speech recognition performance is severely affected when the lexical, syntactic, or semantic characteristics in the training and recognition tasks differ. The language model of a speech recognition system is usually trained with transcribed speech data or text data collected in native English-speaking countries; therefore, speech recognition performance is expected to be degraded by the mismatch of lexical and syntactic characteristics between native speakers and second-language speakers, as well as by the difference between their accents. The aim of language model adaptation is to exploit specific, albeit limited, knowledge about the recognition task to compensate for the mismatch of the lexical, syntactic, or semantic characteristics. This paper examines whether language model adaptation is effective in compensating for the mismatch between the lexical, syntactic, or semantic characteristics of native speakers and second-language speakers. |
Language |
Single language |
Topics |
Corpus (creation, annotation, etc.), Speech recognition and understanding, Language modelling |
Full paper |
Creation of Learner Corpus and Its Application to Speech Recognition |
Slides |
- |
Bibtex |
@InProceedings{YAMAZAKI08.39,
  author    = {Yamazaki, Hiroki and Kitamura, Keisuke and Harada, Takashi and Yamamoto, Seiichi},
  title     = {Creation of Learner Corpus and Its Application to Speech Recognition},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)},
  year      = {2008},
  month     = may,
  eventdate = {2008-05-28/2008-05-30},
  address   = {Marrakech, Morocco},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios and Tapias, Daniel},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {2-9517408-4-0},
  url       = {http://www.lrec-conf.org/proceedings/lrec2008/},
  language  = {english}
} |