Title |
Improving Statistical Machine Translation Efficiency by Triangulation |
Authors |
Yu Chen, Andreas Eisele and Martin Kay |
Abstract |
In current phrase-based Statistical Machine Translation systems, more training data is generally better than less. However, a larger data set eventually introduces a larger model that enlarges the search space for the decoder, and consequently requires more time and more resources to translate. This paper describes an attempt to reduce the model size by filtering out the less probable entries based on testing correlation using additional training data in an intermediate third language. The central idea behind the approach is triangulation, the process of incorporating multilingual knowledge in a single system, which eventually utilizes parallel corpora available in more than two languages. We conducted experiments using Europarl corpus to evaluate our approach. The reduction of the model size can be up to 70% while the translation quality is being preserved. |
Language |
Multiple languages |
Topics |
Machine Translation, SpeechToSpeech Translation, Multilinguality, Statistical methods |
Full paper |
Improving Statistical Machine Translation Efficiency by Triangulation |
Slides |
Improving Statistical Machine Translation Efficiency by Triangulation |
Bibtex |
@comment{LREC 2008 paper. Person names are written as Last, First and joined with the word and, which is the only name separator BibTeX recognises. The conference days are stored in eventdate as an ISO range, and the proceedings address is stored in url.}
@InProceedings{CHEN08.733,
  author    = {Chen, Yu and Eisele, Andreas and Kay, Martin},
  title     = {Improving Statistical Machine Translation Efficiency by Triangulation},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)},
  year      = {2008},
  month     = may,
  eventdate = {2008-05-28/2008-05-30},
  address   = {Marrakech, Morocco},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios and Tapias, Daniel},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {2-9517408-4-0},
  url       = {http://www.lrec-conf.org/proceedings/lrec2008/},
  language  = {english}
} |