Title |
Large aligned treebanks for syntax-based machine translation |
Authors |
Gideon Kotzé, Vincent Vandeghinste, Scott Martens and Jörg Tiedemann |
Abstract |
We present a collection of parallel treebanks that have been automatically aligned on both the terminal and the nonterminal constituent level for use in syntax-based machine translation. We describe how they were constructed and applied to a syntax- and example-based machine translation system called Parse and Corpus-Based Machine Translation (PaCo-MT). For the language pair Dutch to English, we present evaluation scores of both the nonterminal constituent alignments and the MT system itself, and in the latter case, compare them with those of Moses, a current state-of-the-art statistical MT system, when trained on the same data. |
Topics |
Machine Translation, Speech-to-Speech Translation, Corpus (creation, annotation, etc.), Grammar and Syntax |
Full paper |
Large aligned treebanks for syntax-based machine translation |
Bibtex |
% Conference-paper entry from the LREC'12 proceedings. The citation key is kept exactly as published so existing citations still resolve.
% The source listed Nicoletta Calzolari with the role label "(Conference Chair)"; it is recorded here instead of in the editor field, where it would be parsed as part of a name.
% The conference days (23-25 May 2012) are stored in eventdate as an ISO range; styles that do not know that field ignore it.
@InProceedings{KOTZ12.924,
  author    = {Kotzé, Gideon and Vandeghinste, Vincent and Martens, Scott and Tiedemann, Jörg},
  title     = {Large aligned treebanks for syntax-based machine translation},
  booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC'12})},
  year      = {2012},
  month     = may,
  eventdate = {2012-05-23/2012-05-25},
  address   = {Istanbul, Turkey},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Doğan, Mehmet Uğur and Maegaard, Bente and Mariani, Joseph and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {978-2-9517408-7-7},
  language  = {english},
} |