Parallel corpora are crucial for machine translation (MT), however they are quite scarce for most language pairs and domains. As comparable corpora are far more available, many studies have been conducted to extract parallel sentences from them for MT. In this paper, we exploit the neural network features acquired from neural MT for parallel sentence extraction. We observe significant improvements for both accuracy in sentence extraction and MT performance.
@InProceedings{CHU16.363,
author = {Chenhui Chu and Raj Dabre and Sadao Kurohashi}, title = {Parallel Sentence Extraction from Comparable Corpora with Neural Network Features}, booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, year = {2016}, month = {may}, date = {23-28}, location = {Portorož, Slovenia}, editor = {Nicoletta Calzolari (Conference Chair) and Khalid Choukri and Thierry Declerck and Sara Goggi and Marko Grobelnik and Bente Maegaard and Joseph Mariani and Helene Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis}, publisher = {European Language Resources Association (ELRA)}, address = {Paris, France}, isbn = {978-2-9517408-9-1}, language = {english} }