This paper presents the BUCC 2018 shared task on parallel sentence extraction from comparable corpora. This task used the same data as the BUCC 2017 shared task. 17 runs were submitted by 3 teams, covering all four proposed language pairs: German-English (3 runs), French-English (6 runs), Russian-English (3 runs), and Chinese-English (5 runs). The best F-scores as measured against the gold standard were 0.86 (German-English), 0.81 (French-English and Russian-English), and 0.77 (Chinese-English). All top scores improved over those of 2017.
% Overview paper of the BUCC 2018 shared task, as listed in the LREC 2018 proceedings.
% Names are given as "Last, First" and joined with " and " so each person parses separately.
% eventdate/venue hold the conference dates and city; address is the publisher's city.
@InProceedings{ZWEIGENBAUM18.12,
  author    = {Zweigenbaum, Pierre and Sharoff, Serge and Rapp, Reinhard},
  title     = {Overview of the Third {BUCC} Shared Task: Spotting Parallel Sentences in Comparable Corpora},
  booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)},
  editor    = {Rapp, Reinhard and Zweigenbaum, Pierre and Sharoff, Serge},
  year      = {2018},
  month     = may,
  eventdate = {2018-05-07/2018-05-12},
  venue     = {Miyazaki, Japan},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Paris, France},
  isbn      = {979-10-95546-07-8},
  language  = {english},
}