We propose an unsupervised system for a variant of cross-lingual lexical substitution (CLLS) to be used in a reading scenario in computer-assisted language learning (CALL), in which single-word translations provided by a dictionary are ranked according to their appropriateness in context. In contrast to most alternative systems, ours does not rely on either parallel corpora or machine translation systems, making it suitable for low-resource languages as the language to be learned. This is achieved by a graph-based scoring mechanism which can deal with ambiguous translations of context words provided by a dictionary. Due to this decoupling from the source language, we need monolingual corpus resources only for the target language, i.e. the language of the translation candidates. We evaluate our approach for the language pair Norwegian Nynorsk-English on an exploratory manually annotated gold standard and report promising results. When running our system on the original SemEval CLLS task, we rank 6th out of 18 (including 2 baselines and our 2 system variants) in the best evaluation.
% LREC 2016 conference paper.
% Conference venue and dates are stored in the biblatex fields `venue` and
% `eventdate` (classic BibTeX styles ignore unknown fields); `address` is the
% publisher's city, not the conference location.
@InProceedings{ECKER16.170,
  author        = {Ecker, Stefan and Horbach, Andrea and Thater, Stefan},
  title         = {Unsupervised Ranked Cross-Lingual Lexical Substitution for Low-Resource Languages},
  booktitle     = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC} 2016)},
  editor        = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Goggi, Sara and Grobelnik, Marko and Maegaard, Bente and Mariani, Joseph and Mazo, Helene and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  publisher     = {European Language Resources Association (ELRA)},
  address       = {Paris, France},
  year          = {2016},
  month         = may,
  eventdate     = {2016-05-23/2016-05-28},
  venue         = {Portoro{\v{z}}, Slovenia},
  isbn          = {978-2-9517408-9-1},
  language      = {english},
  internal-note = {Nicoletta Calzolari is listed as Conference Chair in the proceedings front matter; the role was removed from the editor field so the name parses correctly.},
}