In this paper we present a novel application of compositional distributional semantic models (CDSMs): prediction of lexical typology. The paper introduces the notion of typological closeness, which is a novel rigorous formalization of semantic similarity based on comparison of multilingual data. Starting from the Moscow Database of Qualitative Features for adjective typology, we create four datasets of typological closeness, on which we test a range of distributional semantic models. We show that, on the one hand, vector representations of phrases based on data from one language can be used to predict how words within the phrase translate into different languages, and, on the other hand, that typological data can serve as a semantic benchmark for distributional models. We find that compositional distributional models, especially parametric ones, perform way above non-compositional alternatives on the task.
@InProceedings{RYZHOVA16.665,
author = {Daria Ryzhova and Maria Kyuseva and Denis Paperno}, title = {Typology of Adjectives Benchmark for Compositional Distributional Models}, booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, year = {2016}, month = {may}, date = {23-28}, location = {Portorož, Slovenia}, editor = {Nicoletta Calzolari (Conference Chair) and Khalid Choukri and Thierry Declerck and Sara Goggi and Marko Grobelnik and Bente Maegaard and Joseph Mariani and Helene Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis}, publisher = {European Language Resources Association (ELRA)}, address = {Paris, France}, isbn = {978-2-9517408-9-1}, language = {english} }