We address the task of automatically estimating the missing values of linguistic features by making use of the fact that some linguistic features in typological databases are informative to each other. The questions to address in this work are (i) how much predictive power do features have on the value of another feature? (ii) to what extent can we attribute this predictive power to genealogical or areal factors, as opposed to being provided by tendencies or implicational universals? To address these questions, we conduct a discriminative or predictive analysis on the typological database. Specifically, we use a machine-learning classifier to estimate the value of each feature of each language using the values of the other features, under different choices of training data: all the other languages, or all the other languages except for the ones having the same origin or area with the target language.
@InProceedings{TAKAMURA16.114,
author = {Hiroya Takamura and Ryo Nagata and Yoshifumi Kawasaki}, title = {Discriminative Analysis of Linguistic Features for Typological Study}, booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, year = {2016}, month = {may}, date = {23-28}, location = {Portorož, Slovenia}, editor = {Nicoletta Calzolari (Conference Chair) and Khalid Choukri and Thierry Declerck and Sara Goggi and Marko Grobelnik and Bente Maegaard and Joseph Mariani and Helene Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis}, publisher = {European Language Resources Association (ELRA)}, address = {Paris, France}, isbn = {978-2-9517408-9-1}, language = {english} }