Title |
Bootstrapping Language Description: the case of Mpiemo (Bantu A, Central African Republic) |
Authors |
Harald Hammarström, Christina Thornell, Malin Petzell and Torbjörn Westerlund |
Abstract |
Linguists have long been producing grammatical descriptions of yet undescribed languages. This is a time-consuming process, which has already adapted to improved technology for recording and storage. We present here a novel application of NLP techniques to bootstrap analysis of collected data and speed up manual selection work. To be more precise, we argue that unsupervised induction of morphology and part-of-speech analysis from raw text data is mature enough to produce useful results. Experiments with Latent Semantic Analysis were less fruitful. We exemplify this on Mpiemo, a so-far essentially undescribed Bantu language of the Central African Republic, for which raw text data was available. |
Language |
Single language |
Topics |
Acquisition, Machine Learning, Endangered languages, Language modelling |
Full paper |
Bootstrapping Language Description: the case of Mpiemo (Bantu A, Central African Republic) |
Slides |
- |
Bibtex |
% LREC 2008 paper entry. Names are separated with " and " so each person is parsed individually; Nicoletta Calzolari was Conference Chair (a role, so it is not stored in the editor name list). The conference days (28-30 May 2008) are kept in eventdate.
@InProceedings{HAMMARSTRM08.848,
  author    = {Hammarstr{\"o}m, Harald and Thornell, Christina and Petzell, Malin and Westerlund, Torbj{\"o}rn},
  title     = {Bootstrapping Language Description: the case of {Mpiemo} ({Bantu A}, {Central African Republic})},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)},
  year      = {2008},
  month     = may,
  eventdate = {2008-05-28/2008-05-30},
  address   = {Marrakech, Morocco},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios and Tapias, Daniel},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {2-9517408-4-0},
  url       = {http://www.lrec-conf.org/proceedings/lrec2008/},
  language  = {english}
} |