~This paper describes the development of free/open-source finite-state morphological transducers for Tuvan, a Turkic language spoken in and around the Tuvan Republic in Russia. The finite-state toolkit used for the work is the Helsinki Finite-State Toolkit (HFST), we use the lexc formalism for modelling the morphotactics and twol formalism for modelling morphophonological alternations. We present a novel description of the morphological combinatorics of pseudo-derivational morphemes in Tuvan. An evaluation is presented which shows that the transducer has a reasonable coverage―around 93%―on freely-available corpora of the languages, and high precision―over 99%―on a manually verified test set.
@InProceedings{TYERS16.1009,
author = {Francis Tyers and Aziyana Bayyr-ool and Aelita Salchak and Jonathan Washington}, title = {A Finite-state Morphological Analyser for Tuvan}, booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, year = {2016}, month = {may}, date = {23-28}, location = {Portorož, Slovenia}, editor = {Nicoletta Calzolari (Conference Chair) and Khalid Choukri and Thierry Declerck and Sara Goggi and Marko Grobelnik and Bente Maegaard and Joseph Mariani and Helene Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis}, publisher = {European Language Resources Association (ELRA)}, address = {Paris, France}, isbn = {978-2-9517408-9-1}, language = {english} }