The infrastructure Global Open Resources and Information for Language and Linguistic Analysis (GORILLA) was created as a resource that provides a bridge between disciplines such as documentary, theoretical, and corpus linguistics, speech and language technologies, and digital language archiving services. GORILLA is designed as an interface between digital language archive services and language data producers. It addresses various problems of common digital language archive infrastructures. At the same time, it serves the speech and language technology communities by providing a platform to create and share speech and language data from low-resourced and endangered languages. It hosts an initial collection of language models for speech and natural language processing (NLP), and technologies or software tools for corpus creation and annotation. GORILLA is designed to address the Transcription Bottleneck in language documentation and, at the same time, to provide solutions to the general Language Resource Bottleneck in speech and language technologies. It does so by facilitating cooperation between documentary and theoretical linguistics, and speech and language technologies research and development, in particular for low-resourced and endangered languages.
@InProceedings{CAVAR16.908,
  author        = {Cavar, Damir and Cavar, Malgorzata and Moe, Lwin},
  title         = {Global Open Resources and Information for Language and Linguistic Analysis ({GORILLA})},
  booktitle     = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC} 2016)},
  year          = {2016},
  month         = may,
  eventdate     = {2016-05-23/2016-05-28},
  venue         = {Portoro{\v{z}}, Slovenia},
  editor        = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Goggi, Sara and Grobelnik, Marko and Maegaard, Bente and Mariani, Joseph and Mazo, Helene and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  publisher     = {European Language Resources Association (ELRA)},
  address       = {Paris, France},
  isbn          = {978-2-9517408-9-1},
  language      = {english},
  internal-note = {Nicoletta Calzolari was listed as Conference Chair in the original editor field; the role was moved here so that the editor name parses correctly. Conference days (23-28 May 2016) and venue are stored in eventdate/venue; address is the publisher location.},
}