We present a successfully implemented document repository REST service for flexible SCRUD (search, crate, read, update, delete) storage of social media conversations, using a GATE/TIPSTER-like document object model and providing a query language for document features. This software is currently being used in the SENSEI research project and will be published as open-source software before the project ends. It is, to the best of our knowledge, the first freely available, general purpose data repository to support large-scale multimodal (i.e., speech or text) conversation analytics.
@InProceedings{FUNK16.377,
author = {Adam Funk and R. Gaizauskas and Benoit Favre}, title = {A Document Repository for Social Media and Speech Conversations}, booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, year = {2016}, month = {may}, date = {23-28}, location = {Portorož, Slovenia}, editor = {Nicoletta Calzolari (Conference Chair) and Khalid Choukri and Thierry Declerck and Sara Goggi and Marko Grobelnik and Bente Maegaard and Joseph Mariani and Helene Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis}, publisher = {European Language Resources Association (ELRA)}, address = {Paris, France}, isbn = {978-2-9517408-9-1}, language = {english} }