In this paper, we present a study on tweet classification which aims to define the communication behavior of the 103 French museums that participated in 2014 in the Twitter operation: MuseumWeek. The tweets were automatically classified in four communication categories: sharing experience, promoting participation, interacting with the community, and promoting-informing about the institution. Our classification is multi-class. It combines Support Vector Machines and Naive Bayes methods and is supported by a selection of eighteen subtypes of features of four different kinds: metadata information, punctuation marks, tweet-specific and lexical features. It was tested against a corpus of 1,095 tweets manually annotated by two experts in Natural Language Processing and Information Communication and twelve Community Managers of French museums. We obtained an state-of-the-art result of F1-score of 72% by 10-fold cross-validation. This result is very encouraging since is even better than some state-of-the-art results found in the tweet classification literature.
@InProceedings{FOUCAULT16.985,
author = {Nicolas Foucault and Antoine Courtin}, title = {Automatic Classification of Tweets for Analyzing Communication Behavior of Museums}, booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, year = {2016}, month = {may}, date = {23-28}, location = {Portorož, Slovenia}, editor = {Nicoletta Calzolari (Conference Chair) and Khalid Choukri and Thierry Declerck and Sara Goggi and Marko Grobelnik and Bente Maegaard and Joseph Mariani and Helene Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis}, publisher = {European Language Resources Association (ELRA)}, address = {Paris, France}, isbn = {978-2-9517408-9-1}, language = {english} }