In this article we propose a descriptive study of a chat conversations corpus from an assistance contact center. Conversations are described from several view points, including interaction analysis, language deviation analysis and typographic expressivity marks analysis. We provide in particular a detailed analysis of language deviations that are encountered in our corpus of 230 conversations, corresponding to 6879 messages and 76839 words. These deviations may be challenging for further syntactic and semantic parsing. Analysis is performed with a distinction between Customer messages and Agent messages. On the overall only 4% of the observed words are misspelled but 26% of the messages contain at least one erroneous word (rising to 40% when focused on Customer messages). Transcriptions of telephone conversations from an assistance call center are also studied, allowing comparisons between these two interaction modes to be drawn. The study reveals significant differences in terms of conversation flow, with an increased efficiency for chat conversations in spite of longer temporal span.
@InProceedings{DAMNATI16.487,
author = {Geraldine Damnati and Aleksandra Guerraz and Delphine Charlet}, title = {Web Chat Conversations from Contact Centers: a Descriptive Study}, booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, year = {2016}, month = {may}, date = {23-28}, location = {Portorož, Slovenia}, editor = {Nicoletta Calzolari (Conference Chair) and Khalid Choukri and Thierry Declerck and Sara Goggi and Marko Grobelnik and Bente Maegaard and Joseph Mariani and Helene Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis}, publisher = {European Language Resources Association (ELRA)}, address = {Paris, France}, isbn = {978-2-9517408-9-1}, language = {english} }