This paper presents an automatic corpus-based process to author an open-domain conversational strategy usable both in chatterbot systems and as a fallback strategy for out-of-domain human utterances. Our approach is implemented on a corpus of television drama subtitles. This system is used as a chatterbot system to collect a corpus of 41 open-domain textual dialogues with 27 human participants. The general capabilities of the system are studied through objective measures and subjective self-reports in terms of understandability, repetition and coherence of the system responses selected in reaction to human utterances. Subjective evaluations of the collected dialogues are presented with respect to amusement, engagement and enjoyability. The main factors influencing those dimensions in our chatterbot experiment are discussed.
@InProceedings{DUBUISSONDUPLESSIS16.396,
author = {Guillaume Dubuisson Duplessis and Vincent Letard and Anne-Laure Ligozat and Sophie Rosset}, title = {Purely Corpus-based Automatic Conversation Authoring}, booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, year = {2016}, month = {may}, date = {23-28}, location = {Portorož, Slovenia}, editor = {Nicoletta Calzolari (Conference Chair) and Khalid Choukri and Thierry Declerck and Sara Goggi and Marko Grobelnik and Bente Maegaard and Joseph Mariani and Helene Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis}, publisher = {European Language Resources Association (ELRA)}, address = {Paris, France}, isbn = {978-2-9517408-9-1}, language = {english} }