This paper proposes a new topic model that exploits word sense information in order to discover less redundant and more informative topics. Word sense information is obtained from WordNet and the discovered topics are groups of synsets, instead of mere surface words. A key feature is that all the known senses of a word are considered, with their probabilities. Alternative configurations of the model are described and compared to each other and to LDA, the most popular topic model. However, the obtained results suggest that there are no benefits of enriching LDA with word sense information.
@comment{LREC 2016 conference paper. The address field is the publisher's city; the conference place and days are stored in venue and eventdate (biblatex fields, ignored by classic BibTeX styles). Nicoletta Calzolari was the Conference Chair; that role is noted here rather than inside the editor field so that her name parses correctly.}
@inproceedings{FERRUGENTO16.630,
  author    = {Ferrugento, Adriana and Gonçalo Oliveira, Hugo and Alves, Ana and Rodrigues, Filipe},
  title     = {Can Topic Modelling benefit from Word Sense Information?},
  booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC} 2016)},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Goggi, Sara and Grobelnik, Marko and Maegaard, Bente and Mariani, Joseph and Mazo, Helene and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Paris, France},
  year      = {2016},
  month     = may,
  eventdate = {2016-05-23/2016-05-28},
  venue     = {Portorož, Slovenia},
  isbn      = {978-2-9517408-9-1},
  language  = {english},
}