Word Sense Disambiguation (WSD) is one of the open problems in the area of natural language processing. Various supervised, unsupervised and knowledge based approaches have been proposed for automatically determining the sense of a word in a particular context. It has been observed that such approaches often find it difficult to beat the WordNet First Sense (WFS) baseline which assigns the sense irrespective of context. In this paper, we present our work on creating the WFS baseline for Hindi language by manually ranking the synsets of Hindi WordNet. A ranking tool is developed where human experts can see the frequency of the word senses in the sense-tagged corpora and have been asked to rank the senses of a word by using this information and also his/her intuition. The accuracy of WFS baseline is tested on several standard datasets. F-score is found to be 60%, 65% and 55% on Health, Tourism and News datasets respectively. The created rankings can also be used in other NLP applications viz., Machine Translation, Information Retrieval, Text Summarization, etc.
@InProceedings{BHINGARDIVE16.1214,
author = {Sudha Bhingardive and Rajita Shukla and Jaya Saraswati and Laxmi Kashyap and Dhirendra Singh and Pushpak Bhattacharya}, title = {Synset Ranking of Hindi WordNet}, booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, year = {2016}, month = {may}, date = {23-28}, location = {Portorož, Slovenia}, editor = {Nicoletta Calzolari (Conference Chair) and Khalid Choukri and Thierry Declerck and Sara Goggi and Marko Grobelnik and Bente Maegaard and Joseph Mariani and Helene Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis}, publisher = {European Language Resources Association (ELRA)}, address = {Paris, France}, isbn = {978-2-9517408-9-1}, language = {english} }