This paper presents NileULex, which is an Arabic sentiment lexicon containing close to six thousands Arabic words and compound phrases. Forty five percent of the terms and expressions in the lexicon are Egyptian or colloquial while fifty five percent are Modern Standard Arabic. While the collection of many of the terms included in the lexicon was done automatically, the actual addition of any term was done manually. One of the important criterions for adding terms to the lexicon, was that they be as unambiguous as possible. The result is a lexicon with a much higher quality than any translated variant or automatically constructed one. To demonstrate that a lexicon such as this can directly impact the task of sentiment analysis, a very basic machine learning based sentiment analyser that uses unigrams, bigrams, and lexicon based features was applied on two different Twitter datasets. The obtained results were compared to a baseline system that only uses unigrams and bigrams. The same lexicon based features were also generated using a publicly available translation of a popular sentiment lexicon. The experiments show that usage of the developed lexicon improves the results over both the baseline and the publicly available lexicon.
@InProceedings{ELBELTAGY16.1100,
author = {Samhaa R. El-Beltagy}, title = {NileULex: A Phrase and Word Level Sentiment Lexicon for Egyptian and Modern Standard Arabic}, booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, year = {2016}, month = {may}, date = {23-28}, location = {Portorož, Slovenia}, editor = {Nicoletta Calzolari (Conference Chair) and Khalid Choukri and Thierry Declerck and Sara Goggi and Marko Grobelnik and Bente Maegaard and Joseph Mariani and Helene Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis}, publisher = {European Language Resources Association (ELRA)}, address = {Paris, France}, isbn = {978-2-9517408-9-1}, language = {english} }