The automatic analysis of texts containing opinions of users about, e.g., products or political views has gained attention within the last decades. However, previous work on the task of analyzing user reviews about mobile applications in app stores is limited. Publicly available corpora do not exist, so that a comparison of different methods and models is difficult. We fill this gap by contributing the Sentiment Corpus of App Reviews (SCARE), which contains fine-grained annotations of application aspects, subjective (evaluative) phrases and relations between both. This corpus consists of 1,760 annotated application reviews from the Google Play Store with 2,487 aspects and 3,959 subjective phrases. We describe the process and methodology by which the corpus was created. The Fleiss' kappa between four annotators reveals an agreement of 0.72. We provide a strong baseline with a linear-chain conditional random field and word-embedding features with a performance of 0.62 for aspect detection and 0.63 for the extraction of subjective phrases. The corpus is available to the research community to support the development of sentiment analysis methods on mobile application reviews.
% Conference paper introducing the SCARE corpus (LREC 2016 proceedings).
% Field notes:
%   venue / eventdate : where and when the conference took place
%   address           : city of the publisher (ELRA), not the conference site
%   Accented letters use brace-wrapped LaTeX escapes so the entry sorts and
%   labels correctly under classic BibTeX as well as biblatex/Biber.
@InProceedings{SNGER16.59,
  author    = {S{\"a}nger, Mario and Leser, Ulf and Kemmerer, Steffen and Adolphs, Peter and Klinger, Roman},
  title     = {{SCARE} --- The Sentiment Corpus of App Reviews with Fine-grained Annotations in {German}},
  booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC} 2016)},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Goggi, Sara and Grobelnik, Marko and Maegaard, Bente and Mariani, Joseph and Mazo, Helene and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Paris, France},
  venue     = {Portoro{\v{z}}, Slovenia},
  eventdate = {2016-05-23/2016-05-28},
  year      = {2016},
  month     = may,
  isbn      = {978-2-9517408-9-1},
  language  = {english},
}