In this work, we present the Language Computer Corporation (LCC) annotated metaphor datasets, which represent the largest and most comprehensive resource for metaphor research to date. These datasets were produced over the course of three years by a staff of nine annotators working in four languages (English, Spanish, Russian, and Farsi). As part of these datasets, we provide (1) metaphoricity ratings for within-sentence word pairs on a four-point scale, (2) scored links to our repository of 114 source concept domains and 32 target concept domains, and (3) ratings for the affective polarity and intensity of each pair. Altogether, we provide 188,741 annotations in English (for 80,100 pairs), 159,915 annotations in Spanish (for 63,188 pairs), 99,740 annotations in Russian (for 44,632 pairs), and 137,186 annotations in Farsi (for 57,239 pairs). In addition, we are providing a large set of likely metaphors which have been independently extracted by our two state-of-the-art metaphor detection systems but which have not been analyzed by our team of annotators.
@comment{LREC 2016 paper introducing the LCC metaphor datasets. Nicoletta Calzolari was the Conference Chair; that role is recorded here rather than inside the editor field so that her name parses correctly. The conference place and days are stored in venue and eventdate; address is the publisher location.}
@InProceedings{MOHLER16.1156,
  author    = {Mohler, Michael and Brunson, Mary and Rink, Bryan and Tomlinson, Marc},
  title     = {Introducing the {LCC} Metaphor Datasets},
  booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC} 2016)},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Goggi, Sara and Grobelnik, Marko and Maegaard, Bente and Mariani, Joseph and Mazo, Helene and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  year      = {2016},
  month     = may,
  eventdate = {2016-05-23/2016-05-28},
  venue     = {Portoro{\v{z}}, Slovenia},
  publisher = {European Language Resources Association ({ELRA})},
  address   = {Paris, France},
  isbn      = {978-2-9517408-9-1},
  language  = {english},
}