This paper presents work on direct and indirect speech in Portuguese using corpus-based methods. We report on a study whose aim was to identify (i) the Portuguese verbs used to introduce reported speech and (ii) the syntactic patterns used to convey reported speech, in order to enhance the performance of the QUEMDISSE? quotation extraction system. In addition, (iii) we present a Portuguese corpus annotated with reported speech, using the lexicon and rules provided by (i) and (ii), and discuss the process of its annotation and what was learned.
% LREC 2016 conference paper: "QUEMDISSE? Reported speech in Portuguese".
% Field notes:
%   - month uses the bare predefined macro so styles can localise it.
%   - eventdate/venue carry the conference days and place; biblatex reads them,
%     classic BibTeX styles ignore unknown fields.
%   - address is the publisher's city, not the conference venue.
%   - Braces in title protect words that must keep their capitalisation under
%     sentence-casing styles.
@InProceedings{FREITAS16.417,
  author    = {Freitas, Cláudia and Freitas, Bianca and Santos, Diana},
  title     = {{QUEMDISSE?} {Reported} speech in {Portuguese}},
  booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC} 2016)},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Goggi, Sara and Grobelnik, Marko and Maegaard, Bente and Mariani, Joseph and Mazo, Helene and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  year      = {2016},
  month     = may,
  eventdate = {2016-05-23/2016-05-28},
  venue     = {Portorož, Slovenia},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Paris, France},
  isbn      = {978-2-9517408-9-1},
  language  = {english},
}