This paper presents a lexical resource developed for Portuguese. The resource contains sentences annotated with semantic roles. The sentences were extracted from two domains: Cardiology research papers and newspaper articles. Both corpora were analyzed with the PALAVRAS parser and subsequently processed with a subcategorization frames extractor, so that each sentence that contained at least one main verb was stored in a database together with its syntactic organization. The annotation was manually carried out by a linguist using an annotation interface. Both the annotated and non-annotated data were exported to an XML format, which is readily available for download. The reason behind exporting non-annotated data is that there is syntactic information collected from the parser annotation in the non-annotated data, and this could be useful for other researchers. The sentences from both corpora were annotated separately, so that it is possible to access sentences either from the Cardiology or from the newspaper corpus. The full resource presents more than seven thousand semantically annotated sentences, containing 192 different verbs and more than 15 thousand individual arguments and adjuncts.
@InProceedings{ZILIO16.946,
author = {Leonardo Zilio and Maria José Bocorny Finatto and Aline Villavicencio}, title = {VerbLexPor: a lexical resource with semantic roles for Portuguese}, booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, year = {2016}, month = {may}, date = {23-28}, location = {Portorož, Slovenia}, editor = {Nicoletta Calzolari (Conference Chair) and Khalid Choukri and Thierry Declerck and Sara Goggi and Marko Grobelnik and Bente Maegaard and Joseph Mariani and Helene Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis}, publisher = {European Language Resources Association (ELRA)}, address = {Paris, France}, isbn = {978-2-9517408-9-1}, language = {english} }