Title |
Pragmatic Annotation of Discourse Markers in a Multilingual Parallel Corpus (Arabic- Spanish-English) |
Authors |
Doaa Samy and Ana González-Ledesma |
Abstract |
Discourse structure and coherence relations are one of the main inferential challenges addressed by computational pragmatics. The present study focuses on discourse markers as key elements in guiding the inferences of the statements in natural language. Through a rule-based approach for the automatic identification, classification and annotation of the discourse markers in a multilingual parallel corpus (Arabic-Spanish-English), this research provides a valuable resource for the community. Two main aspects define the novelty of the present study. First, it offers a multilingual computational processing of discourse markers, grounded on a theoritical framework and implemented in a XML tagging scheme. The XML scheme represents a set of pragmatic and grammatical attributes, considered as basic features for the different kinds of discourse markers. Besides, the scheme provides a typology of discourse markers based on their discursive functions including hypothesis, co-argumentation, cause, consequence, concession, generalization, topicalization, reformulation, enumeration, synthesis, etc. Second, Arabic language is addressed from a computational pragmatic perspective where the identification, classification and annotation processes are carried out using the information provided from the tagging of Spanish discourse markers and the alignments. |
Language |
Multiple languages |
Topics |
Corpus (creation, annotation, etc.), Multilinguality, Tagging |
Full paper |
Pragmatic Annotation of Discourse Markers in a Multilingual Parallel Corpus (Arabic- Spanish-English) |
Slides |
- |
Bibtex |
@InProceedings{SAMY08.828,
author = {Doaa Samy and Ana González-Ledesma},
title = {Pragmatic Annotation of Discourse Markers in a Multilingual Parallel Corpus (Arabic- Spanish-English)},
booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08)},
year = {2008},
month = {may},
date = {28-30},
address = {Marrakech, Morocco},
editor = {Nicoletta Calzolari (Conference Chair), Khalid Choukri, Bente Maegaard, Joseph Mariani, Jan Odijk, Stelios Piperidis, Daniel Tapias},
publisher = {European Language Resources Association (ELRA)},
isbn = {2-9517408-4-0},
note = {http://www.lrec-conf.org/proceedings/lrec2008/},
language = {english}
} |