Title |
Ontology-Based Interface Specifications for a NLP Pipeline Architecture |
Authors |
Ekaterina Buyko, Christian Chiarcos and Antonio Pareja-Lora |
Abstract |
The high level of heterogeneity between linguistic annotations usually complicates the interoperability of processing modules within an NLP pipeline. In this paper, a framework for the interoperation of NLP components, based on a data-driven architecture, is presented. Here, ontologies of linguistic annotation are employed to provide a conceptual basis for the tagset-neutral processing of linguistic annotations. The framework proposed here is based on a set of structured OWL ontologies: a reference ontology, a set of annotation models which formalize different annotation schemes, and a declarative linking between these, specified separately. This modular architecture is particularly scalable and flexible as it allows for the integration of different reference ontologies of linguistic annotations in order to overcome the absence of a consensus for an ontology of linguistic terminology. Our proposal originates from three lines of research from different fields: research on annotation type systems in UIMA; the ontological architecture OLiA, originally developed for sustainable documentation and annotation-independent corpus browsing, and the ontologies of the OntoTag model, targeted towards the processing of linguistic annotations in Semantic Web applications. We describe how UIMA annotations can be backed up by ontological specifications of annotation schemes as in the OLiA model, and how these are linked to the OntoTag ontologies, which allow for further ontological processing. |
Language |
Language-independent |
Topics |
LR Infrastructures and Architectures, Ontologies, Corpus (creation, annotation, etc.) |
Full paper |
Ontology-Based Interface Specifications for a NLP Pipeline Architecture |
Slides |
- |
Bibtex |
% LREC 2008 conference paper. Names are stored as "Last, First" joined by " and ",
% the only separator BibTeX recognises between people in author/editor lists.
% Nicoletta Calzolari was Conference Chair; that role is noted here rather than in
% the editor field, where it would be parsed as part of a name.
% The conference days (28-30 May 2008) are recorded in eventdate as an ISO range.
@InProceedings{BUYKO08.215,
  author    = {Buyko, Ekaterina and Chiarcos, Christian and Pareja-Lora, Antonio},
  title     = {Ontology-Based Interface Specifications for a {NLP} Pipeline Architecture},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)},
  year      = {2008},
  month     = may,
  eventdate = {2008-05-28/2008-05-30},
  address   = {Marrakech, Morocco},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios and Tapias, Daniel},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {2-9517408-4-0},
  url       = {http://www.lrec-conf.org/proceedings/lrec2008/},
  language  = {english},
} |