Title |
Eksairesis: A Domain-Adaptable System for Ontology Building from Unstructured Text |
Authors |
Katia Lida Kermanidis, Aristomenis Thanopoulos, Manolis Maragoudakis and Nikos Fakotakis |
Abstract |
This paper describes Eksairesis, a system for learning economic domain knowledge automatically from Modern Greek text. The knowledge is in the form of economic terms and the semantic relations that govern them. The entire process is based on the use of minimal language-dependent tools, no external linguistic resources, and merely free, unstructured text. The methodology is thereby easily portable to other domains and other languages. The text is pre-processed with basic morphological annotation, and semantic (named and other) entities are identified using supervised learning techniques. Statistical filtering, i.e., corpora comparison, is used to extract domain terms, and supervised learning is again employed to detect the semantic relations between pairs of terms. Advanced classification schemata, ensemble learning, and one-sided sampling, are experimented with in order to deal with the noise in the data, which is unavoidable due to the low pre-processing level and the lack of sophisticated resources. An average 68.5% f-score over all the classes is achieved when learning semantic relations. Bearing in mind the use of minimal resources and the highly automated nature of the process, classification performance is very promising, compared to results reported in previous work. |
Language |
Single language |
Topics |
Acquisition, Machine Learning, Ontologies, Information Extraction, Information Retrieval |
Full paper |
Eksairesis: A Domain-Adaptable System for Ontology Building from Unstructured Text |
Slides |
- |
Bibtex |
@InProceedings{KERMANIDIS08.147,
  author        = {Kermanidis, Katia Lida and Thanopoulos, Aristomenis and Maragoudakis, Manolis and Fakotakis, Nikos},
  title         = {Eksairesis: A Domain-Adaptable System for Ontology Building from Unstructured Text},
  booktitle     = {Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08)},
  year          = {2008},
  month         = may,
  eventdate     = {2008-05-28/2008-05-30},
  address       = {Marrakech, Morocco},
  editor        = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios and Tapias, Daniel},
  publisher     = {European Language Resources Association (ELRA)},
  isbn          = {2-9517408-4-0},
  url           = {http://www.lrec-conf.org/proceedings/lrec2008/},
  language      = {english},
  internal-note = {Nicoletta Calzolari is listed as Conference Chair; the role is recorded here because a parenthetical inside the editor field would be parsed as part of the name}
} |