Title |
Some Fine Points of Hybrid Natural Language Parsing |
Authors |
Peter Adolphs, Stephan Oepen, Ulrich Callmeier, Berthold Crysmann, Dan Flickinger and Bernd Kiefer |
Abstract |
Large-scale grammar-based parsing systems nowadays increasingly rely on independently developed, more specialized components for pre-processing their input. However, different tools make conflicting assumptions about very basic properties such as tokenization. To make linguistic annotation gathered in pre-processing available to deep parsing, a hybrid NLP system needs to establish a coherent mapping between the two universes. Our basic assumption is that tokens are best described by attribute value matrices (AVMs) that may be arbitrarily complex. We propose a powerful resource-sensitive rewrite formalism, chart mapping, that allows us to mediate between the token descriptions delivered by shallow pre-processing components and the input expected by the grammar. We furthermore propose a novel way of unknown word treatment where all generic lexical entries are instantiated that are licensed by a particular token AVM. Again, chart mapping is used to give the grammar writer full control as to which items (e.g. native vs. generic lexical items) enter syntactic parsing. We discuss several further uses of the original idea and report on early experiences with the new machinery. |
Language |
Language-independent |
Topics |
Parsing Systems, Tools, systems, applications |
Full paper |
Some Fine Points of Hybrid Natural Language Parsing |
Slides |
Some Fine Points of Hybrid Natural Language Parsing |
Bibtex |
@InProceedings{ADOLPHS08.349,
author = {Peter Adolphs, Stephan Oepen, Ulrich Callmeier, Berthold Crysmann, Dan Flickinger and Bernd Kiefer},
title = {Some Fine Points of Hybrid Natural Language Parsing},
booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08)},
year = {2008},
month = {may},
date = {28-30},
address = {Marrakech, Morocco},
editor = {Nicoletta Calzolari (Conference Chair), Khalid Choukri, Bente Maegaard, Joseph Mariani, Jan Odijk, Stelios Piperidis, Daniel Tapias},
publisher = {European Language Resources Association (ELRA)},
isbn = {2-9517408-4-0},
note = {http://www.lrec-conf.org/proceedings/lrec2008/},
language = {english}
} |