Title |
Unsupervised Resource Creation for Textual Inference Applications |
Authors |
Jeremy Bensley and Andrew Hickl |
Abstract |
This paper explores how a battery of unsupervised techniques can be used in order to create large, high-quality corpora for textual inference applications, such as systems for recognizing textual entailment (TE) and textual contradiction (TC). We show that it is possible to automatically generate sets of positive and negative instances of textual entailment and contradiction from textual corpora with greater than 90% precision. We describe how we generated more than 1 million TE pairs - and a corresponding set of 500,000 TC pairs - from the documents found in the 2 GB AQUAINT-2 newswire corpus. |
Language |
Single language |
Topics |
Semantics, Statistical methods, Corpus (creation, annotation, etc.) |
Full paper |
Unsupervised Resource Creation for Textual Inference Applications |
Slides |
- |
Bibtex |
% LREC 2008 paper entry. Editor names are joined with " and " so each one parses as a
% separate name (Nicoletta Calzolari was listed as Conference Chair). The conference
% days (28-30 May 2008) are stored in `eventdate`; the proceedings link is in `url`.
@InProceedings{BENSLEY08.146,
  author    = {Bensley, Jeremy and Hickl, Andrew},
  title     = {Unsupervised Resource Creation for Textual Inference Applications},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)},
  year      = {2008},
  month     = may,
  eventdate = {2008-05-28/2008-05-30},
  address   = {Marrakech, Morocco},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios and Tapias, Daniel},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {2-9517408-4-0},
  url       = {http://www.lrec-conf.org/proceedings/lrec2008/},
  language  = {english}
} |