Title |
Learning the Species of Biomedical Named Entities from Annotated Corpora |
Authors |
Xinglong Wang and Claire Grover |
Abstract |
In biomedical articles, terms with the same surface forms are often used to refer to different entities across a number of model organisms, in which case determining the species becomes crucial to term identification systems that ground terms to specific database identifiers. This paper describes a rule-based system that extracts species-indicating words, such as human or murine, which can be used to decide the species of the nearby entity terms, and a machine-learning species disambiguation system that was developed on manually species-annotated corpora. The performance of both systems was evaluated on gold-standard datasets, where the machine-learning system yielded better overall results. |
Language |
Single language |
Topics |
Information Extraction, Information Retrieval, Corpus (creation, annotation, etc.), Text mining |
Full paper |
Learning the Species of Biomedical Named Entities from Annotated Corpora |
Slides |
Learning the Species of Biomedical Named Entities from Annotated Corpora |
Bibtex |
@inproceedings{WANG08.460,
  author        = {Wang, Xinglong and Grover, Claire},
  title         = {Learning the Species of Biomedical Named Entities from Annotated Corpora},
  booktitle     = {Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)},
  editor        = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios and Tapias, Daniel},
  publisher     = {European Language Resources Association (ELRA)},
  address       = {Marrakech, Morocco},
  year          = {2008},
  month         = may,
  eventdate     = {2008-05-28/2008-05-30},
  isbn          = {2-9517408-4-0},
  url           = {http://www.lrec-conf.org/proceedings/lrec2008/},
  language      = {english},
  internal-note = {Nicoletta Calzolari was Conference Chair; the role label was removed from the editor field so that each editor parses as a separate name. The conference days (28-30 May 2008) are recorded in eventdate.},
} |