Title |
Matching Cultural Heritage items to Wikipedia |
Authors |
Eneko Agirre, Ander Barrena, Oier Lopez de Lacalle, Aitor Soroa, Samuel Fernando and Mark Stevenson |
Abstract |
Digitised Cultural Heritage (CH) items usually have short descriptions and lack rich contextual information. Wikipedia articles, on the contrary, include in-depth descriptions and links to related articles, which motivate the enrichment of CH items with information from Wikipedia. In this paper we explore the feasibility of finding matching articles in Wikipedia for a given Cultural Heritage item. We manually annotated a random sample of items from Europeana, and performed a qualitative and quantitative study of the issues and problems that arise, showing that each kind of CH item is different and needs a nuanced definition of what "matching article" means. In addition, we test a well-known wikification (aka entity linking) algorithm on the task. Our results indicate that a substantial number of items can be effectively linked to their corresponding Wikipedia article. |
Topics |
Corpus (creation, annotation, etc.), Digital libraries, Semantics |
Full paper |
Matching Cultural Heritage items to Wikipedia |
Bibtex |
@InProceedings{AGIRRE12.1021,
  author        = {Agirre, Eneko and Barrena, Ander and {Lopez de Lacalle}, Oier and Soroa, Aitor and Fernando, Samuel and Stevenson, Mark},
  title         = {Matching Cultural Heritage items to {Wikipedia}},
  booktitle     = {Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)},
  year          = {2012},
  month         = may,
  eventdate     = {2012-05-23/2012-05-25},
  address       = {Istanbul, Turkey},
  editor        = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Do{\u{g}}an, Mehmet U{\u{g}}ur and Maegaard, Bente and Mariani, Joseph and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  internal-note = {Nicoletta Calzolari is the Conference Chair; the role was removed from the editor field because it corrupted name parsing},
  publisher     = {European Language Resources Association (ELRA)},
  isbn          = {978-2-9517408-7-7},
  language      = {english},
} |