We present a dataset created from the archived Hansard House of Commons debates of the UK Parliament (2013-2016). The resource includes fine-grained topic annotations at the document level and is enriched with additional semantic information, such as that provided by entity links. We assess the quality and usefulness of this corpus with two benchmarks on topic classification and ranking.
@comment{Conference paper entry. Names are given as "Last, First" and joined by " and ", which is the only separator BibTeX recognises between names. Braces inside title and booktitle protect words whose capitalisation must survive style recasing.}
@inproceedings{NANNI18.6,
  author    = {Nanni, Federico and Osman, Mahmoud and Cheng, Yi-Ru and Ponzetto, Simone Paolo and Dietz, Laura},
  title     = {{UKParl}: A Semantified and Topically Organized Corpus of Political Speeches},
  booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)},
  year      = {2018},
  month     = may,
  date      = {7-12},
  location  = {Miyazaki, Japan},
  editor    = {Fi{\v{s}}er, Darja and Eskevich, Maria and de Jong, Franciska},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Paris, France},
  isbn      = {979-10-95546-02-3},
  language  = {english},
}