Title |
An Effortless Way To Create Large-Scale Datasets For Famous Speakers |
Authors |
François Salmon and Félicien Vallet |
Abstract |
The creation of large-scale multimedia datasets has become a scientific matter in itself. Indeed, the fully-manual annotation of hundreds or thousands of hours of video and/or audio turns out to be practically infeasible. In this paper, we propose an extremely handy approach to automatically construct a database of famous speakers from TV broadcast news material. We then run a user experiment with a correctly designed tool that demonstrates that very reliable results can be obtained with this method. In particular, a thorough error analysis demonstrates the value of the approach and provides hints for the improvement of the quality of the dataset. |
Topics |
Speech Resource/Database, Person Identification |
Full paper |
An Effortless Way To Create Large-Scale Datasets For Famous Speakers |
Bibtex |
% Conference paper in the LREC'14 proceedings (Reykjavik, 26-31 May 2014).
% Names are in "Last, First" form; the conference days are kept as an ISO range
% in eventdate; the editor role annotation lives in the non-printing
% internal-note field so it cannot corrupt name parsing.
@inproceedings{SALMON14.32,
  author        = {Salmon, Fran{\c{c}}ois and Vallet, F{\'e}licien},
  title         = {An Effortless Way To Create Large-Scale Datasets For Famous Speakers},
  booktitle     = {Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)},
  editor        = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Loftsson, Hrafn and Maegaard, Bente and Mariani, Joseph and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  publisher     = {European Language Resources Association (ELRA)},
  address       = {Reykjavik, Iceland},
  year          = {2014},
  month         = may,
  eventdate     = {2014-05-26/2014-05-31},
  isbn          = {978-2-9517408-8-4},
  language      = {english},
  internal-note = {Nicoletta Calzolari is the Conference Chair},
} |