Title |
Automatic Phoneme Segmentation with Relaxed Textual Constraints |
Authors |
Pierre Lanchantin, Andrew C. Morris, Xavier Rodet and Christophe Veaux |
Abstract |
Speech synthesis by unit selection requires the segmentation of a large, single-speaker, high-quality recording. Automatic speech recognition techniques, e.g. Hidden Markov Models (HMM), can be optimised for maximum segmentation accuracy. This paper presents the results of tuning such a phoneme segmentation system. Firstly, using no text transcription, the design of an HMM phoneme recogniser is optimised subject to a phoneme bigram language model. Optimal performance is obtained with triphone models, 7 states per phoneme and 5 Gaussians per state, reaching 94.4% phoneme recognition accuracy with 95.2% of phoneme boundaries within 70 ms of hand-labelled boundaries. Secondly, using the textual information modelled by a multi-pronunciation phonetic graph built according to errors found in the first step, the reported phoneme recognition accuracy increases to 96.8% with 96.1% of phoneme boundaries within 70 ms of hand-labelled boundaries. Finally, the results from these two segmentation methods based on different phonetic graphs, the evaluation set, the hand labelling and the test procedures are discussed and possible improvements are proposed. |
Language |
Single language |
Topics |
Corpus (creation, annotation, etc.), Speech recognition and understanding, Speech synthesis, Text-to-speech systems |
Full paper |
Automatic Phoneme Segmentation with Relaxed Textual Constraints |
Slides |
- |
Bibtex |
@InProceedings{LANCHANTIN08.606,
  author        = {Lanchantin, Pierre and Morris, Andrew C. and Rodet, Xavier and Veaux, Christophe},
  title         = {Automatic Phoneme Segmentation with Relaxed Textual Constraints},
  booktitle     = {Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)},
  editor        = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios and Tapias, Daniel},
  publisher     = {European Language Resources Association (ELRA)},
  address       = {Marrakech, Morocco},
  year          = {2008},
  month         = may,
  eventdate     = {2008-05-28/2008-05-30},
  isbn          = {2-9517408-4-0},
  url           = {http://www.lrec-conf.org/proceedings/lrec2008/},
  language      = {english},
  internal-note = {Ignored annotation field. Nicoletta Calzolari is listed as Conference Chair in the original editor list. The original entry gave the days as 28-30 (now in eventdate). The address presumably names the conference venue rather than the publisher's city -- confirm before relying on it.},
} |