% Conference paper (INTERSPEECH 2006 - ICSLP) on building a phonetically
% transcribed Taiwanese (Min-nan) text corpus with the help of speech recognition.
% Proper nouns/acronyms are brace-protected so sentence-casing styles keep their case.
@inproceedings{d201dd3e13df44d6ac65de66c8a49030,
  title     = {Using speech recognition technique for constructing a phonetically transcribed {Taiwanese} ({Min-nan}) text corpus},
  abstract  = {Collection of Taiwanese text corpus with phonetic transcription suffers from the problems of multiple pronunciation variation. By augmenting the text with speech, and using automatic speech recognition with a sausage searching net constructed from the multiple pronunciations of the text corresponding to its speech utterance, we are able to reduce the effort for phonetic transcription. By using the multiple pronunciation lexicon, the error rate of transcription 13.94\% was achieved. Further improvement can be achieved by adapting the pronunciation lexicon with pronunciation variation (PV) rules derived from a manual corrected speech corpus. The PV rules can be categorized into two kinds: the knowledge-based and data-driven rules. By incorporating the PV rules, the error rate reduction 13.63\% could be achieved. Although the technique was developed for Taiwanese speech, it could also be adapted easily to be applied in the other similar ``minority'' Chinese spoken languages.},
  keywords  = {Multiple pronunciations, Phonetic transcription, Pronunciation variation, Taiwanese, Text corpus},
  author    = {Liang, {Min Siong} and Lyu, {Ren Yuan} and Chiang, {Yuang Chin}},
  year      = {2006},
  language  = {English},
  isbn      = {9781604234497},
  series    = {Proceedings of the Annual Conference of the International Speech Communication Association, {INTERSPEECH}},
  publisher = {International Speech Communication Association},
  pages     = {193--196},
  booktitle = {{INTERSPEECH} 2006 and 9th International Conference on Spoken Language Processing, {INTERSPEECH} 2006 - {ICSLP}},
  note      = {INTERSPEECH 2006 and 9th International Conference on Spoken Language Processing, INTERSPEECH 2006 - ICSLP ; Conference date: 17-09-2006 Through 21-09-2006},
}