% Conference paper (ICASSP 2010): "Topic cache language model for speech recognition".
% The language and address values are written in ASCII English so the entry is safe
% under classic 8-bit BibTeX; the original export carried them in Chinese script.
% NOTE(review): address holds a country, not a publisher city -- confirm the city
% if the bibliography style prints this field.
@inproceedings{3d0cb3bbc4044f6f9def4dbf0abb91a0,
  author    = {Chueh, {Chuang Hua} and Chien, {Jen Tzung}},
  title     = {Topic cache language model for speech recognition},
  booktitle = {2010 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2010 - Proceedings},
  series    = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2010},
  month     = mar,
  pages     = {5194--5197},
  doi       = {10.1109/ICASSP.2010.5495011},
  isbn      = {9781424442966},
  language  = {English},
  keywords  = {Bayes procedure, Clustering method, Natural language, Smoothing method, Speech recognition},
  abstract  = {Traditional n-gram language models suffer from insufficient long-distance information. The cache language model, which captures the dynamics of word occurrences in a cache, is feasible to compensate this weakness. This paper presents a new topic cache model for speech recognition based on the latent Dirichlet language model where the latent topic structure is explored from n-gram events and employed for word prediction. In particular, the long-distance topic information is continuously updated from the large-span historical words and dynamically incorporated in generating the topic mixtures through Bayesian learning. The topic cache language model does effectively characterize the unseen n-gram events and catch the topic cache for long-distance language modeling. In the experiments on Wall Street Journal corpus, the proposed method achieves better performance than baseline n-gram and the other related language models in terms of perplexity and recognition accuracy.},
  note      = {2010 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2010 ; Conference date: 14-03-2010 Through 19-03-2010},
}