% Chien and Chueh, conference paper in the MLSP 2012 proceedings.
% Proper nouns and acronyms are brace-protected so sentence-casing styles keep their capitals.
@inproceedings{f03042f95ba14afabb8f74bd9409d03c,
  author    = {Chien, {Jen Tzung} and Chueh, {Chuang Hua}},
  title     = {Latent {Dirichlet} learning for hierarchical segmentation},
  booktitle = {2012 {IEEE} International Workshop on Machine Learning for Signal Processing - Proceedings of {MLSP} 2012},
  series    = {{IEEE} International Workshop on Machine Learning for Signal Processing, {MLSP}},
  year      = {2012},
  month     = sep,
  doi       = {10.1109/MLSP.2012.6349772},
  isbn      = {9781467310260},
  language  = {English},
  keywords  = {Graphical Model, Hierarchical Segmentation, Machine Learning, Topic Model},
  abstract  = {Topic model can be established by using Dirichlet distributions as the prior model to characterize latent topics in natural language. However, topics in real-world stream data are non-stationary. Training a reliable topic model is a challenging study. Further, the usage of words in different paragraphs within a document is varied due to different composition styles. This study presents a hierarchical segmentation model by compensating the heterogeneous topics in stream level and the heterogeneous words in document level. The topic similarity between sentences is calculated to form a beta prior for stream-level segmentation. This segmentation prior is adopted to group topic-coherent sentences into a document. For each pseudo-document, we incorporate a Markov chain to detect stylistic segments within a document. The words in a segment are generated by identical composition style. This new model is inferred by a variational Bayesian EM procedure. Experimental results show benefits by using the proposed model in terms of perplexity and F measure.},
  note      = {2012 22nd IEEE International Workshop on Machine Learning for Signal Processing, MLSP 2012 ; Conference date: 23-09-2012 Through 26-09-2012},
}