@article{Grzegorczyk_Kurdziel_Wójcik_2015,
  title={Effects of Sparse Initialization in Deep Belief Networks},
  volume={16},
  number={4},
  pages={313},
  year={2015},
  month={Dec.},
  journal={Computer Science},
  author={Grzegorczyk, Karol and Kurdziel, Marcin and Wójcik, Piotr Iwo},
  url={https://journals.agh.edu.pl/csci/article/view/1232},
  DOI={10.7494/csci.2015.16.4.313},
  abstractNote={Deep neural networks are often trained in two phases: first, hidden layers are pretrained in an unsupervised manner, and then the network is fine-tuned with error backpropagation. Pretraining is often carried out using Deep Belief Networks (DBNs), with initial weights set to small random values. However, recent results established that well-designed initialization schemes, e.g. Sparse Initialization (SI), can greatly improve the performance of networks that do not use pretraining. An interesting question arising from these results is whether such initialization techniques would also improve pretrained networks. To shed light on this question, in this work we evaluate SI in DBNs that are used to pretrain discriminative networks. The motivation behind this research is our observation that SI has an impact on the features learned by a DBN during pretraining. Our results demonstrate that this improves network performance: when pretraining starts from sparsely initialized weight matrices, networks achieve lower classification error after fine-tuning.}
}