2.4.4 Using Regularization to Avoid Overfitting
Suppose we want to push the model's predictive power further and try to do so with the more complex neural network below. Once the model is trained, we use the plot() function to inspect the loss and accuracy on the training and validation sets for each epoch. The result is shown in Figure 2-15.
> bigger_model <- keras_model_sequential()
> bigger_model %>%
+   layer_dense(units = 128, kernel_initializer = 'uniform',
+               activation = 'relu', input_shape = c(16)) %>%
+   layer_dense(units = 64, kernel_initializer = 'uniform',
+               activation = 'relu') %>%
+   layer_dense(units = 32, kernel_initializer = 'uniform',
+               activation = 'relu') %>%
+   layer_dense(units = 16, kernel_initializer = 'uniform',
+               activation = 'relu') %>%
+   layer_dense(units = 8, kernel_initializer = 'uniform',
+               activation = 'relu') %>%
+   layer_dense(units = 4, kernel_initializer = 'uniform',
+               activation = 'relu') %>%
+   layer_dense(units = 1, kernel_initializer = 'uniform',
+               activation = 'sigmoid')
> bigger_model %>% compile(loss = 'binary_crossentropy',
+                          optimizer = 'rmsprop',
+                          metrics = c('accuracy'))
> history <- bigger_model %>% fit(
+   x_train_scale, y_train,
+   batch_size = 128,
+   epochs = 150,
+   verbose = 1,
+   validation_split = 0.2
+ )
> plot(history)
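Before fitting, it can be worth confirming how much capacity this deeper stack actually adds. summary() prints each layer's output shape and parameter count (a quick optional check, not part of the training flow above):

> # Optional: list layers, output shapes and parameter counts
> summary(bigger_model)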
Figure 2-15 Training and validation loss and accuracy
As Figure 2-15 shows, the training loss keeps decreasing and training accuracy keeps improving as the epochs progress; on the validation set, however, beyond about 60 epochs the loss starts to rise and accuracy declines somewhat. This is a clear sign of overfitting.
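If we want that turning point as a number rather than reading it off the plot, the epoch with the lowest validation loss can be pulled straight from the history object returned by fit() (a minimal sketch using the history object trained above):

> # Epoch at which validation loss bottoms out; beyond this point
> # further training mostly fits noise in the training data
> which.min(history$metrics$val_loss)
> min(history$metrics$val_loss)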
Next we retrain the model with L2 regularization and compare the two. L2 regularization adds a penalty proportional to the sum of the squared weights (here 0.001 * sum(w^2) for each layer's kernel) to the loss, which discourages large weights and so limits the network's ability to memorize the training data. The result is shown in Figure 2-16.
> # Add L2 regularization
> l2_model <- keras_model_sequential()
> l2_model %>%
+   layer_dense(units = 128, kernel_initializer = 'uniform',
+               kernel_regularizer = regularizer_l2(l = 0.001),
+               activation = 'relu', input_shape = c(16)) %>%
+   layer_dense(units = 64, kernel_initializer = 'uniform',
+               kernel_regularizer = regularizer_l2(l = 0.001),
+               activation = 'relu') %>%
+   layer_dense(units = 32, kernel_initializer = 'uniform',
+               kernel_regularizer = regularizer_l2(l = 0.001),
+               activation = 'relu') %>%
+   layer_dense(units = 16, kernel_initializer = 'uniform',
+               kernel_regularizer = regularizer_l2(l = 0.001),
+               activation = 'relu') %>%
+   layer_dense(units = 8, kernel_initializer = 'uniform',
+               kernel_regularizer = regularizer_l2(l = 0.001),
+               activation = 'relu') %>%
+   layer_dense(units = 4, kernel_initializer = 'uniform',
+               kernel_regularizer = regularizer_l2(l = 0.001),
+               activation = 'relu') %>%
+   layer_dense(units = 1, kernel_initializer = 'uniform',
+               activation = 'sigmoid')
>
> l2_model %>% compile(loss = 'binary_crossentropy',
+                      optimizer = 'rmsprop',
+                      metrics = c('accuracy'))
> l2_history <- l2_model %>% fit(
+   x_train_scale, y_train,
+   batch_size = 128,
+   epochs = 150,
+   verbose = 0,
+   validation_split = 0.2
+ )
> # Compare the performance of the two models
> library(ggplot2)
> library(tidyr)
> library(dplyr)
> library(tibble)
> compare_cx <- data.frame(
+   baseline_train = history$metrics$loss,
+   baseline_val = history$metrics$val_loss,
+   l2_train = l2_history$metrics$loss,
+   l2_val = l2_history$metrics$val_loss
+ ) %>%
+   rownames_to_column() %>%
+   mutate(rowname = as.integer(rowname)) %>%
+   gather(key = "type", value = "value", -rowname)
> ggplot(compare_cx, aes(x = rowname, y = value, color = type)) +
+   geom_line() +
+   xlab("epoch") +
+   ylab("loss") +
+   theme_bw() +
+   theme(
+     legend.position = "none",
+     plot.title = element_text(colour = "gray24", size = 12, face = "bold"),
+     plot.background = element_rect(fill = "gray90"),
+     axis.title = element_text(size = 10),
+     axis.text = element_text(colour = "gray35"))
Figure 2-16 Comparison of losses before and after regularization
The two lower lines in Figure 2-16 are the training and validation loss curves of the L2-regularized model. In the later epochs, the gap between these two curves is clearly smaller than the gap between the upper two, indicating that regularization has reduced overfitting.
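As a final check, both models can also be scored on held-out data with evaluate(). The sketch below assumes a hypothetical test set, x_test_scale and y_test, preprocessed in the same way as x_train_scale; these objects are not created in this section:

> # x_test_scale / y_test are assumed to exist and to be scaled
> # with the same parameters as the training data
> bigger_model %>% evaluate(x_test_scale, y_test, verbose = 0)
> l2_model %>% evaluate(x_test_scale, y_test, verbose = 0)

If the regularized model generalizes better, its test loss should sit closer to its training loss than the baseline model's does.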