准备数据
本文使用由I-Cheng Yeh捐赠给UCI机器学习数据库的关于混凝土抗压强度的数据。
1
2
3
|
# Load the concrete compressive-strength data and preview the first rows.
# NOTE(review): the path is an absolute, machine-specific location; adjust it
# to wherever concrete.csv lives before running.
concrete <- read.csv(file = "C:/Users/cxy/Documents/MLwR/concrete.csv")
head(concrete)
|
1
2
3
4
5
6
7
|
## cement slag ash water superplastic coarseagg fineagg age strength
## 1 141.3 212.0 0.0 203.5 0.0 971.8 748.5 28 29.89
## 2 168.9 42.2 124.3 158.3 10.8 1080.8 796.2 14 23.51
## 3 250.0 0.0 95.7 187.4 5.5 956.9 861.2 28 29.22
## 4 266.0 114.0 0.0 228.0 0.0 932.0 670.0 28 45.85
## 5 154.8 183.4 0.0 193.3 9.1 1047.4 696.7 28 18.29
## 6 255.0 0.0 0.0 192.0 0.0 889.8 945.0 90 21.86 |
1
2
3
4
5
6
7
8
9
10
11
12
13
|
# Min-max normalization: linearly rescale a vector so that its smallest
# non-missing value maps to 0 and its largest to 1.
#
# Args:
#   x: a numeric vector (one column of the data set).
#
# Returns:
#   A numeric vector of the same length as x. Missing values stay missing.
#   A vector whose non-missing values are all equal is mapped to 0 instead of
#   NaN, because the range is zero and the usual formula would divide 0 by 0.
normalize_mm <- function(x) {
  # Nothing to rescale: empty input or only missing values.
  if (length(x) == 0L || all(is.na(x))) {
    return(as.numeric(x))
  }

  # Ignore missing values when locating the extremes; otherwise a single NA
  # would turn the whole column into NA.
  lo <- min(x, na.rm = TRUE)
  hi <- max(x, na.rm = TRUE)
  span <- hi - lo

  # Constant column: avoid 0 / 0 = NaN and return zeros (NA stays NA).
  # isTRUE() keeps a non-finite span (e.g. Inf - Inf) on the general path.
  if (isTRUE(span == 0)) {
    return(as.numeric(x - lo))
  }

  (x - lo) / span
}
# Apply min-max normalization to every column; lapply() yields a list, so the
# result is converted back into a data frame.
# NOTE(review): the scaling uses the extremes of the full data set, including
# rows that later end up in the validation set.
concrete_norm <- as.data.frame(lapply(X = concrete, FUN = normalize_mm))

# Build the training and validation sets.
set.seed(1234) # fixed seed so the random split is reproducible
# Randomly pick 75% of the row indices for training.
train <- sample(nrow(concrete_norm), 0.75 * nrow(concrete_norm))
concrete_train <- concrete_norm[train, ]     # training set
concrete_validate <- concrete_norm[-train, ] # validation set: remaining rows
|
- cement:产品中使用的水泥
- slag:矿渣
- ash:灰
- water:水
- superplastic:超塑化剂
- coarseagg(coarse aggregate):粗集料
- fineagg(fine aggregate):细集料
- age(aging time):老化时间
- strength:混凝土强度
基于数据训练模型
1
2
3
4
5
|
# Train a neural network that models strength as a function of the eight
# remaining columns of the training set, using neuralnet()'s default settings.
library(neuralnet)
concrete_model <- neuralnet(
  formula = strength ~ cement + slag + ash + water +
    superplastic + coarseagg + fineagg + age,
  data = concrete_train
)
# Plot the fitted model.
plot(concrete_model)
|

评估模型的性能
1
2
3
|
# Run the first eight columns of the validation set (the predictors) through
# the trained network.
model_results <- compute(concrete_model, concrete_validate[, 1:8])
# Extract the network output, i.e. the predicted (normalized) strength.
predicted_strength <- model_results[["net.result"]]
# Correlation between predicted and observed strength on the validation set.
cor(predicted_strength, concrete_validate[["strength"]])
|
1
2
|
## [,1]
## [1,] 0.8012714 |
提高模型性能
1
2
3
4
5
6
7
8
|
# Refit the network with the same formula and data, this time with hidden = 5.
concrete_model2 <- neuralnet(
  formula = strength ~ cement + slag + ash + water +
    superplastic + coarseagg + fineagg + age,
  data = concrete_train,
  hidden = 5
)
# Plot the fitted model.
plot(concrete_model2)

# Evaluate on the validation set exactly as for the first model: feed in the
# first eight columns, take the network output, and correlate it with the
# observed strength.
model_results2 <- compute(concrete_model2, concrete_validate[, 1:8])
predicted_strength2 <- model_results2[["net.result"]]
cor(predicted_strength2, concrete_validate[["strength"]])
|

1
2
|
## [,1]
## [1,] 0.9302541 |