案例来源于古扎拉蒂《经济计量学精要》(第四版)第二章例2-5古董钟与拍价格卖。之所以选择此案例是因为此案例只有3个变量,32个样本,既满足多元回归的最低要求,又便于手工验证。

经典案例:古董钟与拍卖价格

  • Price:古董钟中标的价格
  • Age:钟表的年代
  • Number of Bidders:投标人的个数

加载R包并载入数据

1
2
3
4
library(readxl)
library(tidyverse)
df214 = read_xls("~/eoe/table 2_14.xls", skip = 5)
print(df214)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
## # A tibble: 32 x 4
##    Observation Price   Age `Number of Bidders`
##          <dbl> <dbl> <dbl>               <dbl>
##  1           1  1235   127                  13
##  2           2  1080   115                  12
##  3           3   845   127                   7
##  4           4  1552   150                   9
##  5           5  1047   156                   6
##  6           6  1979   182                  11
##  7           7  1822   156                  12
##  8           8  1253   132                  10
##  9           9  1297   137                   9
## 10          10   946   113                   9
## # ... with 22 more rows

利用R自带函数进行参数估计

1
2
model_df214 = lm(Price~Age + `Number of Bidders`, data = df214)
summary(model_df214)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
## 
## Call:
## lm(formula = Price ~ Age + `Number of Bidders`, data = df214)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -208.60 -119.04   15.73  101.84  212.54 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -1336.0493   175.2725  -7.623 2.10e-08 ***
## Age                    12.7414     0.9124  13.965 2.09e-14 ***
## `Number of Bidders`    85.7641     8.8020   9.744 1.19e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 134.6 on 29 degrees of freedom
## Multiple R-squared:  0.8906,	Adjusted R-squared:  0.8831 
## F-statistic: 118.1 on 2 and 29 DF,  p-value: 1.161e-14

手动计算参数值和各个统计量(含截距项)

生成矩阵X

1
2
3
4
5
df214 %>%
  mutate(X1 = 1) %>%
  select(X1, Age, `Number of Bidders`) %>%
  as.matrix() ->X
print(X)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
##       X1 Age Number of Bidders
##  [1,]  1 127                13
##  [2,]  1 115                12
##  [3,]  1 127                 7
##  [4,]  1 150                 9
##  [5,]  1 156                 6
##  [6,]  1 182                11
##  [7,]  1 156                12
##  [8,]  1 132                10
##  [9,]  1 137                 9
## [10,]  1 113                 9
## [11,]  1 137                15
## [12,]  1 117                11
## [13,]  1 170                14
## [14,]  1 182                 8
## [15,]  1 162                11
## [16,]  1 184                10
## [17,]  1 143                 6
## [18,]  1 159                 9
## [19,]  1 108                14
## [20,]  1 175                 8
## [21,]  1 108                 6
## [22,]  1 179                 9
## [23,]  1 111                15
## [24,]  1 187                 8
## [25,]  1 137                 8
## [26,]  1 153                 6
## [27,]  1 117                13
## [28,]  1 126                10
## [29,]  1 111                 7
## [30,]  1 115                 7
## [31,]  1 194                 5
## [32,]  1 168                 7

生成矩阵Y

1
2
3
4
df214 %>%
  select(Price) %>%
  as.matrix() ->Y
print(Y)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
##       Price
##  [1,]  1235
##  [2,]  1080
##  [3,]   845
##  [4,]  1552
##  [5,]  1047
##  [6,]  1979
##  [7,]  1822
##  [8,]  1253
##  [9,]  1297
## [10,]   946
## [11,]  1713
## [12,]  1024
## [13,]  2131
## [14,]  1550
## [15,]  1884
## [16,]  2041
## [17,]   854
## [18,]  1483
## [19,]  1055
## [20,]  1545
## [21,]   729
## [22,]  1792
## [23,]  1175
## [24,]  1593
## [25,]  1147
## [26,]  1092
## [27,]  1152
## [28,]  1336
## [29,]   785
## [30,]   744
## [31,]  1356
## [32,]  1262

计算样本容量n和自由度df

1
2
3
4
n = nrow(df214)
k = ncol(X)
df = n - k
print(c(n=n, k=k, df=df))
1
2
##  n  k df 
## 32  3 29

估计参数Beta_hat

1
2
3
Beta_hat = solve(t(X) %*% X) %*% t(X) %*% Y
colnames(Beta_hat) = "Beta_hat"
print(Beta_hat)
1
2
3
4
##                      Beta_hat
## X1                -1336.04929
## Age                  12.74138
## Number of Bidders    85.76407

计算残差residuals

1
2
3
residuals = df214$Price - (X %*% Beta_hat)
colnames(residuals) = "residuals"
print(residuals)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
##        residuals
##  [1,] -162.03929
##  [2,]  -78.37862
##  [3,]  -37.45489
##  [4,]  204.96516
##  [5,] -119.19094
##  [6,]   52.71275
##  [7,]  141.22465
##  [8,]   49.54599
##  [9,]  115.60314
## [10,]   70.39635
## [11,]   17.01874
## [12,]  -74.09732
## [13,]  100.31715
## [14,] -118.99505
## [15,]  212.54042
## [16,]  174.99405
## [17,] -146.55296
## [18,]   21.29270
## [19,] -185.71707
## [20,]  -34.80536
## [21,]  174.39547
## [22,]   75.46503
## [23,] -189.70529
## [24,] -139.70197
## [25,]   51.36721
## [26,]  -35.96679
## [27,] -117.62546
## [28,]  208.99429
## [29,]  106.40725
## [30,]   14.44171
## [31,] -208.59945
## [32,] -142.85161

计算回归标准误sigma_hat

1
2
sigma_hat = sqrt((t(residuals) %*% residuals) / df)[1,1]
print(c(sigma_hat = sigma_hat))
1
2
## sigma_hat 
##  134.6083

计算估计量的方差与标准误

1
2
3
4
5
6
7
B_hat_Var = solve(t(X) %*% X) * (sigma_hat)^2
b1_se = sqrt(B_hat_Var[1, 1])
b2_se = sqrt(B_hat_Var[2, 2])
b3_se = sqrt(B_hat_Var[3, 3])
print(c(`se(b1)` = b1_se, 
        `se(b2)`  = b2_se, 
        `se(b3)`  = b3_se))
1
2
##      se(b1)      se(b2)      se(b3) 
## 175.2724965   0.9123559   8.8019949

计算t统计量

1
2
3
4
5
6
t_b1 = Beta_hat[1, 1] / b1_se
t_b2 = Beta_hat[2, 1] / b2_se
t_b3 = Beta_hat[3, 1] / b3_se
print(c(t_b1 = t_b1,
        t_b2 = t_b2,
        t_b3 = t_b3))
1
2
##      t_b1      t_b2      t_b3 
## -7.622698 13.965366  9.743708

计算拟合优度R_squared

1
2
3
4
5
TSS = sum((df214$Price - mean(df214$Price))^2)
RSS = (t(residuals) %*% residuals)[1, 1]
ESS = TSS - RSS
R_squared = 1 - RSS/TSS
print(c(R_squared = R_squared))
1
2
## R_squared 
## 0.8906143

计算修正的拟合优度R_squared_adjusted

1
2
R_squared_adjusted  = 1 - (1-R_squared) * (n - 1) / (n - k)
print(c(R_squared_adjusted = R_squared_adjusted))
1
2
## R_squared_adjusted 
##          0.8830705

计算F统计量

1
2
3
F_value = (R_squared/(k - 1)) /
   ((1 - R_squared) / (n - k))
print(c(F_value = F_value))
1
2
##  F_value 
## 118.0585