如果对于不同的样本点,随机误差项之间不再是不相关的,而是存在某种相关性,则认为出现了序列相关性。 古扎拉蒂《经济计量学精要》表10-7给出的1980 ~ 2006年间股票价格和GDP的数据。

载入数据

1
2
3
4
library(readxl)
library(tidyverse)
df107 = read_xls("~/eoe/table 10_7.xls", skip = 4, n_max = 27)[,2:3]
print(df107)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
## # A tibble: 27 x 2
##     NYSE   GDP
##    <dbl> <dbl>
##  1  720. 2790.
##  2  783. 3128.
##  3  729. 3255 
##  4  980. 3537.
##  5  977. 3933.
##  6 1143. 4220.
##  7 1438. 4463.
##  8 1710. 4740.
##  9 1585. 5104.
## 10 1903. 5484.
## # ... with 17 more rows

建立一元线性回归模型

1
2
model = lm(NYSE ~ GDP, data=df107)
summary(model)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
## 
## Call:
## lm(formula = NYSE ~ GDP, data = df107)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1002.3  -445.8  -101.1   322.6  1404.1 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.015e+03  3.063e+02  -6.579 6.82e-07 ***
## GDP          7.723e-01  3.957e-02  19.517  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 615.3 on 25 degrees of freedom
## Multiple R-squared:  0.9384,	Adjusted R-squared:  0.9359 
## F-statistic: 380.9 on 1 and 25 DF,  p-value: < 2.2e-16

观察残差走势

1
2
plot(model$residuals, col="red")
abline(h=0, col="blue")

从图中可以看出,残差呈现出有规律的波动,初步判断模型存在自相关。

观察残差和残差滞后一阶的散点图

1
2
3
4
e = model$residuals
plot(e[-1] ~ e[-nrow(df107)], xlab="e(-1)", ylab="e", col="red")
abline(h=0, col="blue")
abline(v=0, col="blue")

残差滞后一阶和残差散点图上,散点主要分布在一三象限,初步判断模型存在正自相关。

自相关图

1
acf(df107$NYSE)

做DW一阶自相关检验

1
2
library(lmtest)
dwtest(model)
1
2
3
4
5
6
## 
## 	Durbin-Watson test
## 
## data:  model
## DW = 0.4285, p-value = 2.373e-08
## alternative hypothesis: true autocorrelation is greater than 0

当n = 27, K’ = 1时,显著性水平为5%的DW统计量临界值为1.316和1.469。 因为DW统计量的值为0.4285,小于1.316,因此模型存在正一阶自相关。

LM检验

1
bgtest(model)
1
2
3
4
5
## 
## 	Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  model
## LM test = 15.923, df = 1, p-value = 6.599e-05

LM检验拒绝无自相关原假设,表明模型存在自相关。

ρ值通过DW值估计得到,用OLS估计广义差分方程

根据DW值计算ρ值

1
2
3
ρ_hat_dw = 1 - dwtest(model)$statistic/2
names(ρ_hat_dw) = "ρ_hat_dw"
print(ρ_hat_dw)
1
2
##  ρ_hat_dw 
## 0.7857516

利用广义差分法估计原模型的b1和b2(此处舍去第一个观测值)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
df107 %>%
  mutate(
    NYSE_star = NYSE - ρ_hat_dw * lag(NYSE, 1),  # 此处lag是滞后的意思
    GDP_star = GDP - ρ_hat_dw * lag(GDP, 1)
  ) %>%
  na.omit() %>%
  lm(NYSE_star ~ GDP_star, data=.) ->model_star_lack
  # 此处差分损失一个样本。
  # 如果样本量足够多,可以不用补齐第一个缺失值。
  # 如果样本量小,可以通过sqrt(1-p^2)*Y[1]、sqrt(1-p^2)*X[1]补齐。

b1_lack = coef(model_star_lack)[1]/(1-ρ_hat_dw)
b2_lack = coef(model_star_lack)[2]
names(b2_lack) = "GDP"

summary(model_star_lack)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
## 
## Call:
## lm(formula = NYSE_star ~ GDP_star, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -996.21 -150.47   22.85  177.56  726.96 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -617.9540   205.3087  -3.010  0.00606 ** 
## GDP_star       0.8624     0.1018   8.468 1.14e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 378.4 on 24 degrees of freedom
## Multiple R-squared:  0.7492,	Adjusted R-squared:  0.7388 
## F-statistic: 71.71 on 1 and 24 DF,  p-value: 1.136e-08
1
2
# 利用DW-test对model_star_lack模型进行检验
dwtest(model_star_lack)
1
2
3
4
5
6
## 
## 	Durbin-Watson test
## 
## data:  model_star_lack
## DW = 0.91175, p-value = 0.0004738
## alternative hypothesis: true autocorrelation is greater than 0
1
2
# 利用LM-test对model_star_lack模型进行检验
bgtest(model_star_lack)
1
2
3
4
5
## 
## 	Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  model_star_lack
## LM test = 7.6287, df = 1, p-value = 0.005745

还原后的估计方程:NYSE^ = -2884.2875152 + 0.8624021 * GDP

利用广义差分法估计原模型的b1和b2(此处包含第一个观测值)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
df107 %>%
  mutate(
    NYSE_star = NYSE - ρ_hat_dw * lag(NYSE, 1), 
    GDP_star = GDP - ρ_hat_dw * lag(GDP, 1),
    NYSE_star_1 = sqrt(1-ρ_hat_dw^2) * NYSE * c(1,rep(0,nrow(df107)-1)),
    GDP_star_1 = sqrt(1-ρ_hat_dw^2) * GDP * c(1,rep(0,nrow(df107)-1))
  )->df107_NA

df107_NA$NYSE_star[is.na(df107_NA$NYSE_star)] = 0
df107_NA$GDP_star[is.na(df107_NA$GDP_star)] = 0
    
df107_NA %>%
  mutate(
    NYSE_star_full = NYSE_star + NYSE_star_1,
    GDP_star_full = GDP_star + GDP_star_1
    ) %>%
  lm(NYSE_star_full ~ GDP_star_full, data=.) ->model_star_full

b1_full = coef(model_star_full)[1]/(1-ρ_hat_dw)
b2_full = coef(model_star_full)[2]
names(b2_full) = "GDP"

summary(model_star_full)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
## 
## Call:
## lm(formula = NYSE_star_full ~ GDP_star_full, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -983.39 -179.82   37.78  194.13  741.10 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -642.2334   204.9785  -3.133  0.00437 ** 
## GDP_star_full    0.8670     0.1022   8.484 7.91e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 379.9 on 25 degrees of freedom
## Multiple R-squared:  0.7422,	Adjusted R-squared:  0.7319 
## F-statistic: 71.99 on 1 and 25 DF,  p-value: 7.912e-09
1
2
# 利用DW-test对model_star_full模型进行检验
dwtest(model_star_full)
1
2
3
4
5
6
## 
## 	Durbin-Watson test
## 
## data:  model_star_full
## DW = 0.92478, p-value = 0.000475
## alternative hypothesis: true autocorrelation is greater than 0
1
2
# 利用LM-test对model_star_full模型进行检验
bgtest(model_star_full)
1
2
3
4
5
## 
## 	Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  model_star_full
## LM test = 7.0339, df = 1, p-value = 0.007998

还原后的估计方程:NYSE^ = -2997.6113989 + 0.8669661 * GDP

ρ值通过OLS残差估计得到

根据OLS残差估计ρ值

1
2
3
4
uxfit = lm(model$residuals ~ 0 + lag(model$residuals), data=df107)
ρ_hat_ols = coef(uxfit)[1]
names(ρ_hat_ols) = "ρ_hat_ols"
print(ρ_hat_ols)
1
2
## ρ_hat_ols 
## 0.7688697

利用广义差分法估计原模型的b1和b2(此处舍去第一个观测值)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
df107 %>%
  mutate(
    NYSE_star = NYSE - ρ_hat_ols * lag(NYSE, 1),  # 此处lag是滞后的意思
    GDP_star = GDP - ρ_hat_ols * lag(GDP, 1)
  ) %>%
  na.omit() %>%
  lm(NYSE_star ~ GDP_star, data=.) ->model_star_lack2
  # 此处差分损失一个样本。
  # 如果样本量足够多,可以不用补齐第一个缺失值。
  # 如果样本量小,可以通过sqrt(1-p^2)*Y[1]、sqrt(1-p^2)*X[1]补齐。

b1_lack2 = coef(model_star_lack2)[1]/(1-ρ_hat_ols)
b2_lack2 = coef(model_star_lack2)[2]
names(b2_lack2) = "GDP"

summary(model_star_lack2)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
## 
## Call:
## lm(formula = NYSE_star ~ GDP_star, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -985.72 -149.82   21.47  178.29  735.02 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -647.21032  205.01705  -3.157  0.00426 ** 
## GDP_star       0.85469    0.09573   8.928 4.29e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 378.5 on 24 degrees of freedom
## Multiple R-squared:  0.7686,	Adjusted R-squared:  0.759 
## F-statistic: 79.71 on 1 and 24 DF,  p-value: 4.286e-09
1
2
# 利用DW-test对model_star_lack2模型进行检验
dwtest(model_star_lack2)
1
2
3
4
5
6
## 
## 	Durbin-Watson test
## 
## data:  model_star_lack2
## DW = 0.9025, p-value = 0.0004222
## alternative hypothesis: true autocorrelation is greater than 0
1
2
# 利用LM-test对model_star_lack2模型进行检验
bgtest(model_star_lack2)
1
2
3
4
5
## 
## 	Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  model_star_lack2
## LM test = 7.7581, df = 1, p-value = 0.005347

还原后的估计方程:NYSE^ = -2800.1964927 + 0.8546938 * GDP

利用广义差分法估计原模型的b1和b2(此处包含第一个观测值)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
df107 %>%
  mutate(
    NYSE_star = NYSE - ρ_hat_ols * lag(NYSE, 1), 
    GDP_star = GDP - ρ_hat_ols * lag(GDP, 1),
    NYSE_star_1 = sqrt(1-ρ_hat_ols^2) * NYSE * c(1,rep(0,nrow(df107)-1)),
    GDP_star_1 = sqrt(1-ρ_hat_ols^2) * GDP * c(1,rep(0,nrow(df107)-1))
  )->df107_NA2

df107_NA2$NYSE_star[is.na(df107_NA2$NYSE_star)] = 0
df107_NA2$GDP_star[is.na(df107_NA2$GDP_star)] = 0

df107_NA2 %>%
  mutate(
    NYSE_star_full = NYSE_star + NYSE_star_1,
    GDP_star_full = GDP_star + GDP_star_1
  ) %>%
  lm(NYSE_star_full ~ GDP_star_full, data=.) ->model_star_full2

b1_full2 = coef(model_star_full)[1]/(1-ρ_hat_ols)
b2_full2 = coef(model_star_full)[2]
names(b2_full2) = "GDP"

summary(model_star_full2)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
## 
## Call:
## lm(formula = NYSE_star_full ~ GDP_star_full, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -974.06 -180.71   36.12  194.15  748.42 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -673.47297  204.23432  -3.298  0.00292 ** 
## GDP_star_full    0.86014    0.09591   8.968 2.75e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 379.8 on 25 degrees of freedom
## Multiple R-squared:  0.7629,	Adjusted R-squared:  0.7534 
## F-statistic: 80.43 on 1 and 25 DF,  p-value: 2.752e-09
1
2
# 利用DW-test对model_star_full2模型进行检验
dwtest(model_star_full2)
1
2
3
4
5
6
## 
## 	Durbin-Watson test
## 
## data:  model_star_full2
## DW = 0.91782, p-value = 0.0004343
## alternative hypothesis: true autocorrelation is greater than 0
1
2
# 利用LM-test对model_star_full2模型进行检验
bgtest(model_star_full2)
1
2
3
4
5
## 
## 	Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  model_star_full2
## LM test = 7.1581, df = 1, p-value = 0.007463

还原后的估计方程:NYSE^ = -2778.6637516 + 0.8669661 * GDP

利用一阶差分法对变换后的模型进行估计

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
df107 %>%
  mutate(
    NYSE_star = NYSE - lag(NYSE, 1),
    GDP_star = GDP - lag(GDP, 1)
  ) %>%
  na.omit() %>%
  lm(NYSE_star ~ 0 + GDP_star, data=.) ->model_star_3

b2_3 = coef(model_star_3)
names(b2_3) = "GDP"
b1_3 = mean(df107$NYSE) - b2_3 * mean(df107$GDP)
names(b1_3 ) = "Intercept"

summary(model_star_3)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
## 
## Call:
## lm(formula = NYSE_star ~ 0 + GDP_star, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1115.61  -238.45   -34.67   103.35   616.88 
## 
## Coefficients:
##          Estimate Std. Error t value Pr(>|t|)    
## GDP_star   0.8684     0.1829   4.749 7.14e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 400.6 on 25 degrees of freedom
## Multiple R-squared:  0.4743,	Adjusted R-squared:  0.4533 
## F-statistic: 22.56 on 1 and 25 DF,  p-value: 7.136e-05
1
2
# 利用LM-test对model_star_3模型进行检验
bgtest(model_star_3)
1
2
3
4
5
## 
## 	Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  model_star_3
## LM test = 7.0803, df = 1, p-value = 0.007794

还原后的估计方程:NYSE^ = -2701.4791882 + 0.8684273 * GDP