用 R 获取中国国债收益率曲线数据
楚新元 / 2021-08-18
这里直接给出代码如下,读者自行体验。
get_cn_gb_yield = function(from, to) {
# 规范化日期格式
from = lubridate::ymd(from)
to = lubridate::ymd(to)
# 参数检查,参数输入错误后给出必要的提示
test.internet <- curl::has_internet()
if (!test.internet) {
stop('没有发现网络链接...')
}
if (is.na(as.Date(as.character(from), format = '%Y-%m-%d')) |
is.na(as.Date(as.character(to), format = '%Y-%m-%d'))) {
stop('输入的起止日期参数必须是包含年月日的字符。')
}
if ( to < from ) {
stop('发现期初日期 > 期末日期,你把两者弄混了吗?')
}
if ( to > Sys.Date() ) {
stop('输入的期末日期不能大于当前日期。')
}
if ( from < "2006-03-01" ) {
stop('数据库不包含2006的3月1日之前的数据。')
}
# 生成从起止时间段的完整的日期向量
dates = seq.Date(from = from, to = to, by = "day")
# 生成每一天的数据下载地址
url = paste0(
"http://yield.chinabond.com.cn/cbweb-mn/yc/downBzqxDetail?ycDefIds=2c9081e50a2f9606010a3068cae70001&&zblx=txy&&workTime=",
dates,
"&&dxbj=0&&qxlx=0,&&yqqxN=N&&yqqxK=K&&wrjxCBFlag=0&&locale=zh_CN"
)
# 通过for循环下载数据到data文件夹下
if (dir.exists("data") == FALSE) dir.create("data")
for (i in 1:length(url)) {
download.file(
url = url[i],
destfile = paste0("./data/", dates[i], ".xlsx"),
mode = "wb",
quiet = TRUE
)
}
# 批量读取文件大小不为0的.xlsx文件后合并成一个数据框,同时删除data文件
data_list = fs::dir_ls(path = "./data", glob = "*.xlsx")
data_list = data_list[file.info(data_list)$size != 0]
names(data_list) = data_list
df = purrr::map_dfr(data_list, readxl::read_xlsx, .id = "Date")
purrr::walk(fs::dir_ls(path = "./data"), fs::file_delete)
fs::dir_delete("data")
# 必要的数据清洗加工
df[, 2] = NULL
colnames(df)[2:3] = c("Term", "Rate")
df$Rate = as.numeric(df$Rate)
df$Date = gsub("./data/", "", df$Date)
df$Date = gsub(".xlsx", "", df$Date)
df$Term[df$Term == 0] = "D1"
df$Term[df$Term == 0.08] = "M1"
df$Term[df$Term == 0.17] = "M2"
df$Term[df$Term == 0.25] = "M3"
df$Term[df$Term == 0.5] = "M6"
df$Term[df$Term == 0.75] = "M9"
df$Term[df$Term ==1] = "Y1"
df$Term[df$Term ==2] = "Y2"
df$Term[df$Term ==3] = "Y3"
df$Term[df$Term ==5] = "Y5"
df$Term[df$Term ==7] = "Y7"
df$Term[df$Term ==10] = "Y10"
df$Term[df$Term ==15] = "Y15"
df$Term[df$Term ==20] = "Y20"
df$Term[df$Term ==30] = "Y30"
df$Term[df$Term ==40] = "Y40"
df$Term[df$Term ==50] = "Y50"
df = tidyr::pivot_wider(
df, names_from = Term, values_from = Rate
)
# 导出数据到Excel文件中
writexl::write_xlsx(df, "result.xlsx")
}