楚新元 | All in R

Welcome to R Square

用 R 获取中国国债收益率曲线数据

楚新元 / 2021-08-18


这里直接给出代码如下,读者自行体验。

get_cn_gb_yield = function(from, to) {
  
  # 规范化日期格式
  from = lubridate::ymd(from)
  to = lubridate::ymd(to)
  
  # 参数检查,参数输入错误后给出必要的提示
  test.internet <- curl::has_internet()
  if (!test.internet) {
    stop('没有发现网络链接...')
  }
  
  if (is.na(as.Date(as.character(from), format = '%Y-%m-%d')) |
      is.na(as.Date(as.character(to), format = '%Y-%m-%d'))) {
    stop('输入的起止日期参数必须是包含年月日的字符。')
  }
  
  if ( to < from ) {
    stop('发现期初日期 > 期末日期,你把两者弄混了吗?')
  }
  
  if ( to > Sys.Date() ) {
    stop('输入的期末日期不能大于当前日期。')
  }
  
  if ( from < "2006-03-01" ) {
    stop('数据库不包含2006的3月1日之前的数据。')
  }
  
  # 生成从起止时间段的完整的日期向量
  dates = seq.Date(from = from, to = to, by = "day")
  
  # 生成每一天的数据下载地址
  url = paste0(
    "http://yield.chinabond.com.cn/cbweb-mn/yc/downBzqxDetail?ycDefIds=2c9081e50a2f9606010a3068cae70001&&zblx=txy&&workTime=",
    dates,
    "&&dxbj=0&&qxlx=0,&&yqqxN=N&&yqqxK=K&&wrjxCBFlag=0&&locale=zh_CN"
  )
  
  # 通过for循环下载数据到data文件夹下
  if (dir.exists("data") == FALSE) dir.create("data")
  for (i in 1:length(url)) {
    download.file(
      url = url[i],
      destfile = paste0("./data/", dates[i], ".xlsx"),
      mode = "wb",
      quiet = TRUE
    )
  }
  
  # 批量读取文件大小不为0的.xlsx文件后合并成一个数据框,同时删除data文件
  data_list = fs::dir_ls(path = "./data", glob = "*.xlsx")
  data_list = data_list[file.info(data_list)$size != 0]
  names(data_list) = data_list
  df = purrr::map_dfr(data_list, readxl::read_xlsx, .id = "Date")
  purrr::walk(fs::dir_ls(path = "./data"), fs::file_delete)
  fs::dir_delete("data")
  
  # 必要的数据清洗加工
  df[, 2] = NULL
  colnames(df)[2:3] = c("Term", "Rate")
  df$Rate = as.numeric(df$Rate)
  df$Date = gsub("./data/", "", df$Date)
  df$Date = gsub(".xlsx", "", df$Date)
  df$Term[df$Term == 0] = "D1"
  df$Term[df$Term == 0.08] = "M1"
  df$Term[df$Term == 0.17] = "M2"
  df$Term[df$Term == 0.25] = "M3"
  df$Term[df$Term == 0.5] = "M6"
  df$Term[df$Term == 0.75] = "M9"
  df$Term[df$Term ==1] = "Y1"
  df$Term[df$Term ==2] = "Y2"
  df$Term[df$Term ==3] = "Y3"
  df$Term[df$Term ==5] = "Y5"
  df$Term[df$Term ==7] = "Y7"
  df$Term[df$Term ==10] = "Y10"
  df$Term[df$Term ==15] = "Y15"
  df$Term[df$Term ==20] = "Y20"
  df$Term[df$Term ==30] = "Y30"
  df$Term[df$Term ==40] = "Y40"
  df$Term[df$Term ==50] = "Y50"
  df = tidyr::pivot_wider(
    df, names_from = Term, values_from = Rate
  )
  
  # 导出数据到Excel文件中
  writexl::write_xlsx(df, "result.xlsx")

}