9月份初,因为人行LPR的事情,帮着原单位同事做了点数据处理。如今我已习惯了用代码来处理数据,而不是Excel,当然写文档和做幻灯片(PPT仅仅是微软的幻灯片生成工具,我更喜欢叫幻灯片)也喜欢在RStudio中利用Rmarkdown中完成。对于少量非重复性的工作,用Excel或许是合适的,但是对于数据量达到上万行甚至上百万行的时候,Excel运行起来就很吃力了,更重要的是通过鼠标点击操纵Excel得到结果没有记录数据处理的整个过程,下次要想得到同样的结果还得重新理清思路然后一顿点点点,费时费力,而通过写代码处理数据就不会存在上面的问题,虽然学习写代码需要花费一定的时间。在我看来写代码处理数据至少有以下几个方面的好处:

  • 通过代码得到的结果可行度更好,出错的可能性基本为0,除非表结构变了,或者调用的包版本更新了,而你没有及时更新代码。

  • 通过写代码可以保证结果的可重复性,无论任何时候,只要原始数据还在,重新运行下代码即可获得同样的结果。

  • 代码记录了数据获取、清洗、加工、建模、可视化、生成报告的整个过程,一气呵成,减少了各个软件之间复制粘贴切换,思路不容易被打断,只专注于业务本身。

  • 代码有利于同事之间分享,甚至可以考虑挂在GitHub或者GitLab上,大伙一同改进,集思广益,统一行动。

  • 短期内,写代码可能稍费时费力,但是长期看,绝对是一劳永逸的事情,可以节省大量劳动力,尤其是当你下个月需要处理同样的任务时,你一定会深有体会。同时一旦代码完善后,对生成结果的复核将是多余的。

除非你可以安排小强完成工作(这是对你来说最优雅的方式,只需一条命令,且交互友好),否则强烈建议你命令计算机完成工作。人应该像个人一样活着,而不是整天干着计算机应该干的事情。在你有大量数据疲于应付的时候,在你月月需要出具有相同套路报告的时候,或许你可以考虑搜索下R、Python,写文档和做幻灯片的时候可以考虑搜索下Rmarkdown,或许它能解放你。

Talk is cheap, show me the code.说了太多的废话,下面我直接将这几天自动生成余额和收益率(包括贷款、票据、债券等按照期限、产品维度)的代码放在这里,你不一定要看懂它,但是你需要感受下代码的威力,同时你需要知道除了office办公软件外,还有许多更优雅的方式完成任务。

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
##-------------------------------------------------------------##

# 计算贷款各期限各产品的加权平均利率
library(openxlsx)
library(tidyverse)

loans = read.xlsx("./data/loans.xlsx")
loans %>% 
  filter(`序号` != "合计") %>% 
  select(
    `当日余额`, `科目[名称]`, `业务品种[名称]`, 
    `执行月利率(千分比)`, `期限(月)`,
    `客户分类[名称]`, `发放日`
    ) %>% 
  rename(
    	loans = `当日余额`, 
    	item = `科目[名称]`, 
    	product = `业务品种[名称]`,
    	rate = `执行月利率(千分比)`, 
    	term = `期限(月)`,
    	type = `客户分类[名称]`, 
    	issued = `发放日`
    	) %>% 
  mutate(
    item = substring(item, 2),
    product = substring(product, 2),
    rate = 1.2 * rate,
    type = substring(type, 2),
    issued = as.Date(substring(issued, 2))
    ) %>% 
  mutate(line = 
           ifelse(type == "个人经营客户" | 
                    type == "小企业客户", "小企业",
                  ifelse(type == "个人客户", "零售", 
                         ifelse(type == "公司客户", "公司类", 
                                "其他")
                  )
           )
  ) %>% 
  filter(
    product != "银行承兑汇票贴现",
    product != "商业承兑汇票贴现",
    line != "其他"
  ) %>% 
  mutate(
    term_str = 
    ifelse(term / 12 <= 1, "一年以内",
      ifelse(
        term / 12 > 5, "五年以上", "一年到五年"
        )
      )
    )  -> loans_tidy
    
loans_tidy %>% 
  group_by(term_str, line, type, product) %>% 
  summarise(
    rate_weighted = sum(rate * loans / sum(loans))
  ) %>% 
  spread(term_str, rate_weighted) %>% 
  select(
    type, product, `一年以内`,
    `一年到五年`, `五年以上`, line
    ) %>% 
  arrange(line) %>% 
  rename(
    `产品名称` = product,
    `备注` = line
    ) -> loans_rate_weighted
loans_rate_weighted$type = NULL
write.xlsx(loans_rate_weighted, 
           "./result/loans_rate_weighted.xlsx")

##-------------------------------------------------------------##

# 计算贷款各期限各产品的余额
loans_tidy %>%
  group_by(term_str, line, type, product) %>% 
  summarise(
    loans_total = sum(loans)
  ) %>% 
  spread(term_str, loans_total) %>% 
  select(
    type, product, `一年以内`,
    `一年到五年`, `五年以上`, line
  ) %>% 
  arrange(line) %>% 
  rename(
    `产品名称` = product,
    `备注` = line
  ) -> loans_balance
loans_balance$type = NULL
write.xlsx(loans_balance, 
           "./result/loans_balance.xlsx")

##-------------------------------------------------------------##

# 计算2019年新发放贷款的加权平均利率
loans_tidy %>%
  filter(issued >= "2019-01-01") %>% 
  group_by(term_str, line, type, product) %>% 
  summarise(
    rate_weighted = sum(rate * loans / sum(loans))
  ) %>% 
  spread(term_str, rate_weighted) %>% 
  select(
    type, product, `一年以内`,
    `一年到五年`, `五年以上`, line
  ) %>% 
  arrange(line) %>% 
  rename(
    `产品名称` = product,
    `备注` = line
  ) -> loans_new_rate_weighted
loans_new_rate_weighted$type = NULL
write.xlsx(loans_new_rate_weighted, 
           "./result/loans_new_rate_weighted.xlsx")

##-------------------------------------------------------------##

# 计算各期限各种类债券的余额
library(readxl)
bonds = read_xls("./data/bonds.xls", skip = 1)
bonds = na.omit(bonds)

bonds %>% 
  rename(
    term = `剩余期限`,
    type = `账户分类`,
    product = `债券种类`,
    quant = `持债量`
    ) %>% 
  mutate(
    term_str = 
    ifelse(
      term  <= 1, "一年以内",
      ifelse(
        term  > 5, "五年以上", "一年到五年"
      )
    )
  ) %>% 
  group_by(term_str, type, product) %>% 
  summarise(
    quant_total = sum(quant)
  ) %>% 
  spread(term_str, quant_total) %>%
  select(type, product, `一年以内`,
         `一年到五年`, `五年以上`) %>% 
  rename(
    `账户分类` = type,
    `债券种类` = product
  ) -> bonds_balance
  write.xlsx(bonds_balance, 
             "./result/bonds_balance.xlsx")

##-------------------------------------------------------------##

# 计算各期限各种类债券的加权平均利率
bonds %>% 
  rename(
    term = `剩余期限`,
    type = `账户分类`,
    product = `债券种类`,
    quant = `持债量`,
    rate = `票面利率`
  ) %>% 
  mutate(
    rate = rate * 100,
    term_str = 
    ifelse(
      term  <= 1, "一年以内",
      ifelse(
        term  > 5, "五年以上", "一年到五年"
      )
    )
  ) %>% 
  group_by(term_str, type, product) %>% 
  summarise(
    rate_weighted = sum(rate * quant / sum(quant))
  ) %>%
  spread(term_str, rate_weighted) %>%
  select(type, product, `一年以内`,
         `一年到五年`, `五年以上`) %>% 
  rename(
    `账户分类` = type,
    `债券种类` = product
  ) -> bonds_rate_weighted
write.xlsx(bonds_rate_weighted,
           "./result/bonds_rate_weighted.xlsx")

##-------------------------------------------------------------##

# 计算各期限各种类票据融资的加权平均收益率
bills = read_xls("./data/bills.xls", skip = 2)
bills %>% 
  rename(
    type = `票据类型`,
    value = `余额`,
    term = `计息天数`,
    rate = `贴现利率`
  ) %>% 
  mutate(
    term = as.numeric(term),
    rate = as.numeric(rate),
    term_str = 
    ifelse(
      term / 365 <= 1, "一年以内",
      ifelse(
        term / 365 > 5, "五年以上", "一年到五年"
      )
    )
  ) %>% 
  group_by(term_str, type) %>% 
  summarise(
    rate_weighted = sum(rate * value / sum(value))
  ) %>% 
  spread(term_str, rate_weighted) %>% 
  rename(`票据类型` = type) -> bills_rate_weighted
write.xlsx(bills_rate_weighted, 
           "./result/bills_rate_weighted.xlsx")

##-------------------------------------------------------------##

# 计算各期限各种类票据融资的余额
bills %>% 
  rename(
    type = `票据类型`,
    value = `余额`,
    term = `计息天数`,
    rate = `贴现利率`
  ) %>% 
  mutate(
    term = as.numeric(term),
    rate = as.numeric(rate),
    term_str = 
    ifelse(
      term / 365 <= 1, "一年以内",
      ifelse(
        term / 365 > 5, "五年以上", "一年到五年"
      )
    )
  ) %>% 
  group_by(term_str, type) %>% 
  summarise(
    value_total = sum(value)
  ) %>% 
  spread(term_str, value_total) %>% 
  rename(`票据类型` = type) -> bills_balance
write.xlsx(bills_balance, 
           "./result/bills_balance.xlsx")

##-------------------------------------------------------------##