用R清洗加工数据非常方便,但是R输出的都是规整的数据框。现实当中每天要报的报表还有一些“乱七八糟”的要素,比如标题、报告日期、数据单位、制表、复核、保密等级等。当然有了最重要的数据部分,导出到Excel文件里手动添加剩余的哪些“乱七八糟”的要素也是可以的,如果这个报表只制作一次,我也更倾向于这样做,发挥R和Excel各自的优势。但是如果这个报表是每天都要报送的日报呢?我是绝对不能容忍每天手动做重复性的工作的,用R全自动化处理绝对是最佳选项。下面我们看一个例子:

加载相关R包

1
2
3
4
library(dplyr)
library(kableExtra)
library(openxlsx)
library(lubridate)

生成示例数据

R里面清洗加工数据这块的教程太多了,这里不是我要讲的重点,因此,这里直接随机生成一个数据框。

1
2
3
4
5
v = rnorm(168, 100, 5)
m = matrix(v, ncol = 8)
daily = as.data.frame(m)
colnames(daily) = paste0("x", 1:8)
daily %>% kable() %>% kable_styling(font_size = 12)
x1 x2 x3 x4 x5 x6 x7 x8
99.84290 109.24921 104.88487 96.84534 88.33060 98.61825 101.07965 95.29154
95.44127 89.49064 102.01245 92.60338 97.87719 107.84111 104.49900 102.67968
91.45239 98.61124 90.05239 104.90604 100.56452 103.87428 108.55722 100.80179
99.54620 99.61227 104.01013 108.72346 92.45358 107.22070 108.58786 97.50248
97.24600 104.38164 92.12796 99.91803 93.44502 103.04504 107.32875 88.97769
104.95252 97.57314 91.42042 92.37889 97.15795 101.22746 96.94072 100.65788
100.13574 100.02978 95.79745 98.15251 102.35198 104.75862 95.25714 100.79618
97.76205 101.10656 98.97976 102.16607 105.40002 103.68302 100.81557 97.45314
93.75522 97.39083 94.94486 98.57147 99.63517 110.45601 95.41892 108.36261
99.86049 93.97098 93.60875 102.38046 97.26720 104.15922 96.75553 92.99162
96.46511 111.30827 104.76454 103.09249 100.34949 97.56863 100.12107 95.19653
95.36802 100.18289 100.66387 100.87278 102.97173 103.45668 98.82242 97.97914
98.47227 93.68239 99.23691 97.52753 110.40996 103.08237 96.24730 91.50051
100.66518 101.01135 100.67333 101.40655 102.34820 92.72110 108.16423 100.86098
109.55517 94.24872 103.20541 104.20874 104.27162 97.69686 102.33794 103.73337
98.91116 101.97482 98.65305 107.19028 105.65982 101.16598 98.08743 102.54059
84.12012 102.28363 96.96526 94.28635 100.24081 114.63356 96.97925 98.68866
105.36180 104.02151 101.84210 99.33019 96.00552 92.30054 102.31320 99.31968
95.97090 112.21527 103.48074 104.02691 94.42016 94.89255 92.27357 103.12419
100.50584 105.08587 98.55178 102.14580 104.72846 91.53071 94.80154 101.20619
109.21925 107.22537 101.04751 103.26573 100.17858 100.63557 86.73204 112.24320

定制个性化报表

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
##########################################################################

# 定义报告期
report_period = c("2020-04-08") 

##########################################################################

# 计算报告期年、月、日
report_year = as.numeric(substr(report_period, 1, 4))
report_month = as.numeric(substr(report_period, 6, 7))
report_day = as.numeric(substr(report_period, 9, 10))

##########################################################################

# 数据导入到Excel文件中

## 新建一个工作簿wb、新建一个工作表daily
library(openxlsx)
library(lubridate)
wb = createWorkbook()
addWorksheet(wb, "daily", gridLines = FALSE)

## 设置全局列宽、行高、冻结活动单元格
setColWidths(
  wb, 
  "daily", 
  cols = 1:8,
  widths = c(26, rep(15.5, 5), 12, 18)
)

setRowHeights(
  wb, 
  "daily",
  rows = 1:(nrow(daily) + 4),
  heights = c(20, 35, rep(20, nrow(daily) + 2))
)

## 打印设置
pageSetup(
  wb, 
  "daily",
  orientation = "landscape",
  scale = 97,
  left = 0.7,
  right = 0.7, 
  top = 0.75,
  bottom = 0.75,
  header = 0.3, 
  footer = 0.3,
  fitToWidth = TRUE,
  fitToHeight = TRUE,
  paperSize = 9,
  printTitleRows = NULL, 
  printTitleCols = NULL
)

## 保密提示
secret = c("★内部资料、严格保密")
style_secret = createStyle(
  halign = "right",
  valign = "center",
  wrapText = TRUE,
  fontColour = "red",
  fontSize = 11, 
  fontName = "Arial"
)

mergeCells(
  wb, "daily", 
  rows = 1:1, 
  cols = 1:8
)

addStyle(
  wb, 
  "daily", 
  style = style_secret,
  rows = 1:1, 
  cols = 1:8
)

writeData(wb, "daily", secret,  
          startRow = 1)

## 大标题设置
title = paste0(
  year(report_period), "年" ,
  month(report_period), "月",
  day(report_period), 
  "日经营数据日报"
)

style_title = createStyle(
  halign = "center",
  valign = "center",
  wrapText = TRUE,
  textDecoration = c("bold"),
  fontColour = "black",
  fontSize = 20, 
  fontName = "Arial"
)

mergeCells(
  wb, "daily", 
  rows = 2:2, 
  cols = 1:8
)

addStyle(
  wb, 
  "daily", 
  style = style_title,
  rows = 2:2, 
  cols = 1:8
)

writeData(wb, "daily", title,  
          startRow = 2)

## 报告期设置
date = paste0("报告日期:", Sys.Date())

style_date = createStyle(
  halign = "right",
  valign = "center",
  wrapText = TRUE, 
  fontColour = "black",
  fontSize = 11, 
  fontName = "Arial")

mergeCells(wb, "daily", 
           rows = 3:3, cols = 6:7)

addStyle(wb, "daily", style = style_date,
         rows = 3:3, cols = 6:7)

writeData(wb, "daily", date,  
          startRow = 3, startCol = 6)

## 数据单位设置
unit = "单位:亿元、%"
style_unit = createStyle(
  halign = "center",
  valign = "center",
  wrapText = TRUE, 
  fontColour = "black",
  fontSize = 11, 
  fontName = "Arial")

mergeCells(wb, "daily", 
           rows = 3:3, cols = 8:8)

addStyle(
  wb, 
  "daily", 
  style = style_unit,
  rows = 3:3, 
  cols = 8:8
)

writeData(wb, "daily", unit,  
          startRow = 3, startCol = 8)

## 表头部分设置
style_header = createStyle(
  textDecoration = "Bold",
  halign = "center",
  valign = "center",
  wrapText = TRUE,
  border = "TopBottomLeftRight",
  borderColour = "black",
  fontColour = "white",
  fgFill = "#4F81BD",
  fontSize = 11, 
  fontName = "Arial")

## 数据部分设置
style_data = createStyle(
  valign = "center",
  border = "TopBottomLeftRight",
  borderColour = "black",
  fontSize = 11, 
  fontName = "Arial",
  numFmt = "0.00" )

addStyle(
  wb, 
  "daily", 
  style = style_data,
  rows = 5:(nrow(daily) + 4), 
  cols = 1:8,
  gridExpand = T
)

writeData(
  wb, 
  "daily", 
  daily,
  headerStyle = style_header,
  startRow = 4
)

## 保存工作簿
saveWorkbook(wb, overwrite = TRUE,  
             paste0(report_period, "daily.xlsx"))

#########################################################################

导出为PDF格式

其实导出到Excel基本已经大功告成了,可是有的单位要求发布的报告必须是PDF格式的。如果单纯的需要PDF格式文件,我们可以考虑直接从示例数据导出到PDF,而不需要先生成Excel文件再转为PDF格式,但是这里有个问题,就是数据部分倒是容易,但是报表里那一堆“乱七八糟”的要素,比如标题、报告日期、数据单位、制表、复核、保密等级等要素该如何放进去呢?位置又该怎么精准控制呢?通过已经定制好的Excel文件再转为PDF虽然有点绕远路了,但是能满足报表要求,所以这里仍然选择数据–> Excel文件–> PDF文件。

从Excel文件–> PDF文件,这也不难,打开Excel后利用虚拟打印机打印为PDF格式即可,手工操作工作量也是可以忽略的了。但是,对于一个完美主义极客而言这是不够的,因为将来有可能一次生成的不是一张报表,所以,为了将来不时之需,这里仍难考虑通过程序解决。R里面直接调用虚拟打印机将Excel工作表打印成PDF实现起来估计有点困难,利用JAVA实现的网上倒是有教程 JAVA调用打印机输出PDF文件。因为Excel从2007开始可以直接将Excel工作表转为PDF,这里我们考虑用R调用VBA实现。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# 调用VBA将Excel文件转为PDF格式
library(RDCOMClient)

xlFile = paste(
  "path/to/", "daily.xlsx", sep = report_period  # 这里使用绝对路径
  ) 

xlApp = COMCreate("Excel.Application")
xlApp[["Visible"]] = TRUE 
wb = xlApp[["Workbooks"]]$Open(Filename = xlFile)
sht = wb[["Worksheets"]]$Item(1)
sht$Select()

pdfFile = paste(
  "path/to/", "daily.pdf", sep = report_period  # 这里使用绝对路径
  ) 
if (file.exists(pdfFile) == TRUE)  file.remove(pdfFile)

xlApp[["ActiveSheet"]]$ExportAsFixedFormat(
  IgnorePrintAreas = FALSE,
  Type = 0,    # 输出为PDF
  Filename = pdfFile
  ) 

xlApp$Quit()  # 关闭Excel  

特别需要注意的是:

  • RDCOMClient包目前只支持Windows系统。

  • RDCOMClient包貌似不支持相对路径。

  • 如果已经生成了PDF文件,如果再运行一遍程序,原先生成的PDF文件不会被覆盖,执行ExportAsFixedFormat函数会报错,所以执行ExportAsFixedFormat函数前,必须删除之前已经生成的PDF文件再运行程序。