forked from tealeg/xlsx
-
Notifications
You must be signed in to change notification settings - Fork 0
/
stream_file_builder.go
245 lines (227 loc) · 9.2 KB
/
stream_file_builder.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
// Authors: Ryan Hollis (ryanh@)
// The purpose of StreamFileBuilder and StreamFile is to allow streamed writing of XLSX files.
// Directions:
// 1. Create a StreamFileBuilder with NewStreamFileBuilder() or NewStreamFileBuilderForPath().
// 2. Add the sheets and their first row of data by calling AddSheet().
// 3. Call Build() to get a StreamFile. Once built, all functions on the builder will return an error.
// 4. Write to the StreamFile with Write(). Writes begin on the first sheet. New rows are always written and flushed
// to the io. All rows written to the same sheet must have the same number of cells as the header provided when the sheet
// was created or an error will be returned.
// 5. Call NextSheet() to proceed to the next sheet. Once NextSheet() is called, the previous sheet can not be edited.
// 6. Call Close() to finish.
// Future work suggestions:
// Currently the only supported cell type is string, since the main reason this library was written was to prevent
// strings from being interpreted as numbers. It would be nice to have support for numbers and money so that the exported
// files could better take advantage of XLSX's features.
// All text is written with the same text style. Support for additional text styles could be added to highlight certain
// data in the file.
// The current default style uses fonts that are not on Macs by default so opening the XLSX files in Numbers causes a
// pop up that says there are missing fonts. The font could be changed to something that is usually found on Mac and PC.
package xlsx
import (
"archive/zip"
"errors"
"fmt"
"io"
"os"
"strconv"
"strings"
)
type StreamFileBuilder struct {
built bool
xlsxFile *File
zipWriter *zip.Writer
cellTypeToStyleIds map[CellType]int
maxStyleId int
styleIds [][]int
}
const (
sheetFilePathPrefix = "xl/worksheets/sheet"
sheetFilePathSuffix = ".xml"
endSheetDataTag = "</sheetData>"
dimensionTag = `<dimension ref="%s"></dimension>`
// This is the index of the max style that this library will insert into XLSX sheets by default.
// This allows us to predict what the style id of styles that we add will be.
// TestXlsxStyleBehavior tests that this behavior continues to be what we expect.
initMaxStyleId = 1
)
var BuiltStreamFileBuilderError = errors.New("StreamFileBuilder has already been built, functions may no longer be used")
// NewStreamFileBuilder creates an StreamFileBuilder that will write to the the provided io.writer
func NewStreamFileBuilder(writer io.Writer) *StreamFileBuilder {
return &StreamFileBuilder{
zipWriter: zip.NewWriter(writer),
xlsxFile: NewFile(),
cellTypeToStyleIds: make(map[CellType]int),
maxStyleId: initMaxStyleId,
}
}
// NewStreamFileBuilderForPath takes the name of an XLSX file and returns a builder for it.
// The file will be created if it does not exist, or truncated if it does.
func NewStreamFileBuilderForPath(path string) (*StreamFileBuilder, error) {
file, err := os.Create(path)
if err != nil {
return nil, err
}
return NewStreamFileBuilder(file), nil
}
// AddSheet will add sheets with the given name with the provided headers. The headers cannot be edited later, and all
// rows written to the sheet must contain the same number of cells as the header. Sheet names must be unique, or an
// error will be thrown.
func (sb *StreamFileBuilder) AddSheet(name string, headers []string, cellTypes []*CellType) error {
if sb.built {
return BuiltStreamFileBuilderError
}
if len(cellTypes) > len(headers) {
return errors.New("cellTypes is longer than headers")
}
sheet, err := sb.xlsxFile.AddSheet(name)
if err != nil {
// Set built on error so that all subsequent calls to the builder will also fail.
sb.built = true
return err
}
sb.styleIds = append(sb.styleIds, []int{})
row := sheet.AddRow()
if count := row.WriteSlice(&headers, -1); count != len(headers) {
// Set built on error so that all subsequent calls to the builder will also fail.
sb.built = true
return errors.New("failed to write headers")
}
for i, cellType := range cellTypes {
var cellStyleIndex int
var ok bool
if cellType != nil {
// The cell type is one of the attributes of a Style.
// Since it is the only attribute of Style that we use, we can assume that cell types
// map one to one with Styles and their Style ID.
// If a new cell type is used, a new style gets created with an increased id, if an existing cell type is
// used, the pre-existing style will also be used.
cellStyleIndex, ok = sb.cellTypeToStyleIds[*cellType]
if !ok {
sb.maxStyleId++
cellStyleIndex = sb.maxStyleId
sb.cellTypeToStyleIds[*cellType] = sb.maxStyleId
}
sheet.Cols[i].SetType(*cellType)
}
sb.styleIds[len(sb.styleIds)-1] = append(sb.styleIds[len(sb.styleIds)-1], cellStyleIndex)
}
return nil
}
// Build begins streaming the XLSX file to the io, by writing all the XLSX metadata. It creates a StreamFile struct
// that can be used to write the rows to the sheets.
func (sb *StreamFileBuilder) Build() (*StreamFile, error) {
if sb.built {
return nil, BuiltStreamFileBuilderError
}
sb.built = true
parts, err := sb.xlsxFile.MarshallParts()
if err != nil {
return nil, err
}
es := &StreamFile{
zipWriter: sb.zipWriter,
xlsxFile: sb.xlsxFile,
sheetXmlPrefix: make([]string, len(sb.xlsxFile.Sheets)),
sheetXmlSuffix: make([]string, len(sb.xlsxFile.Sheets)),
styleIds: sb.styleIds,
}
for path, data := range parts {
// If the part is a sheet, don't write it yet. We only want to write the XLSX metadata files, since at this
// point the sheets are still empty. The sheet files will be written later as their rows come in.
if strings.HasPrefix(path, sheetFilePathPrefix) {
if err := sb.processEmptySheetXML(es, path, data); err != nil {
return nil, err
}
continue
}
metadataFile, err := sb.zipWriter.Create(path)
if err != nil {
return nil, err
}
_, err = metadataFile.Write([]byte(data))
if err != nil {
return nil, err
}
}
if err := es.NextSheet(); err != nil {
return nil, err
}
return es, nil
}
// processEmptySheetXML will take in the path and XML data of an empty sheet, and will save the beginning and end of the
// XML file so that these can be written at the right time.
func (sb *StreamFileBuilder) processEmptySheetXML(sf *StreamFile, path, data string) error {
// Get the sheet index from the path
sheetIndex, err := getSheetIndex(sf, path)
if err != nil {
return err
}
// Remove the Dimension tag. Since more rows are going to be written to the sheet, it will be wrong.
// It is valid to for a sheet to be missing a Dimension tag, but it is not valid for it to be wrong.
data, err = removeDimensionTag(data, sf.xlsxFile.Sheets[sheetIndex])
if err != nil {
return err
}
// Split the sheet at the end of its SheetData tag so that more rows can be added inside.
prefix, suffix, err := splitSheetIntoPrefixAndSuffix(data)
if err != nil {
return err
}
sf.sheetXmlPrefix[sheetIndex] = prefix
sf.sheetXmlSuffix[sheetIndex] = suffix
return nil
}
// getSheetIndex parses the path to the XLSX sheet data and returns the index
// The files that store the data for each sheet must have the format:
// xl/worksheets/sheet123.xml
// where 123 is the index of the sheet. This file path format is part of the XLSX file standard.
func getSheetIndex(sf *StreamFile, path string) (int, error) {
indexString := path[len(sheetFilePathPrefix) : len(path)-len(sheetFilePathSuffix)]
sheetXLSXIndex, err := strconv.Atoi(indexString)
if err != nil {
return -1, errors.New("Unexpected sheet file name from xlsx package")
}
if sheetXLSXIndex < 1 || len(sf.sheetXmlPrefix) < sheetXLSXIndex ||
len(sf.sheetXmlSuffix) < sheetXLSXIndex || len(sf.xlsxFile.Sheets) < sheetXLSXIndex {
return -1, errors.New("Unexpected sheet index")
}
sheetArrayIndex := sheetXLSXIndex - 1
return sheetArrayIndex, nil
}
// removeDimensionTag will return the passed in XLSX Spreadsheet XML with the dimension tag removed.
// data is the XML data for the sheet
// sheet is the Sheet struct that the XML was created from.
// Can return an error if the XML's dimension tag does not match was is expected based on the provided Sheet
func removeDimensionTag(data string, sheet *Sheet) (string, error) {
x := len(sheet.Cols) - 1
y := len(sheet.Rows) - 1
if x < 0 {
x = 0
}
if y < 0 {
y = 0
}
var dimensionRef string
if x == 0 && y == 0 {
dimensionRef = "A1"
} else {
endCoordinate := GetCellIDStringFromCoords(x, y)
dimensionRef = "A1:" + endCoordinate
}
dataParts := strings.Split(data, fmt.Sprintf(dimensionTag, dimensionRef))
if len(dataParts) != 2 {
return "", errors.New("unexpected Sheet XML: dimension tag not found")
}
return dataParts[0] + dataParts[1], nil
}
// splitSheetIntoPrefixAndSuffix will split the provided XML sheet into a prefix and a suffix so that
// more spreadsheet rows can be inserted in between.
func splitSheetIntoPrefixAndSuffix(data string) (string, string, error) {
// Split the sheet at the end of its SheetData tag so that more rows can be added inside.
sheetParts := strings.Split(data, endSheetDataTag)
if len(sheetParts) != 2 {
return "", "", errors.New("unexpected Sheet XML: SheetData close tag not found")
}
return sheetParts[0], sheetParts[1], nil
}