326 lines
9.6 KiB
Go
326 lines
9.6 KiB
Go
package xlsx
|
|
|
|
import (
	"archive/zip"
	"bytes"
	"errors"
	"fmt"
	"io"
	"strconv"
	"strings"
)
|
|
|
|
// File is a high level structure providing a slice of Sheet structs
|
|
// to the user.
|
|
type File struct {
|
|
worksheets map[string]*zip.File
|
|
worksheetRels map[string]*zip.File
|
|
referenceTable *RefTable
|
|
Date1904 bool
|
|
styles *xlsxStyleSheet
|
|
Sheets []*Sheet
|
|
Sheet map[string]*Sheet
|
|
theme *theme
|
|
DefinedNames []*xlsxDefinedName
|
|
cellStoreConstructor CellStoreConstructor
|
|
rowLimit int
|
|
colLimit int
|
|
valueOnly bool
|
|
}
|
|
|
|
// NoRowLimit and NoColLimit are the row and column limit values that
// NewFile installs before any FileOption is applied.
const (
	NoRowLimit int = -1
	NoColLimit int = -1
)
|
|
|
|
// FileOption is a function that adjusts a File. NewFile calls every
// option it is given on the File it constructs.
type FileOption func(f *File)
|
|
|
|
// RowLimit will limit the rows handled in any given sheet to the
|
|
// first n, where n is the number of rows.
|
|
func RowLimit(n int) FileOption {
|
|
return func(f *File) {
|
|
f.rowLimit = n
|
|
}
|
|
}
|
|
|
|
// ColLimit will limit the columns handled in any given sheet to the
|
|
// first n, where n is the number of columns
|
|
func ColLimit(n int) FileOption {
|
|
return func(f *File) {
|
|
f.colLimit = n
|
|
}
|
|
}
|
|
|
|
// ValueOnly treats all NULL values as meaningless and it will delete all NULL value cells,
|
|
// before decode worksheet.xml. this option can save memory and time when parsing files
|
|
// with a large number of NULL values. But it may also cause accidental injury,
|
|
// because NULL may not really be meaningless. Use with caution
|
|
func ValueOnly() FileOption {
|
|
return func(f *File) {
|
|
f.valueOnly = true
|
|
}
|
|
}
|
|
|
|
// NewFile creates a new File struct. You may pass it zero, one or
|
|
// many FileOption functions that affect the behaviour of the file.
|
|
func NewFile(options ...FileOption) *File {
|
|
f := &File{
|
|
Sheet: make(map[string]*Sheet),
|
|
Sheets: make([]*Sheet, 0),
|
|
DefinedNames: make([]*xlsxDefinedName, 0),
|
|
rowLimit: NoRowLimit,
|
|
colLimit: NoColLimit,
|
|
cellStoreConstructor: NewMemoryCellStore,
|
|
}
|
|
for _, opt := range options {
|
|
opt(f)
|
|
}
|
|
return f
|
|
}
|
|
|
|
// OpenFile will take the name of an XLSX file and returns a populated
|
|
// xlsx.File struct for it. You may pass it zero, one or
|
|
// many FileOption functions that affect the behaviour of the file.
|
|
func OpenFile(fileName string, options ...FileOption) (file *File, err error) {
|
|
wrap := func(err error) (*File, error) {
|
|
return nil, fmt.Errorf("OpenFile: %w", err)
|
|
}
|
|
|
|
var z *zip.ReadCloser
|
|
z, err = zip.OpenReader(fileName)
|
|
if err != nil {
|
|
return wrap(err)
|
|
}
|
|
defer z.Close()
|
|
|
|
file, err = ReadZip(z, options...)
|
|
if err != nil {
|
|
return wrap(err)
|
|
}
|
|
return file, nil
|
|
}
|
|
|
|
// OpenBinary() take bytes of an XLSX file and returns a populated
|
|
// xlsx.File struct for it.
|
|
func OpenBinary(bs []byte, options ...FileOption) (*File, error) {
|
|
r := bytes.NewReader(bs)
|
|
return OpenReaderAt(r, int64(r.Len()), options...)
|
|
|
|
}
|
|
|
|
// OpenReaderAt() take io.ReaderAt of an XLSX file and returns a populated
|
|
// xlsx.File struct for it.
|
|
func OpenReaderAt(r io.ReaderAt, size int64, options ...FileOption) (*File, error) {
|
|
file, err := zip.NewReader(r, size)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return ReadZipReader(file, options...)
|
|
}
|
|
|
|
// A convenient wrapper around File.ToSlice, FileToSlice will
|
|
// return the raw data contained in an Excel XLSX file as three
|
|
// dimensional slice. The first index represents the sheet number,
|
|
// the second the row number, and the third the cell number.
|
|
//
|
|
// For example:
|
|
//
|
|
// var mySlice [][][]string
|
|
// var value string
|
|
// mySlice = xlsx.FileToSlice("myXLSX.xlsx")
|
|
// value = mySlice[0][0][0]
|
|
//
|
|
// Here, value would be set to the raw value of the cell A1 in the
|
|
// first sheet in the XLSX file.
|
|
func FileToSlice(path string, options ...FileOption) ([][][]string, error) {
|
|
f, err := OpenFile(path, options...)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return f.ToSlice()
|
|
}
|
|
|
|
// FileToSliceUnmerged is a wrapper around File.ToSliceUnmerged.
|
|
// It returns the raw data contained in an Excel XLSX file as three
|
|
// dimensional slice. Merged cells will be unmerged. Covered cells become the
|
|
// values of theirs origins.
|
|
func FileToSliceUnmerged(path string, options ...FileOption) ([][][]string, error) {
|
|
f, err := OpenFile(path, options...)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return f.ToSliceUnmerged()
|
|
}
|
|
|
|
func (f *File) makeWorkbook() xlsxWorkbook {
|
|
return xlsxWorkbook{
|
|
FileVersion: xlsxFileVersion{AppName: "Go XLSX"},
|
|
WorkbookPr: xlsxWorkbookPr{ShowObjects: "all"},
|
|
BookViews: xlsxBookViews{
|
|
WorkBookView: []xlsxWorkBookView{
|
|
{
|
|
ShowHorizontalScroll: true,
|
|
ShowSheetTabs: true,
|
|
ShowVerticalScroll: true,
|
|
TabRatio: 204,
|
|
WindowHeight: 8192,
|
|
WindowWidth: 16384,
|
|
XWindow: "0",
|
|
YWindow: "0",
|
|
},
|
|
},
|
|
},
|
|
Sheets: xlsxSheets{Sheet: make([]xlsxSheet, len(f.Sheets))},
|
|
CalcPr: xlsxCalcPr{
|
|
IterateCount: 100,
|
|
RefMode: "A1",
|
|
Iterate: false,
|
|
IterateDelta: 0.001,
|
|
},
|
|
}
|
|
}
|
|
|
|
// replaceRelationshipsNameSpace rewrites marshalled workbook XML so
// that the relationships namespace is declared once, on the root
// element, under the prefix "r".
//
// Some tools that read XLSX files have very strict requirements
// about the structure of the input XML. In particular both Numbers
// on the Mac and SAS dislike inline XML namespace declarations, or
// namespace prefixes that don't match the ones that Excel itself
// uses. The Go XML library doesn't support multiple namespace
// declarations in a single element of a document (see issues #63 and
// #91), so this function is a horrible hack that patches the text
// after the XML marshalling is completed:
//
//   - every inline relationships declaration followed by
//     "relationships:id" is replaced with "r:id";
//   - the first bare <workbook> root tag gains the xmlns:r
//     declaration.
func replaceRelationshipsNameSpace(workbookMarshal string) string {
	const (
		inlineRelAttr   = `xmlns:relationships="http://schemas.openxmlformats.org/officeDocument/2006/relationships" relationships:id`
		prefixedRelAttr = `r:id`
		bareRoot        = `<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">`
		rootWithRelNS   = `<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">`
	)

	withPrefix := strings.ReplaceAll(workbookMarshal, inlineRelAttr, prefixedRelAttr)
	return strings.Replace(withPrefix, bareRoot, rootWithRelNS, 1)
}
|
|
|
|
// addRelationshipNameSpaceToWorksheet patches marshalled worksheet
// XML after the fact: the first bare <worksheet> root tag gains an
// xmlns:r declaration for the relationships namespace, and every
// "<hyperlink id=" is rewritten to "<hyperlink r:id=".
func addRelationshipNameSpaceToWorksheet(worksheetMarshal string) string {
	const (
		bareRoot      = `<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">`
		rootWithRelNS = `<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">`
		bareLink      = `<hyperlink id=`
		prefixedLink  = `<hyperlink r:id=`
	)

	withRoot := strings.Replace(worksheetMarshal, bareRoot, rootWithRelNS, 1)
	return strings.ReplaceAll(withRoot, bareLink, prefixedLink)
}
|
|
|
|
func cellIDStringWithFixed(cellIDString string) string {
|
|
letterPart := strings.Map(letterOnlyMapF, cellIDString)
|
|
intPart := strings.Map(intOnlyMapF, cellIDString)
|
|
|
|
if letterPart != "" && intPart == "" {
|
|
return fixedCellRefChar + letterPart
|
|
} else if letterPart != "" && intPart != "" {
|
|
return fixedCellRefChar + letterPart + fixedCellRefChar + intPart
|
|
}
|
|
|
|
return ""
|
|
}
|
|
|
|
// AutoFilter doesn't work in LibreOffice unless a special "FilterDatabase" tag
|
|
// is present in the "DefinedNames" array. See:
|
|
// - https://github.com/SheetJS/sheetjs/issues/1165
|
|
// - https://bugs.documentfoundation.org/show_bug.cgi?id=118592
|
|
func autoFilterDefinedName(sheet *Sheet, sheetIndex int) (*xlsxDefinedName, error) {
|
|
if sheet.AutoFilter == nil {
|
|
return nil, nil
|
|
}
|
|
|
|
return &xlsxDefinedName{
|
|
Data: fmt.Sprintf(
|
|
"'%s'!%v:%v",
|
|
strings.ReplaceAll(sheet.Name, "'", "''"),
|
|
cellIDStringWithFixed(sheet.AutoFilter.TopLeftCell),
|
|
cellIDStringWithFixed(sheet.AutoFilter.BottomRightCell),
|
|
),
|
|
Name: "_xlnm._FilterDatabase",
|
|
LocalSheetID: sheetIndex - 1,
|
|
Hidden: true,
|
|
}, nil
|
|
}
|
|
|
|
func (f *File) ToSlice() (output [][][]string, err error) {
|
|
output = [][][]string{}
|
|
for _, sheet := range f.Sheets {
|
|
s := [][]string{}
|
|
err := sheet.ForEachRow(func(row *Row) error {
|
|
r := []string{}
|
|
err := row.ForEachCell(func(cell *Cell) error {
|
|
str, err := cell.FormattedValue()
|
|
if err != nil {
|
|
// Recover from strconv.NumError if the value is an empty string,
|
|
// and insert an empty string in the output.
|
|
if numErr, ok := err.(*strconv.NumError); ok && numErr.Num == "" {
|
|
str = ""
|
|
} else {
|
|
return err
|
|
}
|
|
}
|
|
r = append(r, str)
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
s = append(s, r)
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return output, err
|
|
}
|
|
output = append(output, s)
|
|
}
|
|
return output, nil
|
|
}
|
|
|
|
// ToSliceUnmerged returns the raw data contained in the File as three
|
|
// dimensional slice (s. method ToSlice).
|
|
// A covered cell become the value of its origin cell.
|
|
// Example: table where A1:A2 at row 0 and row 1 are merged.
|
|
// | 2011 | Bread | 20 |
|
|
// | | Fish | 70 |
|
|
// | 2012 | 2013 | Egg | 80 |
|
|
// This sheet will be converted to the slice:
|
|
// [
|
|
//
|
|
// [2011 2011 Bread 20]
|
|
// [2011 2011 Fish 70]
|
|
// [2012 2013 Egg 80]
|
|
//
|
|
// ]
|
|
func (f *File) ToSliceUnmerged() (output [][][]string, err error) {
|
|
output, err = f.ToSlice()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
for s, sheet := range f.Sheets {
|
|
err := sheet.ForEachRow(func(row *Row) error {
|
|
return row.ForEachCell(func(cell *Cell) error {
|
|
if cell.HMerge > 0 || cell.VMerge > 0 {
|
|
c, r := cell.GetCoordinates()
|
|
v := output[s][r][c]
|
|
for i := r; i <= r+cell.VMerge; i++ {
|
|
for j := c; j <= c+cell.HMerge; j++ {
|
|
if i != r || j != c {
|
|
output[s][i][j] = v
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
})
|
|
})
|
|
if err != nil {
|
|
return output, err
|
|
}
|
|
}
|
|
|
|
return output, nil
|
|
}
|
|
|
|
// DefinedName is an exported type whose underlying type is
// xlsxDefinedName.
type DefinedName xlsxDefinedName
|