c5_labsci/zciyon/xlsx/file.go
2026-01-27 00:52:00 +08:00

326 lines
9.6 KiB
Go

package xlsx
import (
	"archive/zip"
	"bytes"
	"errors"
	"fmt"
	"io"
	"strconv"
	"strings"
)
// File is a high level structure providing a slice of Sheet structs
// to the user.
type File struct {
	// worksheets holds zip entries keyed by string.
	// NOTE(review): presumably keyed by worksheet part name — confirm
	// against the reader code.
	worksheets map[string]*zip.File
	// worksheetRels holds zip entries keyed by string.
	// NOTE(review): presumably the relationship parts belonging to the
	// worksheets — confirm against the reader code.
	worksheetRels map[string]*zip.File
	// referenceTable points at the file's RefTable.
	// NOTE(review): presumably the shared-string table — confirm.
	referenceTable *RefTable
	// Date1904 is an exported flag.
	// NOTE(review): presumably selects the 1904 date system — confirm.
	Date1904 bool
	// styles points at the file's style sheet.
	styles *xlsxStyleSheet
	// Sheets is the slice of sheets; NewFile initialises it to an empty,
	// non-nil slice. ToSlice iterates the sheets in this order.
	Sheets []*Sheet
	// Sheet maps a string key to a sheet; NewFile initialises it to an
	// empty map.
	// NOTE(review): presumably keyed by sheet name — confirm.
	Sheet map[string]*Sheet
	// theme points at the file's theme.
	theme *theme
	// DefinedNames is the slice of defined names; NewFile initialises it to
	// an empty, non-nil slice.
	DefinedNames []*xlsxDefinedName
	// cellStoreConstructor is the cell store constructor; NewFile defaults
	// it to NewMemoryCellStore.
	cellStoreConstructor CellStoreConstructor
	// rowLimit is set by the RowLimit option; NewFile defaults it to
	// NoRowLimit.
	rowLimit int
	// colLimit is set by the ColLimit option; NewFile defaults it to
	// NoColLimit.
	colLimit int
	// valueOnly is set to true by the ValueOnly option.
	valueOnly bool
}
// NoRowLimit and NoColLimit are the default values NewFile assigns to a
// File's row and column limits when no RowLimit or ColLimit option is
// supplied.
const (
	NoRowLimit int = -1
	NoColLimit int = -1
)
// FileOption is a function that adjusts a File. NewFile calls every option
// it is given, in order, on the File it is constructing.
type FileOption func(*File)
// RowLimit returns a FileOption that sets the File's row limit to n, so
// that only the first n rows of any given sheet are handled.
func RowLimit(n int) FileOption {
	setRowLimit := func(f *File) {
		f.rowLimit = n
	}
	return setRowLimit
}
// ColLimit returns a FileOption that sets the File's column limit to n, so
// that only the first n columns of any given sheet are handled.
func ColLimit(n int) FileOption {
	return FileOption(func(f *File) { f.colLimit = n })
}
// ValueOnly returns a FileOption that switches on the File's value-only
// mode. In this mode every NULL value is treated as meaningless and NULL
// value cells are deleted before worksheet.xml is decoded, which can save
// memory and time when parsing files that contain a large number of NULL
// values. Use it with caution: a NULL value is not necessarily meaningless,
// so dropping those cells may lose information.
//
// The option itself only sets the flag on the File.
func ValueOnly() FileOption {
	var enable FileOption = func(f *File) {
		f.valueOnly = true
	}
	return enable
}
// NewFile builds an empty File. Its sheet map, sheet slice and defined-name
// slice start out empty (non-nil), the row and column limits default to
// NoRowLimit and NoColLimit, and cells are stored with NewMemoryCellStore.
// Any FileOption functions passed in are then applied, in order, and may
// override those defaults.
func NewFile(options ...FileOption) *File {
	f := new(File)
	f.Sheet = make(map[string]*Sheet)
	f.Sheets = make([]*Sheet, 0)
	f.DefinedNames = make([]*xlsxDefinedName, 0)
	f.rowLimit = NoRowLimit
	f.colLimit = NoColLimit
	f.cellStoreConstructor = NewMemoryCellStore

	for i := range options {
		options[i](f)
	}
	return f
}
// OpenFile opens the XLSX file named fileName as a zip archive and hands it
// to ReadZip, returning the populated File. Zero or more FileOption
// functions may be supplied; they are passed through to ReadZip.
//
// Any failure, whether opening the archive or reading it, is returned
// wrapped with an "OpenFile: " prefix, and the returned File is nil.
func OpenFile(fileName string, options ...FileOption) (file *File, err error) {
	archive, openErr := zip.OpenReader(fileName)
	if openErr != nil {
		return nil, fmt.Errorf("OpenFile: %w", openErr)
	}
	defer archive.Close()

	parsed, readErr := ReadZip(archive, options...)
	if readErr != nil {
		return nil, fmt.Errorf("OpenFile: %w", readErr)
	}
	return parsed, nil
}
// OpenBinary takes the bytes of an XLSX file and returns a populated File
// for it. It wraps bs in a reader and delegates to OpenReaderAt, passing
// the options through unchanged.
func OpenBinary(bs []byte, options ...FileOption) (*File, error) {
	// A freshly created bytes.Reader has len(bs) unread bytes, so the
	// archive size is simply the slice length.
	return OpenReaderAt(bytes.NewReader(bs), int64(len(bs)), options...)
}
// OpenReaderAt takes an io.ReaderAt positioned over an XLSX file of the
// given size in bytes and returns a populated File for it. The data is
// opened as a zip archive and handed to ReadZipReader together with the
// options; an error opening the archive is returned unchanged.
func OpenReaderAt(r io.ReaderAt, size int64, options ...FileOption) (*File, error) {
	archive, err := zip.NewReader(r, size)
	if err == nil {
		return ReadZipReader(archive, options...)
	}
	return nil, err
}
// FileToSlice is a convenience wrapper around OpenFile and File.ToSlice.
// It returns the data contained in an Excel XLSX file as a three
// dimensional slice: the first index selects the sheet, the second the
// row, and the third the cell.
//
// For example:
//
//	mySlice, err := xlsx.FileToSlice("myXLSX.xlsx")
//	if err != nil {
//		// handle the error
//	}
//	value := mySlice[0][0][0]
//
// Here, value would be set to the value of the cell A1 in the first sheet
// of the XLSX file.
func FileToSlice(path string, options ...FileOption) ([][][]string, error) {
	var (
		book *File
		err  error
	)
	if book, err = OpenFile(path, options...); err != nil {
		return nil, err
	}
	return book.ToSlice()
}
// FileToSliceUnmerged is a convenience wrapper around OpenFile and
// File.ToSliceUnmerged. It returns the data contained in an Excel XLSX file
// as a three dimensional slice in which merged cells have been unmerged:
// every covered cell takes the value of the merge's origin cell.
func FileToSliceUnmerged(path string, options ...FileOption) ([][][]string, error) {
	book, err := OpenFile(path, options...)
	if err == nil {
		return book.ToSliceUnmerged()
	}
	return nil, err
}
// makeWorkbook returns an xlsxWorkbook filled with fixed default settings
// (application name, a single book view, calculation properties) and a
// Sheets list pre-sized to hold one zero-valued entry per sheet in f.Sheets.
func (f *File) makeWorkbook() xlsxWorkbook {
	// The single default book view.
	view := xlsxWorkBookView{
		ShowHorizontalScroll: true,
		ShowSheetTabs:        true,
		ShowVerticalScroll:   true,
		TabRatio:             204,
		WindowHeight:         8192,
		WindowWidth:          16384,
		XWindow:              "0",
		YWindow:              "0",
	}

	var wb xlsxWorkbook
	wb.FileVersion = xlsxFileVersion{AppName: "Go XLSX"}
	wb.WorkbookPr = xlsxWorkbookPr{ShowObjects: "all"}
	wb.BookViews = xlsxBookViews{WorkBookView: []xlsxWorkBookView{view}}
	wb.Sheets = xlsxSheets{Sheet: make([]xlsxSheet, len(f.Sheets))}
	wb.CalcPr = xlsxCalcPr{
		IterateCount: 100,
		RefMode:      "A1",
		Iterate:      false,
		IterateDelta: 0.001,
	}
	return wb
}
// replaceRelationshipsNameSpace post-processes a marshalled workbook so
// that it uses the namespace layout Excel itself produces.
//
// Some tools that read XLSX files have very strict requirements about the
// structure of the input XML. In particular both Numbers on the Mac and SAS
// dislike inline XML namespace declarations, or namespace prefixes that
// don't match the ones Excel uses. This is a problem because the Go XML
// library doesn't support multiple namespace declarations in a single
// element of a document, so this function is a horrible hack that patches
// the text after XML marshalling is completed:
//
//   - every inline `xmlns:relationships="..." relationships:id` attribute
//     becomes a plain `r:id` attribute;
//   - the first bare `<workbook xmlns="...">` root tag gains the matching
//     `xmlns:r` declaration (a dirty hack for issues #63 and #91, since
//     encoding/xml does not allow additional namespaces to be defined in
//     the root element).
func replaceRelationshipsNameSpace(workbookMarshal string) string {
	const (
		inlineRelID = `xmlns:relationships="http://schemas.openxmlformats.org/officeDocument/2006/relationships" relationships:id`
		bareRoot    = `<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">`
		rootWithR   = `<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">`
	)

	prefixed := strings.ReplaceAll(workbookMarshal, inlineRelID, `r:id`)
	return strings.Replace(prefixed, bareRoot, rootWithR, 1)
}
// addRelationshipNameSpaceToWorksheet patches a marshalled worksheet: the
// first bare `<worksheet xmlns="...">` root tag gains the `xmlns:r`
// relationships namespace declaration, and every `<hyperlink id=` attribute
// is rewritten to `<hyperlink r:id=`.
func addRelationshipNameSpaceToWorksheet(worksheetMarshal string) string {
	const (
		bareRoot  = `<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">`
		rootWithR = `<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">`
	)

	withNamespace := strings.Replace(worksheetMarshal, bareRoot, rootWithR, 1)
	return strings.ReplaceAll(withNamespace, `<hyperlink id=`, `<hyperlink r:id=`)
}
// cellIDStringWithFixed rebuilds a cell reference with fixedCellRefChar in
// front of each part. The input is filtered twice with strings.Map, once
// through letterOnlyMapF and once through intOnlyMapF, and the result is:
//
//   - "" when the letter part is empty;
//   - fixedCellRefChar + letters when only the letter part is present;
//   - fixedCellRefChar + letters + fixedCellRefChar + digits otherwise.
//
// NOTE(review): fixedCellRefChar is presumably "$", turning e.g. "A1" into
// "$A$1" — confirm against its definition.
func cellIDStringWithFixed(cellIDString string) string {
	letters := strings.Map(letterOnlyMapF, cellIDString)
	digits := strings.Map(intOnlyMapF, cellIDString)

	switch {
	case letters == "":
		return ""
	case digits == "":
		return fixedCellRefChar + letters
	default:
		return fixedCellRefChar + letters + fixedCellRefChar + digits
	}
}
// autoFilterDefinedName builds the hidden "_xlnm._FilterDatabase" defined
// name for a sheet's auto filter. AutoFilter doesn't work in LibreOffice
// unless this special entry is present in the "DefinedNames" array. See:
//   - https://github.com/SheetJS/sheetjs/issues/1165
//   - https://bugs.documentfoundation.org/show_bug.cgi?id=118592
//
// It returns (nil, nil) when the sheet has no AutoFilter. Otherwise the
// entry's Data is the single-quoted sheet name (embedded single quotes
// doubled), "!" and the filter range, with both corners passed through
// cellIDStringWithFixed. LocalSheetID is set to sheetIndex - 1.
func autoFilterDefinedName(sheet *Sheet, sheetIndex int) (*xlsxDefinedName, error) {
	filter := sheet.AutoFilter
	if filter == nil {
		return nil, nil
	}

	quotedName := strings.ReplaceAll(sheet.Name, "'", "''")
	topLeft := cellIDStringWithFixed(filter.TopLeftCell)
	bottomRight := cellIDStringWithFixed(filter.BottomRightCell)

	definedName := &xlsxDefinedName{
		Name:         "_xlnm._FilterDatabase",
		Data:         fmt.Sprintf("'%s'!%v:%v", quotedName, topLeft, bottomRight),
		LocalSheetID: sheetIndex - 1,
		Hidden:       true,
	}
	return definedName, nil
}
// ToSlice returns the contents of the File as a three dimensional slice of
// strings: the first index selects the sheet (in f.Sheets order), the
// second the row, and the third the cell. Each string is the cell's
// FormattedValue.
//
// A cell whose FormattedValue fails with a *strconv.NumError for an empty
// input (Num == "") is recorded as an empty string instead of aborting.
// Any other error stops the conversion; the sheets completed so far are
// returned together with the error.
func (f *File) ToSlice() (output [][][]string, err error) {
	output = [][][]string{}
	for _, sheet := range f.Sheets {
		s := [][]string{}
		rowsErr := sheet.ForEachRow(func(row *Row) error {
			r := []string{}
			cellsErr := row.ForEachCell(func(cell *Cell) error {
				str, valErr := cell.FormattedValue()
				if valErr != nil {
					// Recover from a strconv.NumError caused by an empty
					// string and insert an empty string in the output.
					// errors.As is used rather than a direct type assertion
					// so the recovery still applies if the NumError has
					// been wrapped on its way up.
					var numErr *strconv.NumError
					if !errors.As(valErr, &numErr) || numErr.Num != "" {
						return valErr
					}
					str = ""
				}
				r = append(r, str)
				return nil
			})
			if cellsErr != nil {
				return cellsErr
			}
			s = append(s, r)
			return nil
		})
		if rowsErr != nil {
			return output, rowsErr
		}
		output = append(output, s)
	}
	return output, nil
}
// ToSliceUnmerged returns the data contained in the File as a three
// dimensional slice (see ToSlice) in which merged regions are unmerged:
// every cell covered by a merge takes the value of the merge's origin
// (top-left) cell.
//
// Example: a table where A1:B2 is a single merged region holding 2011.
//
//	| 2011        | Bread | 20 |
//	|             | Fish  | 70 |
//	| 2012 | 2013 | Egg   | 80 |
//
// This sheet will be converted to the slice:
//
//	[
//	  [2011 2011 Bread 20]
//	  [2011 2011 Fish 70]
//	  [2012 2013 Egg 80]
//	]
//
// If ToSlice fails, nil and its error are returned. Merge regions are
// clamped to the rows and cells actually present in the ToSlice output, so
// a region reaching past them fills only the part that exists instead of
// panicking with an index out of range.
// NOTE(review): such truncation presumably happens when RowLimit or
// ColLimit cuts through a merged region — confirm against the reader.
func (f *File) ToSliceUnmerged() (output [][][]string, err error) {
	output, err = f.ToSlice()
	if err != nil {
		return nil, err
	}

	for s, sheet := range f.Sheets {
		rows := output[s]
		walkErr := sheet.ForEachRow(func(row *Row) error {
			return row.ForEachCell(func(cell *Cell) error {
				if cell.HMerge <= 0 && cell.VMerge <= 0 {
					return nil // not the origin of a merged region
				}
				c, r := cell.GetCoordinates()
				// Skip origins that have no counterpart in the output.
				if r < 0 || r >= len(rows) || c < 0 || c >= len(rows[r]) {
					return nil
				}
				v := rows[r][c]

				// Clamp the region to the rows that exist.
				lastRow := r + cell.VMerge
				if lastRow >= len(rows) {
					lastRow = len(rows) - 1
				}
				for i := r; i <= lastRow; i++ {
					// Rows may differ in length, so clamp per row.
					lastCol := c + cell.HMerge
					if lastCol >= len(rows[i]) {
						lastCol = len(rows[i]) - 1
					}
					for j := c; j <= lastCol; j++ {
						if i != r || j != c {
							rows[i][j] = v
						}
					}
				}
				return nil
			})
		})
		if walkErr != nil {
			return output, walkErr
		}
	}
	return output, nil
}
// DefinedName is an exported named type whose underlying type is
// xlsxDefinedName.
// NOTE(review): presumably the public counterpart of the internal
// defined-name element — confirm the intended use.
type DefinedName xlsxDefinedName