c5_labsci/zciyon/xlsx/file.go

package xlsx

import (
	"archive/zip"
	"bytes"
	"errors"
	"fmt"
	"io"
	"strconv"
	"strings"
)

// File is a high level structure providing a slice of Sheet structs
// to the user.
//
// Notes on the fields:
//   - Sheets holds the sheets as a slice; Sheet holds *Sheet values in a
//     map with string keys (presumably the sheet names — TODO confirm
//     against the code that populates it).
//   - Date1904 presumably reports whether the workbook uses the 1904
//     date system (NOTE(review): inferred from the name only).
//   - rowLimit, colLimit and valueOnly are set through the RowLimit,
//     ColLimit and ValueOnly options. NewFile defaults the two limits
//     to NoRowLimit and NoColLimit.
//   - cellStoreConstructor is set to NewMemoryCellStore by NewFile.
//   - worksheets and worksheetRels map string keys to entries of the
//     underlying zip archive (presumably keyed by part name — verify
//     against the reader code).
type File struct {
	worksheets           map[string]*zip.File
	worksheetRels        map[string]*zip.File
	referenceTable       *RefTable
	Date1904             bool
	styles               *xlsxStyleSheet
	Sheets               []*Sheet
	Sheet                map[string]*Sheet
	theme                *theme
	DefinedNames         []*xlsxDefinedName
	cellStoreConstructor CellStoreConstructor
	rowLimit             int
	colLimit             int
	valueOnly            bool
}

// Default limit values installed by NewFile. As their names indicate,
// they stand for "no limit" on the rows or columns handled per sheet.
const (
	// NoRowLimit is the row limit NewFile uses unless RowLimit overrides it.
	NoRowLimit int = -1
	// NoColLimit is the column limit NewFile uses unless ColLimit overrides it.
	NoColLimit int = -1
)

// FileOption is a function that configures a File. NewFile applies each
// option it is given, in order, to the File it constructs.
type FileOption func(f *File)

// RowLimit returns a FileOption that restricts the rows handled in any
// given sheet to the first n, by storing n as the File's row limit.
func RowLimit(n int) FileOption {
	setRowLimit := func(target *File) {
		target.rowLimit = n
	}
	return setRowLimit
}

// ColLimit returns a FileOption that restricts the columns handled in
// any given sheet to the first n, by storing n as the File's column limit.
func ColLimit(n int) FileOption {
	setColLimit := func(target *File) {
		target.colLimit = n
	}
	return setColLimit
}

// ValueOnly returns a FileOption that switches the File into value-only
// mode. In this mode all NULL values are treated as meaningless and the
// NULL-valued cells are deleted before the worksheet XML is decoded,
// which can save memory and time on files with a large number of NULL
// values. Use it with caution: a NULL value is not always meaningless,
// so data you care about may be dropped.
func ValueOnly() FileOption {
	enableValueOnly := func(target *File) {
		target.valueOnly = true
	}
	return enableValueOnly
}

// NewFile builds an empty File: no sheets, no defined names, no row or
// column limit, and NewMemoryCellStore as the cell store constructor.
// Any FileOption functions passed in are then applied in order, so they
// can override these defaults.
func NewFile(options ...FileOption) *File {
	file := new(File)
	file.Sheet = map[string]*Sheet{}
	file.Sheets = []*Sheet{}
	file.DefinedNames = []*xlsxDefinedName{}
	file.rowLimit = NoRowLimit
	file.colLimit = NoColLimit
	file.cellStoreConstructor = NewMemoryCellStore

	for i := range options {
		options[i](file)
	}
	return file
}

// OpenFile opens the XLSX file called fileName and returns a populated
// xlsx.File struct for it. You may pass it zero, one or many FileOption
// functions that affect the behaviour of the file.
//
// The zip archive is closed before OpenFile returns. Any error, whether
// from opening the archive or from ReadZip, is returned wrapped with an
// "OpenFile: " prefix together with a nil *File.
func OpenFile(fileName string, options ...FileOption) (file *File, err error) {
	archive, openErr := zip.OpenReader(fileName)
	if openErr != nil {
		return nil, fmt.Errorf("OpenFile: %w", openErr)
	}
	defer archive.Close()

	parsed, readErr := ReadZip(archive, options...)
	if readErr != nil {
		return nil, fmt.Errorf("OpenFile: %w", readErr)
	}
	return parsed, nil
}

// OpenBinary takes the bytes of an XLSX file and returns a populated
// xlsx.File struct for it. It wraps bs in an in-memory reader and
// delegates to OpenReaderAt, forwarding any FileOption functions.
func OpenBinary(bs []byte, options ...FileOption) (*File, error) {
	return OpenReaderAt(bytes.NewReader(bs), int64(len(bs)), options...)
}

// OpenReaderAt takes an io.ReaderAt over an XLSX file of the given size
// in bytes and returns a populated xlsx.File struct for it. An error
// from reading the zip structure is returned as-is; otherwise the work
// is delegated to ReadZipReader along with any FileOption functions.
func OpenReaderAt(r io.ReaderAt, size int64, options ...FileOption) (*File, error) {
	archive, zipErr := zip.NewReader(r, size)
	if zipErr != nil {
		return nil, zipErr
	}
	return ReadZipReader(archive, options...)
}

// FileToSlice is a convenience wrapper around OpenFile and File.ToSlice.
// It returns the data contained in the Excel XLSX file at path as a
// three dimensional slice. The first index represents the sheet number,
// the second the row number, and the third the cell number.
//
// For example:
//
//	mySlice, err := xlsx.FileToSlice("myXLSX.xlsx")
//	if err != nil {
//		// handle the error
//	}
//	value := mySlice[0][0][0]
//
// Here, value would be set to the value File.ToSlice reports for the
// cell A1 in the first sheet in the XLSX file.
func FileToSlice(path string, options ...FileOption) ([][][]string, error) {
	workbook, err := OpenFile(path, options...)
	if err != nil {
		return nil, err
	}
	cells, err := workbook.ToSlice()
	return cells, err
}

// FileToSliceUnmerged is a convenience wrapper around OpenFile and
// File.ToSliceUnmerged. It returns the data contained in the Excel XLSX
// file at path as a three dimensional slice in which merged cells have
// been unmerged: every covered cell takes the value of its origin cell.
func FileToSliceUnmerged(path string, options ...FileOption) ([][][]string, error) {
	workbook, err := OpenFile(path, options...)
	if err != nil {
		return nil, err
	}
	cells, err := workbook.ToSliceUnmerged()
	return cells, err
}

// makeWorkbook returns an xlsxWorkbook filled with fixed default
// settings (application name, a single book view, calculation
// properties). Its sheet list is allocated with one zero-valued
// xlsxSheet per entry in f.Sheets; the entries are left for the caller
// to populate.
func (f *File) makeWorkbook() xlsxWorkbook {
	defaultView := xlsxWorkBookView{
		ShowHorizontalScroll: true,
		ShowSheetTabs:        true,
		ShowVerticalScroll:   true,
		TabRatio:             204,
		WindowHeight:         8192,
		WindowWidth:          16384,
		XWindow:              "0",
		YWindow:              "0",
	}

	var wb xlsxWorkbook
	wb.FileVersion = xlsxFileVersion{AppName: "Go XLSX"}
	wb.WorkbookPr = xlsxWorkbookPr{ShowObjects: "all"}
	wb.BookViews = xlsxBookViews{WorkBookView: []xlsxWorkBookView{defaultView}}
	wb.Sheets = xlsxSheets{Sheet: make([]xlsxSheet, len(f.Sheets))}
	wb.CalcPr = xlsxCalcPr{
		IterateCount: 100,
		RefMode:      "A1",
		Iterate:      false,
		IterateDelta: 0.001,
	}
	return wb
}

// replaceRelationshipsNameSpace post-processes marshalled workbook XML.
//
// Some tools that read XLSX files have very strict requirements about
// the structure of the input XML. In particular both Numbers on the Mac
// and SAS dislike inline XML namespace declarations, or namespace
// prefixes that don't match the ones that Excel itself uses. This is a
// problem because the Go XML library doesn't support multiple namespace
// declarations in a single element of a document. This function is a
// horrible hack to fix that after the XML marshalling is completed:
// every inline "relationships" declaration is collapsed to the "r:id"
// attribute, and the "r" namespace is declared once on the first plain
// <workbook> root element.
func replaceRelationshipsNameSpace(workbookMarshal string) string {
	const (
		inlineDeclaration = `xmlns:relationships="http://schemas.openxmlformats.org/officeDocument/2006/relationships" relationships:id`
		shortAttribute    = `r:id`
		// Dirty hack to fix issues #63 and #91; encoding/xml currently
		// "doesn't allow for additional namespaces to be defined in the
		// root element of the document," as described by @tealeg in the
		// comments for #63.
		plainRoot      = `<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">`
		namespacedRoot = `<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">`
	)

	shortened := strings.ReplaceAll(workbookMarshal, inlineDeclaration, shortAttribute)
	return strings.Replace(shortened, plainRoot, namespacedRoot, 1)
}

// addRelationshipNameSpaceToWorksheet post-processes marshalled
// worksheet XML: it declares the "r" relationships namespace on the
// first plain <worksheet> root element and rewrites every
// `<hyperlink id=` attribute to `<hyperlink r:id=`.
func addRelationshipNameSpaceToWorksheet(worksheetMarshal string) string {
	const (
		plainRoot      = `<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">`
		namespacedRoot = `<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">`
		plainLink      = `<hyperlink id=`
		namespacedLink = `<hyperlink r:id=`
	)

	// Only the root element is rewritten once; hyperlinks are rewritten everywhere.
	withNamespace := strings.Replace(worksheetMarshal, plainRoot, namespacedRoot, 1)
	return strings.ReplaceAll(withNamespace, plainLink, namespacedLink)
}

// cellIDStringWithFixed rebuilds a cell reference with fixedCellRefChar
// placed before each of its parts. The input is split with
// letterOnlyMapF and intOnlyMapF (presumably keeping only the column
// letters and only the row digits respectively — verify against their
// definitions):
//   - letters only: fixedCellRefChar + letters
//   - letters and digits: fixedCellRefChar + letters + fixedCellRefChar + digits
//   - no letters: the empty string
func cellIDStringWithFixed(cellIDString string) string {
	letters := strings.Map(letterOnlyMapF, cellIDString)
	digits := strings.Map(intOnlyMapF, cellIDString)

	switch {
	case letters == "":
		return ""
	case digits == "":
		return fixedCellRefChar + letters
	default:
		return fixedCellRefChar + letters + fixedCellRefChar + digits
	}
}

// autoFilterDefinedName builds the hidden "_xlnm._FilterDatabase"
// defined name for a sheet's auto filter. AutoFilter doesn't work in
// LibreOffice unless this special entry is present in the
// "DefinedNames" array. See:
//   - https://github.com/SheetJS/sheetjs/issues/1165
//   - https://bugs.documentfoundation.org/show_bug.cgi?id=118592
//
// The reference has the form 'Sheet Name'!TopLeft:BottomRight, with
// single quotes in the sheet name doubled and both corners passed
// through cellIDStringWithFixed. LocalSheetID is set to sheetIndex - 1.
// When the sheet has no AutoFilter the result is (nil, nil); the
// returned error is always nil.
func autoFilterDefinedName(sheet *Sheet, sheetIndex int) (*xlsxDefinedName, error) {
	filter := sheet.AutoFilter
	if filter == nil {
		return nil, nil
	}

	quotedName := strings.ReplaceAll(sheet.Name, "'", "''")
	topLeft := cellIDStringWithFixed(filter.TopLeftCell)
	bottomRight := cellIDStringWithFixed(filter.BottomRightCell)

	definedName := &xlsxDefinedName{
		Name:         "_xlnm._FilterDatabase",
		Data:         fmt.Sprintf("'%s'!%v:%v", quotedName, topLeft, bottomRight),
		LocalSheetID: sheetIndex - 1,
		Hidden:       true,
	}
	return definedName, nil
}

// ToSlice returns the formatted value of every cell in the File as a
// three dimensional slice. The first index is the sheet (in the order
// of f.Sheets), the second the row and the third the cell, in the order
// in which Sheet.ForEachRow and Row.ForEachCell visit them.
//
// A cell whose FormattedValue fails with a *strconv.NumError caused by
// an empty input string contributes "" to the output. Any other error
// stops the conversion and is returned together with the sheets that
// were fully converted before it occurred.
func (f *File) ToSlice() (output [][][]string, err error) {
	output = [][][]string{}
	for _, sheet := range f.Sheets {
		rows := [][]string{}
		rowErr := sheet.ForEachRow(func(row *Row) error {
			cells := []string{}
			cellErr := row.ForEachCell(func(cell *Cell) error {
				str, err := cell.FormattedValue()
				if err != nil {
					// Recover from strconv.NumError if the value is an empty string,
					// and insert an empty string in the output. errors.As is used
					// so the recovery still applies when the error has been wrapped.
					var numErr *strconv.NumError
					if !errors.As(err, &numErr) || numErr.Num != "" {
						return err
					}
					str = ""
				}
				cells = append(cells, str)
				return nil
			})
			if cellErr != nil {
				return cellErr
			}

			rows = append(rows, cells)
			return nil
		})
		if rowErr != nil {
			return output, rowErr
		}
		output = append(output, rows)
	}
	return output, nil
}

// ToSliceUnmerged returns the data contained in the File as a three
// dimensional slice (see method ToSlice) in which merged regions have
// been unmerged: every covered cell takes the value of its origin cell.
// Example: table where A1:B2 (rows 0 and 1, columns 0 and 1) are merged.
// | 2011        | Bread | 20 |
// |             | Fish  | 70 |
// | 2012 | 2013 | Egg   | 80 |
// This sheet will be converted to the slice:
// [
//
//	[2011 2011 Bread 20]
//	[2011 2011 Fish  70]
//	[2012 2013 Egg   80]
//
// ]
//
// Positions of a merged region that fall outside the rows or cells
// produced by ToSlice are skipped rather than causing an index out of
// range panic. If ToSlice fails, a nil slice and its error are returned.
//
// NOTE(review): this assumes a cell's coordinates are valid indices
// into the ToSlice output, i.e. that rows and cells are emitted densely
// and in order — confirm against ForEachRow/ForEachCell.
func (f *File) ToSliceUnmerged() (output [][][]string, err error) {
	output, err = f.ToSlice()
	if err != nil {
		return nil, err
	}

	for s, sheet := range f.Sheets {
		// grid shares its backing arrays with output[s], so writes
		// through it are visible in the returned slice.
		grid := output[s]
		visitErr := sheet.ForEachRow(func(row *Row) error {
			return row.ForEachCell(func(cell *Cell) error {
				if cell.HMerge <= 0 && cell.VMerge <= 0 {
					return nil
				}

				c, r := cell.GetCoordinates()
				// The origin itself must exist in the output to have a
				// value worth propagating.
				if r < 0 || r >= len(grid) || c < 0 || c >= len(grid[r]) {
					return nil
				}

				v := grid[r][c]
				// Clamp the fill to the rows and cells that actually exist.
				for i := r; i <= r+cell.VMerge && i < len(grid); i++ {
					for j := c; j <= c+cell.HMerge && j < len(grid[i]); j++ {
						if i != r || j != c {
							grid[i][j] = v
						}
					}
				}
				return nil
			})
		})
		if visitErr != nil {
			return output, visitErr
		}
	}

	return output, nil
}

// DefinedName is an exported named type with the same underlying type
// as the package-private xlsxDefinedName.
type DefinedName xlsxDefinedName