package xlsx

import (
	"archive/zip"
	"bytes"
	"encoding/xml"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"path"
	"path/filepath"
	"regexp"
	"runtime/debug"
	"strconv"
	"strings"
)

const (
	// fixedCellRefChar marks an absolute (fixed) column or row in a
	// cell reference, e.g. "$A$1".
	fixedCellRefChar = "$"
	// cellRangeChar separates the two ends of a range, e.g. "A1:B2".
	cellRangeChar = ":"
	// externalSheetBangChar separates a sheet name from a cell
	// reference.
	externalSheetBangChar = "!"
)

// XLSXReaderError is the standard error type for otherwise undefined
// errors in the XLSX reading process.
type XLSXReaderError struct {
	Err string
}

// Error returns a string value from an XLSXReaderError struct in order
// that it might comply with the builtin.error interface.
func (e *XLSXReaderError) Error() string {
	return e.Err
}

// getRangeFromString is an internal helper function that converts
// XLSX internal range syntax to a pair of integers.  For example,
// the range string "1:3" yields the lower and upper integers 1 and 3.
//
// An error is returned, and both bounds are zero, when the string
// does not contain the range separator, when either side of the
// separator is empty, or when either side is not an integer.
func getRangeFromString(rangeString string) (lower int, upper int, err error) {
	parts := strings.SplitN(rangeString, cellRangeChar, 2)
	// SplitN yields a single element when the separator is absent, so
	// the length must be checked before either half is touched.
	if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
		return 0, 0, fmt.Errorf("Invalid range '%s'\n", rangeString)
	}
	// Return on the first failure: carrying on would let a later,
	// successful conversion overwrite the error with nil.
	lower, err = strconv.Atoi(parts[0])
	if err != nil {
		return 0, 0, fmt.Errorf("Invalid range (not integer in lower bound) %s\n", rangeString)
	}
	upper, err = strconv.Atoi(parts[1])
	if err != nil {
		return 0, 0, fmt.Errorf("Invalid range (not integer in upper bound) %s\n", rangeString)
	}
	return lower, upper, nil
}

// ColLettersToIndex is used to convert a character based column
// reference to a zero based numeric column identifier.  Upper and
// lower case letters are treated alike, so "A" and "a" both give 0
// and "AA" gives 26.
func ColLettersToIndex(letters string) int {
	// The letters form a bijective base-26 number read right to left:
	// the rightmost letter counts from 0, every further letter counts
	// from 1, which is why n restarts at 1 after the first iteration.
	sum, mul, n := 0, 1, 0
	for i := len(letters) - 1; i >= 0; i, mul, n = i-1, mul*26, 1 {
		c := letters[i]
		switch {
		case 'A' <= c && c <= 'Z':
			n += int(c - 'A')
		case 'a' <= c && c <= 'z':
			n += int(c - 'a')
		}
		sum += n * mul
	}
	return sum
}

// ColIndexToLetters is used to convert a zero based, numeric column
// identifier into a character code.
func ColIndexToLetters(n int) string { // taken from https://github.com/psmithuk/xlsx/blob/master/xlsx.go var s string n += 1 for n > 0 { n -= 1 l := n % 26 s = string('A'+rune(l)) + s n /= 26 } return s } // RowIndexToString is used to convert a zero based, numeric row // indentifier into its string representation. func RowIndexToString(rowRef int) string { return strconv.Itoa(rowRef + 1) } // letterOnlyMapF is used in conjunction with strings.Map to return // only the characters A-Z and a-z in a string func letterOnlyMapF(rune rune) rune { switch { case 'A' <= rune && rune <= 'Z': return rune case 'a' <= rune && rune <= 'z': return rune - 32 } return -1 } // intOnlyMapF is used in conjunction with strings.Map to return only // the numeric portions of a string. func intOnlyMapF(rune rune) rune { if rune >= 48 && rune < 58 { return rune } return -1 } // GetCoordsFromCellIDString returns the zero based cartesian // coordinates from a cell name in Excel format, e.g. the cellIDString // "A1" returns 0, 0 and the "B3" return 1, 2. func GetCoordsFromCellIDString(cellIDString string) (x, y int, err error) { wrap := func(err error) (int, int, error) { return -1, -1, fmt.Errorf("GetCoordsFromCellIdString(%q): %w", cellIDString, err) } var letterPart string = strings.Map(letterOnlyMapF, cellIDString) y, err = strconv.Atoi(strings.Map(intOnlyMapF, cellIDString)) if err != nil { return wrap(err) } y -= 1 // Zero based x = ColLettersToIndex(letterPart) return x, y, nil } // GetCellIDStringFromCoords returns the Excel format cell name that // represents a pair of zero based cartesian coordinates. func GetCellIDStringFromCoords(x, y int) string { return GetCellIDStringFromCoordsWithFixed(x, y, false, false) } // GetCellIDStringFromCoordsWithFixed returns the Excel format cell name that // represents a pair of zero based cartesian coordinates. // It can specify either value as fixed. 
func GetCellIDStringFromCoordsWithFixed(x, y int, xFixed, yFixed bool) string { xStr := ColIndexToLetters(x) if xFixed { xStr = fixedCellRefChar + xStr } yStr := RowIndexToString(y) if yFixed { yStr = fixedCellRefChar + yStr } return xStr + yStr } // getMaxMinFromDimensionRef return the zero based cartesian maximum // and minimum coordinates from the dimension reference embedded in a // XLSX worksheet. For example, the dimension reference "A1:B2" // returns "0,0", "1,1". func getMaxMinFromDimensionRef(ref string) (minx, miny, maxx, maxy int, err error) { var parts []string wrap := func(err error) (int, int, int, int, error) { return -1, -1, -1, -1, fmt.Errorf("getMaxMinFromDimensionRef: %w", err) } parts = strings.Split(ref, cellRangeChar) minx, miny, err = GetCoordsFromCellIDString(parts[0]) if err != nil { return wrap(err) } maxx, maxy, err = GetCoordsFromCellIDString(parts[1]) if err != nil { return wrap(err) } return } // calculateMaxMinFromWorkSheet works out the dimensions of a spreadsheet // that doesn't have a DimensionRef set. The only case currently // known where this is true is with XLSX exported from Google Docs. func calculateMaxMinFromWorksheet(worksheet *xlsxWorksheet, colLimit int) (minx, miny, maxx, maxy int, err error) { // Note, this method could be very slow for large spreadsheets. 
var x, y int var maxVal int wrap := func(err error) (int, int, int, int, error) { return -1, -1, -1, -1, fmt.Errorf("calculateMaxMinFromWorksheet: %w", err) } maxVal = int(^uint(0) >> 1) minx = maxVal miny = maxVal maxy = 0 maxx = 0 for _, row := range worksheet.SheetData.Row { for _, cell := range row.C { x, y, err = GetCoordsFromCellIDString(cell.R) if err != nil { return wrap(err) } // break out of the loop if a column limit is set if colLimit != NoColLimit && x+1 > colLimit { break } if x < minx { minx = x } if x > maxx { maxx = x } if y < miny { miny = y } if y > maxy { maxy = y } } } if minx == maxVal { minx = 0 } if miny == maxVal { miny = 0 } return } // makeRowFromSpan will, when given a span expressed as a string, // return an empty Row large enough to encompass that span and // populate it with empty cells. All rows start from cell 1 - // regardless of the lower bound of the span. func makeRowFromSpan(spans string, sheet *Sheet) *Row { _, upper, err := getRangeFromString(spans) if err != nil { panic(err) } row := sheet.cellStore.MakeRowWithLen(sheet, upper) return row } // makeRowFromRaw returns the Row representation of the xlsxRow. 
func makeRowFromRaw(rawrow xlsxRow, sheet *Sheet) *Row { var upper int upper = -1 for _, rawcell := range rawrow.C { if rawcell.R != "" { x, _, error := GetCoordsFromCellIDString(rawcell.R) if error != nil { panic(fmt.Sprintf("Invalid Cell Coord, %s\n", rawcell.R)) } if x > upper { upper = x } continue } upper++ } upper++ row := sheet.cellStore.MakeRowWithLen(sheet, upper) row.SetOutlineLevel(rawrow.OutlineLevel) return row } func makeEmptyRow(sheet *Sheet) *Row { row := new(Row) row.Sheet = sheet return row } type sharedFormula struct { x, y int formula string } func formulaForCell(rawcell xlsxC, sharedFormulas map[int]sharedFormula) string { var res string f := rawcell.F if f == nil { return "" } if f.T == "shared" { x, y, err := GetCoordsFromCellIDString(rawcell.R) if err != nil { res = f.Content } else { if f.Ref != "" { res = f.Content sharedFormulas[f.Si] = sharedFormula{x, y, res} } else { sharedFormula := sharedFormulas[f.Si] dx := x - sharedFormula.x dy := y - sharedFormula.y orig := []byte(sharedFormula.formula) var start, end int var stringLiteral bool for end = 0; end < len(orig); end++ { c := orig[end] if c == '"' { stringLiteral = !stringLiteral } if stringLiteral { continue // Skip characters in quotes } if c >= 'A' && c <= 'Z' || c == '$' { res += string(orig[start:end]) start = end end++ foundNum := false for ; end < len(orig); end++ { idc := orig[end] if idc >= '0' && idc <= '9' || idc == '$' { foundNum = true } else if idc >= 'A' && idc <= 'Z' { if foundNum { break } } else { break } } if foundNum { cellID := string(orig[start:end]) res += shiftCell(cellID, dx, dy) start = end } } } if start < len(orig) { res += string(orig[start:]) } } } } else { res = f.Content } return strings.Trim(res, " \t\n\r") } // shiftCell returns the cell shifted according to dx and dy taking into consideration of absolute // references with dollar sign ($) func shiftCell(cellID string, dx, dy int) string { fx, fy, _ := GetCoordsFromCellIDString(cellID) // Is fixed 
column? fixedCol := strings.Index(cellID, fixedCellRefChar) == 0 // Is fixed row? fixedRow := strings.LastIndex(cellID, fixedCellRefChar) > 0 if !fixedCol { // Shift column fx += dx } if !fixedRow { // Shift row fy += dy } // New shifted cell shiftedCellID := GetCellIDStringFromCoords(fx, fy) if !fixedCol && !fixedRow { return shiftedCellID } // There are absolute references, need to put the $ back into the formula. letterPart := strings.Map(letterOnlyMapF, shiftedCellID) numberPart := strings.Map(intOnlyMapF, shiftedCellID) result := "" if fixedCol { result += "$" } result += letterPart if fixedRow { result += "$" } result += numberPart return result } // fillCellData attempts to extract a valid value, usable in // CSV form from the raw cell value. Note - this is not actually // general enough - we should support retaining tabs and newlines. func fillCellData(rawCell xlsxC, refTable *RefTable, sharedFormulas map[int]sharedFormula, cell *Cell) { val := strings.Trim(rawCell.V, " \t\n\r") cell.formula = formulaForCell(rawCell, sharedFormulas) switch rawCell.T { case "s": // Shared String cell.cellType = CellTypeString if val != "" { ref, err := strconv.Atoi(val) if err != nil { panic(err) } cell.Value, cell.RichText = refTable.ResolveSharedString(ref) } case "inlineStr": cell.cellType = CellTypeInline fillCellDataFromInlineString(rawCell, cell) case "b": // Boolean cell.Value = val cell.cellType = CellTypeBool case "e": // Error cell.Value = val cell.cellType = CellTypeError case "str": // String Formula (special type for cells with formulas that return a string value) // Unlike the other string cell types, the string is stored directly in the value. cell.Value = val cell.cellType = CellTypeStringFormula case "d": // Date: Cell contains a date in the ISO 8601 format. 
cell.Value = val cell.cellType = CellTypeDate case "": // Numeric is the default fallthrough case "n": // Numeric cell.Value = val cell.cellType = CellTypeNumeric default: panic(errors.New("invalid cell type")) } cell.origValue = cell.Value cell.origRichText = cell.RichText cell.modified = false } // fillCellDataFromInlineString attempts to get inline string data and put it into a Cell. func fillCellDataFromInlineString(rawcell xlsxC, cell *Cell) { cell.Value = "" cell.RichText = nil if rawcell.Is != nil { if rawcell.Is.T != nil { cell.Value = strings.Trim(rawcell.Is.T.getText(), " \t\n\r") } else { cell.RichText = xmlToRichText(rawcell.Is.R) } } cell.origValue = cell.Value cell.origRichText = cell.RichText cell.modified = false } // readRowsFromSheet is an internal helper function that extracts the // rows from a XSLXWorksheet, populates them with Cells and resolves // the value references from the reference table and stores them in // the rows and columns. func readRowsFromSheet(Worksheet *xlsxWorksheet, file *File, sheet *Sheet, rowLimit, colLimit int, linkTable hyperlinkTable) error { var row *Row var maxCol, maxRow, colCount, rowCount int var reftable *RefTable var err error var insertRowIndex int // , insertColIndex int sharedFormulas := map[int]sharedFormula{} wrap := func(err error) error { return fmt.Errorf("readRowsFromSheet: %w", err) } if len(Worksheet.SheetData.Row) == 0 { sheet.MaxRow = 0 sheet.MaxCol = 0 return nil } reftable = file.referenceTable if len(Worksheet.Dimension.Ref) > 0 && len(strings.Split(Worksheet.Dimension.Ref, cellRangeChar)) == 2 && rowLimit == NoRowLimit && colLimit == NoColLimit { _, _, maxCol, maxRow, err = getMaxMinFromDimensionRef(Worksheet.Dimension.Ref) } else { _, _, maxCol, maxRow, err = calculateMaxMinFromWorksheet(Worksheet, colLimit) } if err != nil { return wrap(err) } rowCount = maxRow + 1 colCount = maxCol + 1 if Worksheet.Cols != nil { // Columns can apply to a range, for convenience we expand the // ranges out into 
individual column definitions. for _, rawcol := range Worksheet.Cols.Col { col := &Col{ Hidden: rawcol.Hidden, Width: rawcol.Width, Min: rawcol.Min, Max: rawcol.Max, OutlineLevel: rawcol.OutlineLevel, BestFit: rawcol.BestFit, CustomWidth: rawcol.CustomWidth, Phonetic: rawcol.Phonetic, Collapsed: rawcol.Collapsed, } if file.styles != nil { if rawcol.Style != nil && *rawcol.Style > 0 { col.style = file.styles.getStyle(*rawcol.Style) col.numFmt, col.parsedNumFmt = file.styles.getNumberFormat(*rawcol.Style) } } sheet.Cols.Add(col) } } for rowIndex := 0; rowIndex < len(Worksheet.SheetData.Row); rowIndex++ { rawrow := Worksheet.SheetData.Row[rowIndex] // range is not empty and only one range exist if len(rawrow.Spans) != 0 && strings.Count(rawrow.Spans, cellRangeChar) == 1 { row = makeRowFromSpan(rawrow.Spans, sheet) } else { row = makeRowFromRaw(rawrow, sheet) } sheet.setCurrentRow(row) row.num = rawrow.R - 1 row.Hidden = rawrow.Hidden height, err := strconv.ParseFloat(rawrow.Ht, 64) if err == nil { row.SetHeight(height) } row.isCustom = rawrow.CustomHeight row.SetOutlineLevel(rawrow.OutlineLevel) for _, rawcell := range rawrow.C { if rawcell.R == "" { continue } h, v, err := Worksheet.MergeCells.getExtent(rawcell.R) if err != nil { return wrap(err) } x, y, err := GetCoordsFromCellIDString(rawcell.R) if err != nil { return wrap(err) } // break out of the loop if column limit is set if colLimit != NoColLimit && colLimit < x+1 { break } cellX := x cell := newCell(row, cellX) row.PushCell(cell) cell.HMerge = h cell.VMerge = v fillCellData(rawcell, reftable, sharedFormulas, cell) if file.styles != nil { cell.SetStyle(file.styles.getStyle(rawcell.S)) cell.NumFmt, cell.parsedNumFmt = file.styles.getNumberFormat(rawcell.S) } cell.date1904 = file.Date1904 if hyperlink, found := linkTable[coord{x: x, y: y}]; found { cell.Hyperlink = hyperlink } // Cell is considered hidden if the row or the column of this cell is hidden col := sheet.Cols.FindColByIndex(cellX + 1) cell.Hidden = 
rawrow.Hidden || (col != nil && col.Hidden != nil && *col.Hidden) cell.modified = true } sheet.cellStore.WriteRow(row) insertRowIndex++ } sheet.MaxRow = rowCount sheet.MaxCol = colCount if rowCount >= 0 { row, err = sheet.Row(0) if err != nil { return wrap(err) } sheet.setCurrentRow(row) } return nil } type indexedSheet struct { Index int Sheet *Sheet Error error } func readSheetViews(xSheetViews xlsxSheetViews) []SheetView { if xSheetViews.SheetView == nil || len(xSheetViews.SheetView) == 0 { return nil } sheetViews := []SheetView{} for _, xSheetView := range xSheetViews.SheetView { sheetView := SheetView{} if xSheetView.Pane != nil { xlsxPane := xSheetView.Pane pane := &Pane{} pane.XSplit = xlsxPane.XSplit pane.YSplit = xlsxPane.YSplit pane.TopLeftCell = xlsxPane.TopLeftCell pane.ActivePane = xlsxPane.ActivePane pane.State = xlsxPane.State sheetView.Pane = pane } sheetViews = append(sheetViews, sheetView) } return sheetViews } type coord struct { x int y int } type hyperlinkTable map[coord]Hyperlink func makeHyperlinkTable(worksheet *xlsxWorksheet, fi *File, rsheet *xlsxSheet) (hyperlinkTable, error) { wrap := func(err error) (hyperlinkTable, error) { return nil, fmt.Errorf("makeHyperlinkTable: %w", err) } table := make(hyperlinkTable) // Convert xlsxHyperlinks to Hyperlinks if worksheet.Hyperlinks != nil { worksheetRelsFile, ok := fi.worksheetRels["sheet"+rsheet.SheetId] worksheetRels := new(xlsxWorksheetRels) if ok { rc, err := worksheetRelsFile.Open() if err != nil { return wrap(fmt.Errorf("file.Open: %w", err)) } defer rc.Close() decoder := xml.NewDecoder(rc) err = decoder.Decode(worksheetRels) if err != nil { return wrap(fmt.Errorf("xml.Decoder.Decode: %w", err)) } } for _, xlsxLink := range worksheet.Hyperlinks.HyperLinks { newHyperLink := Hyperlink{} for _, rel := range worksheetRels.Relationships { if rel.Id == xlsxLink.RelationshipId { newHyperLink.Link = rel.Target break } } if xlsxLink.Tooltip != "" { newHyperLink.Tooltip = xlsxLink.Tooltip } if 
xlsxLink.DisplayString != "" { newHyperLink.DisplayString = xlsxLink.DisplayString } if xlsxLink.Location != "" { newHyperLink.Location = xlsxLink.Location } cellRef := xlsxLink.Reference x, y, err := GetCoordsFromCellIDString(cellRef) if err != nil { return wrap(err) } table[coord{x: x, y: y}] = newHyperLink } // row, err := sheet.Row(y) // if err != nil { // return wrap(err) // } // fmt.Printf("%d, %d, %+v\n", x, y, row) // // cell := row.GetCell(x) // // cell.Hyperlink = newHyperLink // } } return table, nil } // readSheetFromFile is the logic of converting a xlsxSheet struct // into a Sheet struct. This work can be done in parallel and so // readSheetsFromZipFile will spawn an instance of this function per // sheet and get the results back on the provided channel. func readSheetFromFile(rsheet xlsxSheet, fi *File, sheetXMLMap map[string]string, rowLimit, colLimit int, valueOnly bool) (sheet *Sheet, errRes error) { defer func() { if x := recover(); x != nil { errRes = fmt.Errorf("%v\n%s\n", x, debug.Stack()) } }() wrap := func(err error) (*Sheet, error) { return nil, fmt.Errorf("readSheetFromFile: %w", err) } worksheet, err := getWorksheetFromSheet(rsheet, fi.worksheets, sheetXMLMap, rowLimit, valueOnly) if err != nil { return wrap(err) } linkTable, err := makeHyperlinkTable(worksheet, fi, &rsheet) if err != nil { return wrap(err) } sheet, err = NewSheetWithCellStore(rsheet.Name, fi.cellStoreConstructor) if err != nil { return wrap(err) } sheet.File = fi err = readRowsFromSheet(worksheet, fi, sheet, rowLimit, colLimit, linkTable) if err != nil { return wrap(err) } sheet.Hidden = rsheet.State == sheetStateHidden || rsheet.State == sheetStateVeryHidden sheet.SheetViews = readSheetViews(worksheet.SheetViews) if worksheet.AutoFilter != nil { autoFilterBounds := strings.Split(worksheet.AutoFilter.Ref, ":") sheet.AutoFilter = &AutoFilter{autoFilterBounds[0], autoFilterBounds[1]} } sheet.SheetFormat.DefaultColWidth = worksheet.SheetFormatPr.DefaultColWidth 
sheet.SheetFormat.DefaultRowHeight = worksheet.SheetFormatPr.DefaultRowHeight sheet.SheetFormat.OutlineLevelCol = worksheet.SheetFormatPr.OutlineLevelCol sheet.SheetFormat.OutlineLevelRow = worksheet.SheetFormatPr.OutlineLevelRow if nil != worksheet.DataValidations { for _, dd := range worksheet.DataValidations.DataValidation { sheet.AddDataValidation(dd) } } return sheet, nil } // readSheetsFromZipFile is an internal helper function that loops // over the Worksheets defined in the XSLXWorkbook and loads them into // Sheet objects stored in the Sheets slice of a xlsx.File struct. func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]string, rowLimit, colLimit int, valueOnly bool) (map[string]*Sheet, []*Sheet, error) { var workbook *xlsxWorkbook var err error var rc io.ReadCloser var decoder *xml.Decoder var sheetCount int wrap := func(err error) (map[string]*Sheet, []*Sheet, error) { return nil, nil, fmt.Errorf("readSheetsFromZipFile: %w", err) } workbook = new(xlsxWorkbook) rc, err = f.Open() if err != nil { return wrap(fmt.Errorf("file.Open: %w", err)) } defer rc.Close() decoder = xml.NewDecoder(rc) err = decoder.Decode(workbook) if err != nil { return wrap(fmt.Errorf("xml.Decoder.Decode: %w", err)) } file.Date1904 = workbook.WorkbookPr.Date1904 for entryNum := range workbook.DefinedNames.DefinedName { file.DefinedNames = append(file.DefinedNames, &workbook.DefinedNames.DefinedName[entryNum]) } // Only try and read sheets that have corresponding files. 
// Notably this excludes chartsheets don't right now var workbookSheets []xlsxSheet for _, sheet := range workbook.Sheets.Sheet { if f := worksheetFileForSheet(sheet, file.worksheets, sheetXMLMap); f != nil { workbookSheets = append(workbookSheets, sheet) } } sheetCount = len(workbookSheets) sheetsByName := make(map[string]*Sheet, sheetCount) sheets := make([]*Sheet, sheetCount) sheetChan := make(chan *indexedSheet, sheetCount) for i, rawsheet := range workbookSheets { i, rawsheet := i, rawsheet go func() { sheet, err := readSheetFromFile(rawsheet, file, sheetXMLMap, rowLimit, colLimit, valueOnly) sheetChan <- &indexedSheet{ Index: i, Sheet: sheet, Error: err, } }() } for j := 0; j < sheetCount; j++ { sheet := <-sheetChan if sheet == nil { // FIXME channel leak return wrap(fmt.Errorf("No sheet returnded from readSheetFromFile")) } if sheet.Error != nil { // FIXME channel leak return wrap(sheet.Error) } sheetName := sheet.Sheet.Name sheetsByName[sheetName] = sheet.Sheet sheets[sheet.Index] = sheet.Sheet } close(sheetChan) return sheetsByName, sheets, nil } // readSharedStringsFromZipFile() is an internal helper function to // extract a reference table from the sharedStrings.xml file within // the XLSX zip file. func readSharedStringsFromZipFile(f *zip.File) (*RefTable, error) { var sst *xlsxSST var err error var rc io.ReadCloser var decoder *xml.Decoder var reftable *RefTable wrap := func(err error) (*RefTable, error) { return nil, fmt.Errorf("readSharedStringsFromZipFile: %w", err) } // In a file with no strings it's possible that // sharedStrings.xml doesn't exist. In this case the value // passed as f will be nil. 
if f == nil { return nil, nil } rc, err = f.Open() if err != nil { return wrap(err) } defer rc.Close() sst = new(xlsxSST) decoder = xml.NewDecoder(rc) err = decoder.Decode(sst) if err != nil { return wrap(err) } reftable = MakeSharedStringRefTable(sst) return reftable, nil } // readStylesFromZipFile() is an internal helper function to // extract a style table from the style.xml file within // the XLSX zip file. func readStylesFromZipFile(f *zip.File, theme *theme) (*xlsxStyleSheet, error) { var style *xlsxStyleSheet var err error var rc io.ReadCloser var decoder *xml.Decoder wrap := func(err error) (*xlsxStyleSheet, error) { return nil, fmt.Errorf("readStylesFromZipFile: %w", err) } rc, err = f.Open() if err != nil { return wrap(err) } defer rc.Close() style = newXlsxStyleSheet(theme) decoder = xml.NewDecoder(rc) err = decoder.Decode(style) if err != nil { return wrap(err) } buildNumFmtRefTable(style) return style, nil } func buildNumFmtRefTable(style *xlsxStyleSheet) { if style.NumFmts != nil { for _, numFmt := range style.NumFmts.NumFmt { // We do this for the side effect of populating the NumFmtRefTable. 
style.addNumFmt(numFmt) } } } func readThemeFromZipFile(f *zip.File) (*theme, error) { wrap := func(err error) (*theme, error) { return nil, fmt.Errorf("readThemeFromZipFile: %w", err) } rc, err := f.Open() if err != nil { return wrap(err) } defer rc.Close() var themeXml xlsxTheme err = xml.NewDecoder(rc).Decode(&themeXml) if err != nil { return wrap(err) } return newTheme(themeXml), nil } type WorkBookRels map[string]string func (w *WorkBookRels) MakeXLSXWorkbookRels() xlsxWorkbookRels { relCount := len(*w) xWorkbookRels := xlsxWorkbookRels{} xWorkbookRels.Relationships = make([]xlsxWorkbookRelation, relCount+3) for k, v := range *w { index, err := strconv.Atoi(k[3:]) if err != nil { panic(err.Error()) } xWorkbookRels.Relationships[index-1] = xlsxWorkbookRelation{ Id: k, Target: v, Type: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"} } relCount++ sheetId := fmt.Sprintf("rId%d", relCount) xWorkbookRels.Relationships[relCount-1] = xlsxWorkbookRelation{ Id: sheetId, Target: "sharedStrings.xml", Type: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings"} relCount++ sheetId = fmt.Sprintf("rId%d", relCount) xWorkbookRels.Relationships[relCount-1] = xlsxWorkbookRelation{ Id: sheetId, Target: "theme/theme1.xml", Type: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme"} relCount++ sheetId = fmt.Sprintf("rId%d", relCount) xWorkbookRels.Relationships[relCount-1] = xlsxWorkbookRelation{ Id: sheetId, Target: "styles.xml", Type: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles"} return xWorkbookRels } // readWorkbookRelationsFromZipFile is an internal helper function to // extract a map of relationship ID strings to the name of the // worksheet.xml file they refer to. The resulting map can be used to // reliably derefence the worksheets in the XLSX file. 
func readWorkbookRelationsFromZipFile(workbookRels *zip.File) (WorkBookRels, error) { var sheetXMLMap WorkBookRels var wbRelationships *xlsxWorkbookRels var rc io.ReadCloser var decoder *xml.Decoder var err error wrap := func(err error) (WorkBookRels, error) { return nil, fmt.Errorf("readWorkbookRelationsFromZipFile :%w", err) } rc, err = workbookRels.Open() if err != nil { return wrap(err) } defer rc.Close() decoder = xml.NewDecoder(rc) wbRelationships = new(xlsxWorkbookRels) err = decoder.Decode(wbRelationships) if err != nil { return wrap(err) } sheetXMLMap = make(WorkBookRels) for _, rel := range wbRelationships.Relationships { if strings.HasSuffix(rel.Target, ".xml") && rel.Type == "http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" { _, filename := path.Split(rel.Target) sheetXMLMap[rel.Id] = strings.Replace(filename, ".xml", "", 1) } } return sheetXMLMap, nil } // ReadZip() takes a pointer to a zip.ReadCloser and returns a // xlsx.File struct populated with its contents. In most cases // ReadZip is not used directly, but is called internally by OpenFile. func ReadZip(f *zip.ReadCloser, options ...FileOption) (*File, error) { defer f.Close() file, err := ReadZipReader(&f.Reader, options...) if err != nil { return nil, fmt.Errorf("ReadZip: %w", err) } return file, nil } // ReadZipReader() can be used to read an XLSX in memory without // touching the filesystem. func ReadZipReader(r *zip.Reader, options ...FileOption) (*File, error) { var err error var file *File var reftable *RefTable var sharedStrings *zip.File var sheetXMLMap map[string]string var sheetsByName map[string]*Sheet var sheets []*Sheet var style *xlsxStyleSheet var styles *zip.File var themeFile *zip.File var v *zip.File var workbook *zip.File var workbookRels *zip.File var worksheets map[string]*zip.File var worksheetRels map[string]*zip.File wrap := func(err error) (*File, error) { return nil, fmt.Errorf("ReadZipReader: %w", err) } file = NewFile(options...) 
worksheets = make(map[string]*zip.File, len(r.File)) worksheetRels = make(map[string]*zip.File, len(r.File)) for _, v = range r.File { _, name := filepath.Split(v.Name) switch name { case `sharedStrings.xml`: sharedStrings = v case `workbook.xml`: workbook = v case `workbook.xml.rels`: workbookRels = v case `styles.xml`: styles = v case `theme1.xml`: themeFile = v default: if len(v.Name) > 17 { if v.Name[0:13] == "xl/worksheets" || v.Name[0:13] == `xl\worksheets` { if v.Name[len(v.Name)-5:] == ".rels" { worksheetRels[v.Name[20:len(v.Name)-9]] = v } else { worksheets[v.Name[14:len(v.Name)-4]] = v } } } } } if workbookRels == nil { return wrap(fmt.Errorf("workbook.xml.rels not found in input xlsx.")) } sheetXMLMap, err = readWorkbookRelationsFromZipFile(workbookRels) if err != nil { return wrap(err) } if len(worksheets) == 0 { return wrap(fmt.Errorf("Input xlsx contains no worksheets.")) } file.worksheets = worksheets file.worksheetRels = worksheetRels reftable, err = readSharedStringsFromZipFile(sharedStrings) if err != nil { return wrap(err) } file.referenceTable = reftable if themeFile != nil { theme, err := readThemeFromZipFile(themeFile) if err != nil { return wrap(err) } file.theme = theme } if styles != nil { style, err = readStylesFromZipFile(styles, file.theme) if err != nil { return wrap(err) } file.styles = style } sheetsByName, sheets, err = readSheetsFromZipFile(workbook, file, sheetXMLMap, file.rowLimit, file.colLimit, file.valueOnly) if err != nil { return wrap(err) } if sheets == nil { readerErr := new(XLSXReaderError) readerErr.Err = "No sheets found in XLSX File" return wrap(readerErr) } file.Sheet = sheetsByName file.Sheets = sheets return file, nil } // truncateSheetXML will take in a reader to an XML sheet file and will return a reader that will read an equivalent // XML sheet file with only the number of rows specified. This greatly speeds up XML unmarshalling when only // a few rows need to be read from a large sheet. 
// When sheets are truncated, all formatting present after the sheetData tag will be lost, but all of this formatting // is related to printing and visibility, and is out of scope for most purposes of this library. func truncateSheetXML(r io.Reader, rowLimit int) (io.Reader, error) { var rowCount int var token xml.Token var readErr error output := new(bytes.Buffer) r = io.TeeReader(r, output) decoder := xml.NewDecoder(r) var ns string for { token, readErr = decoder.Token() if readErr == io.EOF { break } else if readErr != nil { return nil, readErr } if start, ok := token.(xml.StartElement); ok && start.Name.Local == "worksheet" && start.Name.Space != "" { namespace := start.Name.Space // find if the namespace has a short name for _, attr := range start.Attr { if attr.Name.Space == "xmlns" && attr.Value == namespace { ns = attr.Name.Local break } } } end, ok := token.(xml.EndElement) if ok && end.Name.Local == "row" { rowCount++ if rowCount >= rowLimit { break } } } offset := decoder.InputOffset() output.Truncate(int(offset)) if readErr != io.EOF { sheetEnding := `` if ns != "" { sheetEnding = fmt.Sprintf(``, ns, ns) } _, err := output.Write([]byte(sheetEnding)) if err != nil { return nil, err } } return output, nil } // truncateSheetXMLValueOnly will take in a reader to an XML sheet file and will return a reader that will read an equivalent // XML sheet file without null vaules of rows. This greatly speeds up XML unmarshalling when we // only need non-NULL data for the sheet. // When sheets are truncated, most of formatting present will be not right, but all of this formatting // is related to printing and visibility, and is out of scope for most purposes of this library. 
func truncateSheetXMLValueOnly(r io.Reader) (io.Reader, error) {
	// The whole sheet is read into memory and rewritten with regular
	// expressions rather than being parsed as XML.
	//
	// NOTE(review): several pattern and format literals below are
	// empty or contain no verbs for the arguments passed with them -
	// presumably markup was stripped from this copy of the file.
	// Confirm every literal against the upstream source before
	// relying on this function.
	sheetXML, err := ioutil.ReadAll(r)
	if err != nil {
		return nil, err
	}

	// NOTE(review): the compile errors are discarded here, so a
	// pattern that fails to compile leaves a nil *regexp.Regexp.
	rowRegexp, _ := regexp.Compile(`(?s)?.*?`)
	cellRegexp, _ := regexp.Compile(`(?s)?.*?/.*?>`)
	valueRegexp, _ := regexp.Compile(`(?s).*?`)
	mergerRegexp, _ := regexp.Compile(``)
	dimensionRegexp, _ := regexp.Compile(``)

	// record merger cell
	// Cells named in a merge reference that hold no value are swapped
	// for a placeholder so that the deletion pass below leaves them
	// alone; they are put back afterwards.
	mergerMap := make(map[string][]byte)
	mergerByte := mergerRegexp.FindAll(sheetXML, -1)
	for _, v := range mergerByte {
		// Take the text between `ref="` and `"/` and split it on ':'.
		mergerCells := strings.SplitN(strings.SplitN(strings.SplitN(string(v), "ref=\"", 2)[1], "\"/", 2)[0], ":", -1)
		for _, v := range mergerCells {
			mergerCellRegexp, err := regexp.Compile(fmt.Sprintf("|c>]", v))
			if err != nil {
				return nil, err
			}
			sheetXML = mergerCellRegexp.ReplaceAllFunc(sheetXML, func(mergeMatch []byte) []byte {
				if !valueRegexp.Match(mergeMatch) {
					// NOTE(review): the placeholder is the constant
					// "aaa", so every saved cell shares one map key
					// and later entries overwrite earlier ones.
					id := "aaa" //generator.Hex128()
					mergerMap[id] = mergerCellRegexp.Find(sheetXML)
					mergeMatch = mergerCellRegexp.ReplaceAll(mergeMatch, []byte(id))
				}
				return mergeMatch
			})
		}
	}

	// Delete all null value
	// firstCell and lastCell remember the first and the last cell
	// that carries a value; they define the new dimension below.
	var firstCell, lastCell []byte
	sheetXML = rowRegexp.ReplaceAllFunc(sheetXML, func(rowMatch []byte) []byte {
		if !valueRegexp.Match(rowMatch) {
			rowMatch = rowRegexp.ReplaceAll(rowMatch, nil)
		}
		rowMatch = cellRegexp.ReplaceAllFunc(rowMatch, func(cellMatch []byte) []byte {
			if !valueRegexp.Match(cellMatch) {
				cellMatch = cellRegexp.ReplaceAll(cellMatch, nil)
			} else {
				if firstCell == nil {
					firstCell = cellMatch
				}
				lastCell = cellMatch
			}
			return cellMatch
		})
		return rowMatch
	})

	// restoring mergerMap
	for k, v := range mergerMap {
		sheetXML = bytes.ReplaceAll(sheetXML, []byte(k), v)
	}

	// replace the dimension
	if firstCell != nil && lastCell != nil {
		// The cell name is the text between `r="` and the next quote.
		firstCellStr := strings.SplitN(strings.SplitN(string(firstCell), "r=\"", 2)[1], "\"", 2)[0]
		lastCellStr := strings.SplitN(strings.SplitN(string(lastCell), "r=\"", 2)[1], "\"", 2)[0]
		dimension := fmt.Sprintf("", firstCellStr, lastCellStr)
		sheetXML = dimensionRegexp.ReplaceAll(sheetXML, []byte(dimension))
	}
	output := new(bytes.Buffer)
	_, err = output.Write(sheetXML)
	if err != nil {
		return nil, err
	}
	return output, nil
}