diff --git a/3rdparty/libs/fileext/excel/README.md b/3rdparty/libs/fileext/excel/README.md index cfd025e..f6723b0 100644 --- a/3rdparty/libs/fileext/excel/README.md +++ b/3rdparty/libs/fileext/excel/README.md @@ -1,30 +1,69 @@ -# EXCEL2HTML +# Excel 解析模块 / Excel Parsing Module -XSLX/XLS files into HTML сonverter +## 概述 / Overview -## Usage: +本模块负责从 Excel 文件(`.xlsx` 和 `.xls`)中提取纯文本内容,供全文检索使用。 + +This module extracts plain text content from Excel files (`.xlsx` and `.xls`) for full-text search purposes. + +## 文件结构 / File Structure + +``` +excel/ +├── excel.cpp / excel.hpp # 入口类,根据扩展名分发解析 +├── excel_xlsxio.cpp / .hpp # XLSX 解析(基于 xlsxio SAX 流式) +├── excel_libxls.cpp / .hpp # XLS 解析(基于 libxls) +├── xlsxio/ # xlsxio 库源码 +│ ├── xlsxio_read.c # SAX 流式读取实现 +│ ├── xlsxio_read_sharedstrings.c # 共享字符串表处理 +│ └── xlsxio_read.h / *.h # 头文件 +└── libxls/ # libxls 库源码 + ├── xls.c / ole.c / xlstool.c # OLE + BIFF 解析实现 + ├── endian.c / locale.c # 字节序与编码处理 + └── include/ # 头文件 + ├── xls.h + └── libxls/ ``` -#include "excel/excel.hpp" -excel::Excel document("test.xlsx", "xlsx"); -document.convert(true, true, 0); -document.saveHtml("out_dir", "test.html"); +## 架构 / Architecture + +``` +Excel::convert() + │ + ├── .xlsx ──→ parseXlsxWithXlsxio() + │ └── xlsxio (SAX 流式解析,expat + minizip) + │ + └── .xls ──→ parseXlsWithLibxls() + └── libxls (OLE + BIFF 解析,内置 UTF-8 转换) ``` -## Features -| Extension | Text | Styles extraction | Images extraction | -| :---: | :---:| :---: | :---: | -| XLSX | Yes | Yes | Yes | -| XLS | Yes | Yes | No | +## 外部依赖 / External Dependencies + +| 依赖 | 用途 | 许可证 | +|------|------|--------| +| expat | xlsxio 的 XML SAX 解析 | MIT | +| minizip | xlsxio 的 ZIP 解压 | Zlib | +| zlib | minizip 的底层依赖 | Zlib | +| iconv | libxls 的编码转换(系统库) | LGPL | -- Table cell styles -- Images -- Bold/Italic/Underline/Strike/Sup(sub)string font style -- Font colors and names -- Horizontal and vertical aligment +xlsxio 和 libxls 的源码已直接包含在本目录中,无需额外下载。 + +The xlsxio and libxls sources are bundled 
locally; no additional download required. + +## 输出格式 / Output Format + +每个非空单元格的值后跟换行符 `\n`,所有工作表的内容顺序拼接: + +Each non-empty cell value is followed by a newline `\n`; all sheet contents are concatenated in order: + +``` +A1的值 +B1的值 +A2的值 +... +``` -## Dependencies -- iconv +## 第三方库版本 / Bundled Library Versions -## Thanks -- [python-excel](https://github.com/python-excel/xlrd) - XLSX and XLS converter (Python) +- **xlsxio**: 基于 [brechtsanders/xlsxio](https://github.com/brechtsanders/xlsxio) (MIT License) +- **libxls**: 基于 [libxls/libxls](https://github.com/libxls/libxls) v1.6.3 (BSD-2-Clause License) diff --git a/3rdparty/libs/fileext/excel/biffh.hpp b/3rdparty/libs/fileext/excel/biffh.hpp deleted file mode 100644 index 03c531a..0000000 --- a/3rdparty/libs/fileext/excel/biffh.hpp +++ /dev/null @@ -1,173 +0,0 @@ -/** - * @brief Excel files (xls/xlsx) into HTML сonverter - * @package excel - * @file biffh.cpp - * @author dmryutov (dmryutov@gmail.com) - * @copyright python-excel (https://github.com/python-excel/xlrd) - * @date 02.12.2016 -- 18.10.2017 - */ -#pragma once - -#include -#include - - -namespace excel { - -/** Classification that has been inferred from format string */ -enum { - FUN, ///< Unknown - FDT, ///< Date - FNU, ///< Number - FGE, ///< General - FTX ///< Text -}; -/** Cell types */ -enum { - XL_CELL_EMPTY, ///< Empty - XL_CELL_TEXT, ///< Text - XL_CELL_NUMBER, ///< Number - XL_CELL_DATE, ///< Date - XL_CELL_BOOLEAN, ///< Boolean - XL_CELL_ERROR, ///< Error - XL_CELL_BLANK ///< Blank. 
For use in debugging, gathering stats, etc -}; -/** Record codes */ -enum { - XL_RK2 = 0x7e, - XL_ARRAY = 0x0221, - XL_ARRAY2 = 0x0021, - XL_BLANK = 0x0201, - XL_BLANK_B2 = 0x01, - XL_BOF = 0x809, - XL_BOOLERR = 0x205, - XL_BOOLERR_B2 = 0x5, - XL_BOUNDSHEET = 0x85, - XL_BUILTINFMTCOUNT = 0x56, - XL_CF = 0x01B1, - XL_CODEPAGE = 0x42, - XL_COLINFO = 0x7D, - XL_COLUMNDEFAULT = 0x20, ///< BIFF2 only - XL_COLWIDTH = 0x24, ///< BIFF2 only - XL_CONDFMT = 0x01B0, - XL_CONTINUE = 0x3c, - XL_COUNTRY = 0x8C, - XL_DATEMODE = 0x22, - XL_DEFAULTROWHEIGHT = 0x0225, - XL_DEFCOLWIDTH = 0x55, - XL_DIMENSION = 0x200, - XL_DIMENSION2 = 0x0, - XL_EFONT = 0x45, - XL_EOF = 0x0a, - XL_EXTERNNAME = 0x23, - XL_EXTERNSHEET = 0x17, - XL_EXTSST = 0xff, - XL_FEAT11 = 0x872, - XL_FILEPASS = 0x2f, - XL_FONT = 0x31, - XL_FONT_B3B4 = 0x231, - XL_FORMAT = 0x41e, - XL_FORMAT2 = 0x1E, ///< BIFF2, BIFF3 - XL_FORMULA = 0x6, - XL_FORMULA3 = 0x206, - XL_FORMULA4 = 0x406, - XL_GCW = 0xab, - XL_HLINK = 0x01B8, - XL_QUICKTIP = 0x0800, - XL_HORIZONTALBREAKS = 0x1b, - XL_INDEX = 0x20b, - XL_INTEGER = 0x2, ///< BIFF2 only - XL_IXFE = 0x44, ///< BIFF2 only - XL_LABEL = 0x204, - XL_LABEL_B2 = 0x04, - XL_LABELRANGES = 0x15f, - XL_LABELSST = 0xfd, - XL_LEFTMARGIN = 0x26, - XL_TOPMARGIN = 0x28, - XL_RIGHTMARGIN = 0x27, - XL_BOTTOMMARGIN = 0x29, - XL_HEADER = 0x14, - XL_FOOTER = 0x15, - XL_HCENTER = 0x83, - XL_VCENTER = 0x84, - XL_MERGEDCELLS = 0xE5, - XL_MSO_DRAWING = 0x00EC, - XL_MSO_DRAWING_GROUP = 0x00EB, - XL_MSO_DRAWING_SELECT = 0x00ED, - XL_MULRK = 0xbd, - XL_MULBLANK = 0xbe, - XL_NAME = 0x18, - XL_NOTE = 0x1c, - XL_NUMBER = 0x203, - XL_NUMBER_B2 = 0x3, - XL_OBJ = 0x5D, - XL_PAGESETUP = 0xA1, - XL_PALETTE = 0x92, - XL_PANE = 0x41, - XL_PRINTGRIDLINES = 0x2B, - XL_PRINTHEADERS = 0x2A, - XL_RK = 0x27e, - XL_ROW = 0x208, - XL_ROW_B2 = 0x08, - XL_RSTRING = 0xd6, - XL_SCL = 0x00A0, - XL_SHEETHDR = 0x8F, ///< BIFF4W only - XL_SHEETPR = 0x81, - XL_SHEETSOFFSET = 0x8E, ///< BIFF4W only - XL_SHRFMLA = 0x04bc, - XL_SST 
= 0xfc, - XL_STANDARDWIDTH = 0x99, - XL_STRING = 0x207, - XL_STRING_B2 = 0x7, - XL_STYLE = 0x293, - XL_SUPBOOK = 0x1AE, ///< aka EXTERNALBOOK in OOo docs - XL_TABLEOP = 0x236, - XL_TABLEOP2 = 0x37, - XL_TABLEOP_B2 = 0x36, - XL_TXO = 0x1b6, - XL_UNCALCED = 0x5e, - XL_UNKNOWN = 0xffff, - XL_VERTICALPAGEBREAKS = 0x1a, - XL_WINDOW2 = 0x023E, - XL_WINDOW2_B2 = 0x003E, - XL_WRITEACCESS = 0x5C, - XL_WSBOOL = XL_SHEETPR, - XL_XF = 0xe0, - XL_XF2 = 0x0043, ///< BIFF2 version of XF record - XL_XF3 = 0x0243, ///< BIFF3 version of XF record - XL_XF4 = 0x0443, ///< BIFF4 version of XF record -}; -/** Cell type from format type */ -const std::unordered_map CELL_TYPE_FROM_FORMAT_TYPE { - {FNU, XL_CELL_NUMBER}, - {FUN, XL_CELL_NUMBER}, - {FGE, XL_CELL_NUMBER}, - {FDT, XL_CELL_DATE}, - {FTX, XL_CELL_NUMBER} // Yes, number can be formatted as text -}; -/** BOF codes */ -const std::vector BOF_CODES = { - 0x0809, 0x0409, 0x0209, 0x0009 -}; -/** Error text from code */ -const std::unordered_map ERROR_TEXT_FROM_CODE { - {0x00, "#NULL!"}, // Intersection of two cell ranges is empty - {0x07, "#DIV/0!"}, // Division by zero - {0x0F, "#VALUE!"}, // Wrong type of operand - {0x17, "#REF!"}, // Illegal or deleted cell reference - {0x1D, "#NAME?"}, // Wrong function or range name - {0x24, "#NUM!"}, // Value range overflow - {0x2A, "#N/A"} // Argument or function not available -}; -/** Error code from text */ -const std::unordered_map ERROR_CODE_FROM_TEXT { - {"#NULL!", 0x00}, // Intersection of two cell ranges is empty - {"#DIV/0!", 0x07}, // Division by zero - {"#VALUE!", 0x0F}, // Wrong type of operand - {"#REF!", 0x17}, // Illegal or deleted cell reference - {"#NAME?", 0x1D}, // Wrong function or range name - {"#NUM!", 0x24}, // Value range overflow - {"#N/A", 0x2A} // Argument or function not available -}; - -} // End namespace \ No newline at end of file diff --git a/3rdparty/libs/fileext/excel/book.cpp b/3rdparty/libs/fileext/excel/book.cpp deleted file mode 100644 index 9dc302a..0000000 
--- a/3rdparty/libs/fileext/excel/book.cpp +++ /dev/null @@ -1,816 +0,0 @@ -/** - * @brief Excel files (xls/xlsx) into HTML сonverter - * @package excel - * @file book.cpp - * @author dmryutov (dmryutov@gmail.com) - * @copyright python-excel (https://github.com/python-excel/xlrd) - * @date 02.12.2016 -- 29.01.2018 - */ -#include - -#include "encoding/encoding.hpp" -#include "tools.hpp" - -#include "biffh.hpp" -#include "formula.hpp" -#include "sheet.hpp" - -#include "book.hpp" - -namespace excel { - -/** Supbook types */ -enum { - SUPBOOK_UNK, ///< Unknown - SUPBOOK_INTERNAL, ///< Internal - SUPBOOK_EXTERNAL, ///< Extarnal - SUPBOOK_ADDIN, ///< Addin - SUPBOOK_DDEOLE ///< DDE OLE -}; -const int XL_WORKBOOK_GLOBALS = 0x5; -const int XL_WORKBOOK_GLOBALS_4W = 0x100; -const int XL_WORKSHEET = 0x10; -const int XL_BOUNDSHEET_WORKSHEET = 0x00; -/** BIFF supported versions */ -const std::vector SUPPORTED_VERSIONS {80, 70, 50, 45, 40, 30, 21, 20}; -/** BOF length list */ -const std::unordered_map BOF_LENGTH { - {0x0809, 8}, - {0x0409, 6}, - {0x0209, 6}, - {0x0009, 4} -}; -/** Get built-in name from code */ -const std::unordered_map BUILTIN_NAME_FROM_CODE { - {"Consolidate_Area", "\x00"}, - {"Auto_Open", "\x01"}, - {"Auto_Close", "\x02"}, - {"Extract", "\x03"}, - {"Database", "\x04"}, - {"Criteria", "\x05"}, - {"Print_Area", "\x06"}, - {"Print_Titles", "\x07"}, - {"Recorder", "\x08"}, - {"Data_Form", "\x09"}, - {"Auto_Activate", "\x0A"}, - {"Auto_Deactivate", "\x0B"}, - {"Sheet_Title", "\x0C"}, - {"_FilterDatabase", "\x0D"} -}; -/** Get encoding from codepage */ -const std::unordered_map ENCODING_FROM_CODEPAGE { - {1200, "UTF-16LE"}, - {10000, "MacRoman"}, - {10006, "MacGreek"}, - {10007, "MacCyrillic"}, - {10029, "MacLatin2"}, - {10079, "MacIceland"}, - {10081, "MacTurkish"}, - {32768, "MacRoman"}, - {32769, "CP1252"} -}; -/** BIFF text version */ -const std::unordered_map BIFF_TEXT { - {0, "(not BIFF)"}, - {20, "2.0"}, - {21, "2.1"}, - {30, "3"}, - {40, "4S"}, - {45, 
"4W"}, - {50, "5"}, - {70, "7"}, - {80, "8"}, - {85, "8X"} -}; - -// Book public: -Book::Book(const std::string& fileName, std::string &text, bool addStyle) -: Cfb(fileName), m_contentText(text), m_addStyle(addStyle) {} - -void Book::openWorkbookXls() { - // Read CFB part - Cfb::parse(); - m_workBook = getStream("Workbook"); - if (m_workBook.empty()) - return; - Cfb::clear(); - - m_biffVersion = getBiffVersion(XL_WORKBOOK_GLOBALS); - if (!m_biffVersion) - throw std::logic_error("Can't determine file's BIFF version"); - if (find(SUPPORTED_VERSIONS.begin(), SUPPORTED_VERSIONS.end(), m_biffVersion) == - SUPPORTED_VERSIONS.end() - ) - throw std::invalid_argument("BIFF version "+ BIFF_TEXT.at(m_biffVersion) +" is not supported"); - - if (m_biffVersion <= 40) { - // No workbook globals, only 1 worksheet - getFakeGlobalsSheet(); - } - else if (m_biffVersion == 45) { - // Worksheet(s) embedded in global stream - parseGlobals(); - } - else { - parseGlobals(); - m_sheetList.clear(); - size_t sheetCount = m_sheetNames.size(); - for (size_t i = 0; i < sheetCount; ++i) - getSheet(i); - } - m_sheetCount = m_sheetList.size(); - - // Release resources - m_workBook.clear(); - m_sharedStrings.clear(); - m_richtextRunlistMap.clear(); - m_workBook.shrink_to_fit(); - m_sharedStrings.shrink_to_fit(); -} - -void Book::handleWriteAccess(const std::string& data) { - std::string str; - if (m_biffVersion < 80) { - if (m_encoding.empty()) { - m_isRawUserName = true; - m_userName = data; - return; - } - str = unpackString(data, 0, 1); - } - else - str = unpackUnicode(data, 0, 2); - m_userName = tools::rtrim(str); -} - -void Book::getRecordParts(unsigned short& code, unsigned short& length, - std::string& data, int condition) -{ - int pos = m_position; - code = readByte(m_workBook, pos, 2); - length = readByte(m_workBook, pos+2, 2); - - if (condition != -1 && code != condition) { - data = ""; - code = 0; - length = 0; - return; - } - pos += 4; - data = m_workBook.substr(pos, length); - 
m_position = pos + length; -} - -void Book::getEncoding() { - if (!m_codePage) { - if (m_biffVersion < 80) - m_encoding = "ascii"; - else - m_codePage = 1200; // utf16le - } - else { - if (ENCODING_FROM_CODEPAGE.find(m_codePage) != ENCODING_FROM_CODEPAGE.end()) - m_encoding = ENCODING_FROM_CODEPAGE.at(m_codePage); - else if (300 <= m_codePage && m_codePage <= 1999) - m_encoding = "cp" + std::to_string(m_codePage); - else - m_encoding = "unknown_codepage_" + std::to_string(m_codePage); - } - if (m_isRawUserName) { - m_userName = tools::rtrim(unpackString(m_userName, 0, 1)); - m_isRawUserName = false; - } -} - -std::string Book::unpackString(const std::string& data, int pos, int length) const { - std::string result = data.substr(pos + length, readByte(data, pos, length)); - return encoding::decode(result, m_encoding); -} - -std::string Book::unpackStringUpdatePos(const std::string& data, int& pos, - int length, int knownLength) const -{ - int charCount; - if (knownLength) { - // On a NAME record, the length byte is detached from the front of the string - charCount = knownLength; - } - else { - charCount = readByte(data, pos, length); - pos += length; - } - pos += charCount; - std::string result = data.substr(pos - charCount, charCount); - return encoding::decode(result, m_encoding); -} - -std::string Book::unpackUnicode(const std::string& data, int pos, int length) const { - unsigned short charCount = readByte(data, pos, length); - // Ambiguous whether 0-length string should have an "options" byte. Avoid crash if missing - if (!charCount) - return ""; - - pos += length; - std::string result; - char options = data[pos]; - pos += 1; - if (options & 0x08) - pos += 2; - if (options & 0x04) - pos += 4; - if (options & 0x01) { - // Uncompressed UTF-16-LE - result = data.substr(pos, 2*charCount); - result = encoding::decode(result, "UTF-16LE"); - } - else { - // Note: this is COMPRESSED (not ASCII) encoding! 
Merely returning the raw bytes would - // work OK 99.99% of time if local codepage was cp1252 - however this would rapidly go - // pear-shaped for other codepages so return Unicode - result = data.substr(pos, charCount); - result = encoding::decode(result, "ISO-8859-1"); - } - return result; -} - -std::string Book::unpackUnicodeUpdatePos(const std::string& data, int& pos, - int length, int knownLength) const -{ - int charCount; - if (knownLength) - // On a NAME record, the length byte is detached from the front of the string - charCount = knownLength; - else { - charCount = readByte(data, pos, length); - pos += length; - } - // Zero-length string with no options byte - if (!charCount && data.substr(pos).empty()) - return ""; - - std::string result; - unsigned short rt = 0; - char options = data[pos]; - char phonetic = options & 0x04; - char richtext = options & 0x08; - int size = 0; - pos += 1; - if (richtext) { - rt = readByte(data, pos, 2); - pos += 2; - } - if (phonetic) { - size = readByte(data, pos, 4); - pos += 4; - } - if (options & 0x01) { - // Uncompressed UTF-16-LE - result = data.substr(pos, 2*charCount); - result = encoding::decode(result, "UTF-16LE"); - pos += 2 * charCount; - } - else { - // Note: this is COMPRESSED (not ASCII!) encoding!!! 
- result = data.substr(pos, charCount); - result = encoding::decode(result, "ISO-8859-1"); - pos += charCount; - } - if (richtext) - pos += 4 * rt; - if (phonetic) - pos += size; - return result; -} - - -// Book private: -int Book::getBiffVersion(int streamSign) { - unsigned short signature = readByte(m_workBook, m_position, 2); - unsigned short length = readByte(m_workBook, m_position + 2, 2); - //int savpos = m_position; - m_position += 4; - - if (find(BOF_CODES.begin(), BOF_CODES.end(), signature) == BOF_CODES.end()) - throw std::invalid_argument("Unsupported format, or corrupt file: Expected BOF record"); - if (length < 4 || length > 20) - throw std::invalid_argument( - "Unsupported format, or corrupt file: Invalid length (" + - std::to_string(length) +") for BOF record type " + std::to_string(signature) - ); - - std::string padding(std::max(0, BOF_LENGTH.at(signature) - length), '\0'); - std::string data = m_workBook.substr(m_position, length); - if (data.size() < length) - throw std::invalid_argument("Unsupported format, or corrupt file: Incomplete BOF record[2]"); - - m_position += length; - data += padding; - int version = 0; - int version1 = signature >> 8; - unsigned short version2 = readByte(data, 0, 2); - unsigned short streamType = readByte(data, 2, 2); - - if (version1 == 0x08) { - unsigned short build = readByte(data, 4, 2); - unsigned short year = readByte(data, 6, 2); - - if (version2 == 0x0600) { - version = 80; - } - else if (version2 == 0x0500) { - if (year < 1994 || (build == 2412 || build == 3218 || build == 3321)) - version = 50; - else - version = 70; - } - else { - // Dodgy one, created by a 3rd-party tool - std::unordered_map code { - {0x0000, 21}, {0x0007, 21}, - {0x0200, 21}, {0x0300, 30}, - {0x0400, 40} - }; - version = code[version2]; - } - } - else if (version1 == 0x04) - version = 40; - else if (version1 == 0x02) - version = 30; - else if (version1 == 0x00) - version = 21; - if (version == 40 && streamType == XL_WORKBOOK_GLOBALS_4W) 
- version = 45; - - bool gotGlobals = ( - streamType == XL_WORKBOOK_GLOBALS || - (version == 45 && streamType == XL_WORKBOOK_GLOBALS_4W) - ); - if ((streamSign == XL_WORKBOOK_GLOBALS && gotGlobals) || streamType == streamSign) - return version; - if (version < 50 && streamType == XL_WORKSHEET) - return version; - if (version >= 50 && streamType == 0x0100) - throw std::logic_error("Workspace file -- no spreadsheet data"); - throw std::logic_error("BOF not workbook/worksheet"); -} - -void Book::getFakeGlobalsSheet() { - Formatting formatting(this); - formatting.initializeBook(); - - m_sheetNames = {"Sheet 1"}; - m_sheetAbsolutePos = {0}; - m_sheetVisibility = {0}; // One sheet, visible - m_sheetList.emplace_back(Sheet(this, m_position, "Sheet 1", 0, m_contentText)); - size_t sheetCount = m_sheetNames.size(); - for (size_t i = 0; i < sheetCount; ++i) - getSheet(i); -} - -void Book::parseGlobals() { - // No need to position, just start reading (after the BOF) - Formatting formatting(this); - formatting.initializeBook(); - - while (true) { - unsigned short code; - unsigned short length; - std::string data; - getRecordParts(code, length, data); - - if (code == XL_SST) - handleSst(data); - else if (code == XL_FONT || code == XL_FONT_B3B4) - formatting.handleFont(data); - else if (code == XL_FORMAT) // XL_FORMAT2 is BIFF <= 3.0, can't appear in globals - formatting.handleFormat(data); - else if (code == XL_XF) - formatting.handleXf(data); - else if (code == XL_BOUNDSHEET) - handleBoundsheet(data); - else if (code == XL_DATEMODE) - m_dateMode = readByte(data, 0, 2); - else if (code == XL_CODEPAGE) { - m_codePage = readByte(data, 0, 2); - getEncoding(); - } - else if (code == XL_COUNTRY) { - m_countries = { - readByte(data, 0, 2), - readByte(data, 2, 2) - }; - } - else if (code == XL_EXTERNNAME) - handleExternalName(data); - else if (code == XL_EXTERNSHEET) - handleExternalSheet(data); - else if (code == XL_WRITEACCESS) - handleWriteAccess(data); - else if (code == 
XL_SHEETSOFFSET) - m_sheetOffset = readByte(data, 0, 4); - else if (code == XL_SHEETHDR) - handleSheethdr(data); - else if (code == XL_SUPBOOK) - handleSupbook(data); - else if (code == XL_NAME) - handleName(data); - else if (code == XL_PALETTE) - formatting.handlePalette(data); - else if (code == XL_STYLE) - formatting.handleStyle(data); - else if (code == XL_EOF) { - formatting.xfEpilogue(); - namesEpilogue(); - formatting.paletteEpilogue(); - if (m_encoding.empty()) - getEncoding(); - return; - } - } -} - -void Book::getSheet(size_t sheetId, bool shouldUpdatePos) { - if (shouldUpdatePos) - m_position = m_sheetAbsolutePos[sheetId]; - getBiffVersion(XL_WORKSHEET); - - // Add sheet information -// auto div = m_htmlTree.append_child("div"); -// div.append_attribute("id") = ("tabC"+ std::to_string(sheetId+1)).c_str(); -// auto table = m_htmlTree; - - m_sheetList.emplace_back(Sheet(this, m_position, m_sheetNames[sheetId], sheetId, m_contentText)); - m_sheetList.back().read(); -} - -void Book::handleSst(const std::string& data) { - std::vector stringList = {data}; - while (true) { - unsigned short code; - unsigned short length; - std::string data; - getRecordParts(code, length, data, XL_CONTINUE); - - if (!code) - break; - stringList.emplace_back(data); - } - unpackSst(stringList, readByte(data, 4, 4)); -} - -void Book::handleBoundsheet(const std::string& data) { - getEncoding(); - std::string sheetName; - unsigned char visibility; - unsigned char sheetType; - int absolutePos; - - if (m_biffVersion == 45) { // BIFF4W - // Not documented in OOo docs. 
In fact, the only data is the name of the sheet - sheetName = unpackString(data, 0, 1); - visibility = 0; - sheetType = XL_BOUNDSHEET_WORKSHEET; - // Note: - // (a) This won't be used - // (b) It's the position of the SHEETHDR record - // (c) Add 11 to get to the worksheet BOF record - if (m_sheetAbsolutePos.size() == 0) - absolutePos = m_sheetOffset + m_base; - else - absolutePos = -1; // Unknown - } - else { - int offset = readByte(data, 0, 4); - visibility = readByte(data, 4, 1); - sheetType = readByte(data, 5, 1); - absolutePos = offset + m_base; // Because global BOF is always at position 0 in the stream - if (m_biffVersion < 80) - sheetName = unpackString(data, 6, 1); - else - sheetName = unpackUnicode(data, 6, 1); - } - - if (sheetType != XL_BOUNDSHEET_WORKSHEET) - m_sheetMap.push_back(-1); - else { - int size = static_cast(m_sheetNames.size()); - m_sheetMap.push_back(size); - m_sheetNames.push_back(sheetName); - m_sheetAbsolutePos.push_back(absolutePos); - m_sheetVisibility.push_back(visibility); - m_sheetIdFromName[sheetName] = size; - } -} - -void Book::handleExternalName(const std::string& data) { - if (m_biffVersion >= 80) { - int pos = 6; - std::string name = unpackUnicodeUpdatePos(data, pos, 1); - if (m_supbookTypes.back() == SUPBOOK_ADDIN) - m_addinFuncNames.push_back(name); - } -} - -void Book::handleExternalSheet(std::string& data) { - getEncoding(); // If CODEPAGE record is missing/out of order/wrong - m_externalSheetCount++; - if (m_biffVersion >= 80) { - unsigned short numRefs = readByte(data, 0, 2); - while (data.size() < numRefs*6 + 2) { - unsigned short code; - unsigned short length; - std::string data2; - getRecordParts(code, length, data2); - if (code != XL_CONTINUE) - throw std::logic_error("Missing CONTINUE after EXTERNSHEET record"); - data += data2; - } - int pos = 2; - for (int k = 0; k < numRefs; ++k) { - m_externalSheetInfo.push_back({ - readByte(data, pos, 2), - readByte(data, pos+2, 2), - readByte(data, pos+4, 2) - }); - pos += 6; 
- } - } - else { - unsigned char size = readByte(data, 0, 1); - unsigned char type = readByte(data, 1, 1); - if (type == 3) - m_externalSheetNameFromId[m_externalSheetCount] = data.substr(2, size); - if (type < 1 || type > 4) - type = 0; - m_externalSheetTypes.push_back(type); - } -} - -void Book::handleSheethdr(const std::string& data) { - getEncoding(); - int sheetLength = readByte(data, 0, 4); - //std::string sheetName = unpackString(data, 4, 1); - int bofPosition = m_position; - m_sheethdrCount++; - - initializeFormatInfo(); - getSheet(m_sheethdrCount, false); - m_position = bofPosition + sheetLength; -} - -void Book::handleSupbook(const std::string& data) { - m_supbookTypes.push_back(-1); - unsigned short sheetCount = readByte(data, 0, 2); - m_supbookCount++; - if (data.substr(2, 2) == "\x01\x04") { - m_supbookTypes.back() = SUPBOOK_INTERNAL; - m_supbookLocalIndex = m_supbookCount - 1; - return; - } - if (data.substr(0, 4) == "\x01\x00\x01\x3A") { - m_supbookTypes.back() = SUPBOOK_ADDIN; - m_supbookAddinIndex = m_supbookCount - 1; - return; - } - - int pos = 2; - std::string url = unpackUnicodeUpdatePos(data, pos, 2); - if (sheetCount == 0) { - m_supbookTypes.back() = SUPBOOK_DDEOLE; - return; - } - m_supbookTypes.back() = SUPBOOK_EXTERNAL; - std::vector sheetNames; - for (int x = 0; x < sheetCount; ++x) { - try { - sheetNames.emplace_back(unpackUnicodeUpdatePos(data, pos, 2)); - } - catch (...) 
{ - break; - } - } -} - -void Book::handleName(const std::string& data) { - if (m_biffVersion < 50) - return; - getEncoding(); - - unsigned short optionFlags = readByte(data, 0, 2); - //unsigned char kbShortcut = readByte(data, 2, 1); - unsigned char nameLength = readByte(data, 3, 1); - unsigned short formulaLength = readByte(data, 4, 2); - unsigned short externalSheetIndex = readByte(data, 6, 2); - unsigned short sheetIndex = readByte(data, 8, 2); - //unsigned char menuTextLength = readByte(data, 10, 1); - //unsigned char descriptionTextLength = readByte(data, 11, 1); - //unsigned char helpTextLength = readByte(data, 12, 1); - //unsigned char statusTextLength = readByte(data, 13, 1); - - m_nameObjList.emplace_back(Name(this)); - Name& nobj = m_nameObjList.back(); - nobj.m_nameIndex = m_nameObjList.size() - 1; - nobj.m_optionFlags = optionFlags; - nobj.m_isHidden = (optionFlags & 1) >> 0; - nobj.m_function = (optionFlags & 2) >> 1; - nobj.m_vbasic = (optionFlags & 4) >> 2; - nobj.m_macro = (optionFlags & 8) >> 3; - nobj.m_isComplex = (optionFlags & 0x10) >> 4; - nobj.m_builtIn = (optionFlags & 0x20) >> 5; - nobj.m_functionGroup = (optionFlags & 0xFC0) >> 6; - nobj.m_isBinary = (optionFlags & 0x1000) >> 12; - nobj.m_externalSheetIndex = externalSheetIndex; - nobj.m_excelSheetIndex = sheetIndex; - nobj.m_basicFormulaLength = formulaLength; - nobj.m_evaluated = 0; - nobj.m_scope = -5; // Patched up in the names_epilogue() method - - std::string internalName; - int pos = 14; - if (m_biffVersion < 80) - internalName = unpackStringUpdatePos(data, pos, 1, nameLength); - else - internalName = unpackUnicodeUpdatePos(data, pos, 2, nameLength); - - if (!nobj.m_builtIn) - nobj.m_name = internalName; - else if (BUILTIN_NAME_FROM_CODE.find(internalName) != BUILTIN_NAME_FROM_CODE.end()) - nobj.m_name = BUILTIN_NAME_FROM_CODE.at(internalName); - else - nobj.m_name = "??Unknown??"; - nobj.m_rawFormula = data.substr(pos); -} - -void Book::initializeFormatInfo() { - 
m_formatMap.clear(); - m_formatList.clear(); - m_xfCount = 0; - m_actualFormatCount = 0; // Number of FORMAT records seen so far - m_xfEpilogueDone = 0; - m_xfIndexXlTypeMap = {{0, XL_CELL_NUMBER}}; - m_xfList.clear(); - m_fontList.clear(); -} - -void Book::unpackSst(const std::vector& dataTable, int stringCount) { - std::string data = dataTable[0]; - int dataIndex = 0; - size_t dataSize = dataTable.size(); - size_t dataLength = data.size(); - int pos = 8; - m_sharedStrings.clear(); - if (m_addStyle) - m_richtextRunlistMap.clear(); - - for (int i = 0; i < stringCount; ++i) { - unsigned short charCount = readByte(data, pos, 2); - char options = data[pos + 2]; - int richTextCount = 0; - int phoneticSize = 0; - pos += 3; - if (options & 0x08) { // Richtext - richTextCount = readByte(data, pos, 2); - pos += 2; - } - if (options & 0x04) { // Phonetic - phoneticSize = readByte(data, pos, 4); - pos += 4; - } - std::string result; - int gotChars = 0; - while (true) { - int charsNeed = charCount - gotChars; - int charsAvailable; - std::string text; - if (options & 0x01) { - // Uncompressed UTF-16 - charsAvailable = std::min(((int)dataLength - pos) >> 1, charsNeed); - text = data.substr(pos, 2*charsAvailable); - text = encoding::decode(text, "UTF-16LE"); - pos += 2*charsAvailable; - } - else { - // Note: this is COMPRESSED (not ASCII!) encoding!!! 
- charsAvailable = std::min((int)dataLength - pos, charsNeed); - text = data.substr(pos, charsAvailable); - text = encoding::decode(text, "ISO-8859-1"); - pos += charsAvailable; - } - result += text; - gotChars += charsAvailable; - if (gotChars == charCount) - break; - dataIndex += 1; - data = dataTable[dataIndex]; - dataLength = data.size(); - options = data[0]; - pos = 1; - } - - if (richTextCount) { - std::vector> runs; - for (int j = 0; j < richTextCount; ++j) { - if (pos == static_cast(dataLength)) { - pos = 0; - dataIndex += 1; - data = dataTable[dataIndex]; - dataLength = data.size(); - } - runs.emplace_back(readByte(data, pos, 2), - readByte(data, pos+2, 2)); - pos += 4; - } - if (m_addStyle) - m_richtextRunlistMap[m_sharedStrings.size()] = runs; - } - - pos += phoneticSize; // Size of the phonetic stuff to skip - if (pos >= static_cast(dataLength)) { - // Adjust to correct position in next record - pos -= static_cast(dataLength); - dataIndex++; - if (dataIndex < static_cast(dataSize)) { - data = dataTable[dataIndex]; - dataLength = data.size(); - } - } - m_sharedStrings.push_back(result); - } -} - -void Book::namesEpilogue() { - size_t nameCount = m_nameObjList.size(); - for (size_t i = 0; i < nameCount; ++i) { - Name& name = m_nameObjList[i]; - int internalSheetIndex = -3; - // Convert from excelSheetIndex to scope. 
Done here because in BIFF7 and earlier - // the BOUNDSHEET records come after the NAME records - if (m_biffVersion >= 80) { - int sheetIndex = name.m_excelSheetIndex; - if (sheetIndex == 0) - internalSheetIndex = -1; // Global - else if (1 <= sheetIndex && sheetIndex <= static_cast(m_sheetMap.size())) { - internalSheetIndex = m_sheetMap[sheetIndex-1]; - if (internalSheetIndex == -1) // Maps to a macro or VBA sheet - internalSheetIndex = -2; // Valid sheet reference but not useful - } - else - internalSheetIndex = -3; // Invalid - } - else if (50 <= m_biffVersion && m_biffVersion <= 70) { - int sheetIndex = name.m_externalSheetIndex; - if (sheetIndex == 0) - internalSheetIndex = -1; // Global - else { - std::string sheetName = m_externalSheetNameFromId[sheetIndex]; - if (m_sheetIdFromName.find(sheetName) == m_sheetIdFromName.end()) - internalSheetIndex = m_sheetIdFromName[sheetName]; - else - internalSheetIndex = -2; - } - } - name.m_scope = static_cast(internalSheetIndex); - } - - Formula formula(this); - for (int i = 0; i < (int)nameCount; ++i) { - Name& name = m_nameObjList[i]; - // Parse the formula - if (name.m_macro || name.m_isBinary || name.m_evaluated) - continue; - formula.evaluateFormula(name, i); - } - - // Build some dicts for access to the name objects - m_nameScopeMap.clear(); - m_nameMap.clear(); - std::map>> nameMap; - for (int i = 0; i < (int)nameCount; ++i) { - Name& name = m_nameObjList[i]; - std::string nameName = name.m_name; - std::transform(nameName.begin(), nameName.end(), nameName.begin(), ::tolower); - - std::pair key {nameName, name.m_scope}; - m_nameScopeMap.erase(key); - m_nameScopeMap.emplace(key, name); - - nameMap[nameName].emplace_back(name, i); - } - for (auto & map : nameMap) { - std::sort(map.second.begin(), map.second.end()); - for (const auto& obj : map.second) - m_nameMap[map.first].emplace_back(obj.first); - } -} - - -// Name public: -Name::Name(Book* book) - : m_book(book) {} - -bool Name::operator < (const Name& name) const 
{ - return m_scope < name.m_scope; -} - -} // End namespace diff --git a/3rdparty/libs/fileext/excel/book.hpp b/3rdparty/libs/fileext/excel/book.hpp deleted file mode 100644 index c928a6f..0000000 --- a/3rdparty/libs/fileext/excel/book.hpp +++ /dev/null @@ -1,575 +0,0 @@ -/** - * @brief Excel files (xls/xlsx) into HTML сonverter - * @package excel - * @file book.hpp - * @author dmryutov (dmryutov@gmail.com) - * @copyright python-excel (https://github.com/python-excel/xlrd) - * @date 02.12.2016 -- 28.01.2018 - */ -#pragma once - -#include -#include -#include -#include -#include - -#include "fileext/cfb/cfb.hpp" - -#include "formula.hpp" -#include "sheet.hpp" -#include "frmt.hpp" - - -namespace excel { - -class Font; -class XFBorder; -class XFBackground; -class XF; -class Name; -class Sheet; -class Operand; - -/** - * @class Book - * @brief - * Excel Workbook data - */ -class Book: public cfb::Cfb { -public: - /** - * @param[in] fileName - * File name - * @param htmlTree - * Result HTML tree - * @param addStyle - * Should read and add styles to HTML-tree - * @param mergingMode - * Colspan/rowspan processing mode - * @since 1.0 - */ - Book(const std::string& fileName, std::string& text, bool addStyle); - - /** - * @brief - * Read XLS WorkBook - * @throw std::logic_error - * Can't determine file's BIFF version - * @throw std::invalid_argument - * BIFF version %1 is not supported - * @since 1.0 - */ - void openWorkbookXls(); - - /** - * @brief - * Get sheet write access - * @since 1.0 - */ - void handleWriteAccess(const std::string& data); - - /** - * @brief - * Read records parts - * @param[out] code - * Record type - * @param[out] length - * Record length - * @param[out] data - * Record content - * @param[in] condition - * Reading condition (return empty record) - * @since 1.0 - */ - void getRecordParts(unsigned short& code, unsigned short& length, - std::string& data, int condition = -1); - - /** - * @brief - * Get encoding from stream data - * @since 1.0 - */ - void 
getEncoding(); - - /** - * @brief - * Read binary string - * @param[in] data - * Binary data - * @param[in] pos - * Record size information start position - * @param[in] length - * Record size information length - * @return - * UTF-8 text - * @since 1.0 - */ - std::string unpackString(const std::string& data, int pos, int length = 1) const; - - /** - * @brief - * Read binary string and update position - * @param[in] data - * Binary data - * @param[in,out] pos - * Record size information start position - * @param[in] length - * Record size information length - * @param[in] knownLength - * Record size - * @return - * UTF-8 text - * @since 1.0 - */ - std::string unpackStringUpdatePos(const std::string& data, int& pos, int length = 1, - int knownLength = 0) const; - - /** - * @brief - * Convert binary text to UTF-8 - * @param[in] data - * Binary data - * @param[in] pos - * Record size information start position - * @param[in] length - * Record size information length - * @return - * UTF-8 text - * @since 1.0 - */ - std::string unpackUnicode(const std::string& data, int pos, int length = 2) const; - - /** - * @brief - * Convert binary text to UTF-8 and update position - * @param[in] data - * Binary data - * @param[in,out] pos - * Record size information start position - * @param[in] length - * Record size information length - * @param[in] knownLength - * Record size - * @return - * UTF-8 text - * @since 1.0 - */ - std::string unpackUnicodeUpdatePos(const std::string& data, int& pos, int length = 2, - int knownLength = 0) const; - - /** Result HTML tree */ - std::string& m_contentText; - /** Should read and add styles to HTML-tree */ - const bool m_addStyle; - /** Current position in the stream */ - int m_position = 0; - /** - * Version of BIFF (Binary Interchange File Format). Used to create the file. - * Latest is 8.0, introduced with Excel 97. 
Earliest supported by this module: 2.0 - */ - unsigned char m_biffVersion = 0; - /** The number of worksheets in workbook */ - size_t m_sheetCount; - /** All strings in workbook */ - std::vector m_sharedStrings; - /** Sheet list */ - std::vector m_sheetList; - /** Sheets names list */ - std::vector m_sheetNames; - /** Sheet visibility. From BOUNDSHEET record */ - std::vector m_sheetVisibility; - /** Maps an allSheets index to a calc-sheets index (or -1) */ - std::vector m_sheetMap; - /** Information about external sheets */ - std::vector> m_externalSheetInfo; - /** External sheet types */ - std::vector m_externalSheetTypes; - /** SUPBOOK local index */ - int m_supbookLocalIndex = 0; - /** SUPBOOK ADDIN index */ - int m_supbookAddinIndex = 0; - /** List containing object for each NAME record in the workbook */ - std::vector m_nameObjList; - /** Maps an NAME records to scope index */ - std::map, Name> m_nameScopeMap; - /** NAME record names map */ - std::map> m_nameMap; - /** List of FONT records */ - std::vector m_fontList; - /** List of XF records */ - std::vector m_xfList; - /** XF records count */ - int m_xfCount = 0; - /** IF XF records are finalized */ - bool m_xfEpilogueDone = false; - /** - * List of FORMAT records in the order that they appear in the input file. - * It doesn't contain built-in formats - */ - std::vector m_formatList; - /** Maps an XF formatKey to FORMAT. 
Used for all visual rendering purposes */ - std::unordered_map m_formatMap; - /** - * Provides access via name to `(builtIn, xfIndex)` - extended format information for - * built-in/user-defined styles - * Value | Description - * :---: | ----------- - * 0 | User-defined - * 1 | Built-in style - * Known built-in names are: - * - Normal - * - RowLevel_1 to RowLevel_7 - * - ColLevel_1 to ColLevel_7 - * - Comma - * - Currency - * - Percent - * - "Comma [0]" - * - "Currency [0]" - * - Hyperlink - * - "Followed Hyperlink" - */ - std::unordered_map> m_styleNameMap; - /** - * Provides definitions like `(red, green, blue)` for color indexes (`0x7FFF` maps to `None`) - * This is what you need if you want to render cells on screen or in PDF file - */ - std::unordered_map> m_colorMap; - /** List of used color indexes */ - std::unordered_map m_colorIndexUsed; - /** - * If user has changed any of colors in standard palette, XLS file will contain `PALETTE` - * record with 56 (16 for Excel 4.0 and earlier) RGB values in it. - * This is what you need if you are writing an output XLS file - */ - std::vector> m_paletteRecord; - /** List of already seen richtext records */ - std::unordered_map>> m_richtextRunlistMap; - /** Number of FORMAT records seen so far */ - int m_actualFormatCount = 0; - /** Number of built-in FORMAT records. Unknown as yet. 
BIFF 3, 4S, 4W */ - int m_builtinFormatCount = -1; - /** List of document properties (XLSX only) */ - std::unordered_map m_properties; - /** Maps XF index to XL type */ - std::map m_xfIndexXlTypeMap = {{0, 0}}; - /** List of cell's border objects */ - std::vector m_borderList; - /** List of cell's background objects */ - std::vector m_backgroundList; - /** - * Shows which date system was when this file was last saved - * Value | Description - * :---: | ----------- - * 0 | 1900 system (the Excel for Windows default) - * 1 | 1904 system (the Excel for Macintosh default) - */ - unsigned short m_dateMode = 0; - /** - * An integer denoting the character set used for strings in this file. - * For BIFF 8 and later, this will be 1200, meaning Unicode; more precisely, UTF_16_LE - * For earlier versions, this is used to derive the appropriate encoding. - * Example: - * @code `1252 -> 'cp1252'`, `10000 -> 'mac_roman'` @endcode - */ - unsigned short m_codePage = 0; - /** - * A tuple containing the telephone country code for: - * Value | Description - * :---: | ----------- - * 0 | User-interface setting when the file was created - * 1 | Regional settings - * This information may give a clue to the correct encoding for an unknown codepage. 
- * Example: - * @code `(1, 61)` -> `(USA, Australia)` @endcode - */ - std::pair m_countries {0, 0}; - /** What (if anything) is recorded as the name of the last user to save the file */ - std::string m_userName; - /** Encoding that was derived from the codepage */ - std::string m_encoding; - -private: - /** - * @brief - * Get BIFF version - * @param[in] streamSign - * Stream signature - * @return - * BIFF version - * @throw std::invalid_argument - * Unsupported format, or corrupt file: Expected BOF record - * @throw std::invalid_argument - * Unsupported format, or corrupt file: Invalid length %1 for BOF record type %2 - * @throw std::invalid_argument - * Unsupported format, or corrupt file: Incomplete BOF record[2] - * @throw std::logic_error - * Workspace file -- no spreadsheet data - * @throw std::logic_error - * BOF not workbook/worksheet - * @since 1.0 - */ - int getBiffVersion(int streamSign); - - /** - * @brief - * Get worksheet when BIFF version <= 40 (No workbook globals) - * @since 1.0 - */ - void getFakeGlobalsSheet(); - - /** - * @brief - * Read workbook globals - * @since 1.0 - */ - void parseGlobals(); - - /** - * @brief - * Get sheet by id - * @param[in] sheetId - * Sheet id - * @param[in] shouldUpdatePos - * Should update stream position - * @since 1.0 - */ - void getSheet(size_t sheetId, bool shouldUpdatePos = true); - - /** - * @brief - * Read SST (Shared Strings Table) data - * @param[in] data - * Binary data - * @since 1.0 - */ - void handleSst(const std::string& data); - - /** - * @brief - * Read Boundsheet data - * @param[in] data - * Binary data - * @since 1.0 - */ - void handleBoundsheet(const std::string& data); - - /** - * @brief - * Read External names - * @param[in] data - * Binary data - * @since 1.0 - */ - void handleExternalName(const std::string& data); - - /** - * @brief - * Read External sheet - * @param[in] data - * Binary data - * @throw std::logic_error - * Missing CONTINUE after EXTERNSHEET record - * @since 1.0 - */ - void 
handleExternalSheet(std::string& data); - - /** - * @brief - * Read SHEETHDR record (is followed by (BOF ... EOF) substream). BIFF 4W special - * @param[in] data - * Binary data - * @since 1.0 - */ - void handleSheethdr(const std::string& data); - - /** - * @brief - * Read SUPBOOK (EXTERNALBOOK in OOo docs) - * @param[in] data - * Binary data - * @since 1.0 - */ - void handleSupbook(const std::string& data); - - /** - * @brief - * Read NAME records - * @param[in] data - * Binary data - * @since 1.0 - */ - void handleName(const std::string & data); - - /** - * @brief - * Initialize format info. Needs to be done once per sheet for BIFF 4W - * @since 1.0 - */ - void initializeFormatInfo(); - - /** - * @brief - * Unpack SST (Shared Strings Table) to @ref m_sharedStrings - * @param[in] dataTable - * SST data - * @param[in] stringCount - * String count in SST - * @since 1.0 - */ - void unpackSst(const std::vector& dataTable, int stringCount); - - /** - * @brief - * Finalize NAME records - * @since 1.0 - */ - void namesEpilogue(); - - /** Workbook stream content */ - std::string m_workBook; - /** Stream start position */ - int m_base = 0; - /** Sheets absolute position in the stream */ - std::vector m_sheetAbsolutePos; - /** Sheets id and name relation */ - std::unordered_map m_sheetIdFromName; - /** Sheet records offset */ - int m_sheetOffset = 0; - /** External sheet count */ - int m_externalSheetCount = 0; - /** External sheets id and name relation */ - std::unordered_map m_externalSheetNameFromId; - /** Supbook types */ - std::vector m_supbookTypes; - /** SUPBOOK record count */ - int m_supbookCount = 0; - /** ADDIN functions names */ - std::vector m_addinFuncNames; - /** SHEETHDR record count. BIFF 4W only */ - int m_sheethdrCount = 0; - /** If raw user name */ - bool m_isRawUserName = false; -}; - - -/** - * @class Name - * @brief - * Information relating to a named reference, formula, macro, ... 
- */ -class Name { -public: - /** - * @param[in] book - * Pointer to parent Book object - * @since 1.0 - */ - Name(Book* book); - - /** - * @brief - * Operator `<` overload - * @since 1.0 - */ - bool operator < (const Name& name) const; - - /** Pointer to parent Book object */ - Book* m_book; - /** - * If formula is hidden - * Value | Description - * :---: | ----------- - * False | Visible - * True | Hidden - */ - bool m_isHidden = false; - /** - * Function type. Relevant only if macro == 1 - * Value | Description - * :---: | ----------- - * False | Command macro - * True | Function macro - */ - bool m_function = false; - /** - * Macro type. Relevant only if macro == 1 - * Value | Description - * :---: | ----------- - * False | Sheet macro - * True | VisualBasic macro - */ - bool m_vbasic = false; - /** - * Formula name - * Value | Description - * :---: | ----------- - * False | Standard name - * True | Macro name - */ - bool m_macro = false; - /** - * Formula complexity - * Value | Description - * :---: | ----------- - * False | Simple formula - * True | Complex formula (array formula or user defined) - */ - bool m_isComplex = false; - /** - * Formula name type - * Value | Description - * :---: | ----------- - * False | User-defined name - * True | Built-in name - */ - bool m_builtIn = false; - /** Function group. Relevant only if macro == 1 */ - bool m_functionGroup = false; - /** - * If data is binary - * Value | Description - * :---: | ----------- - * False | Formula definition - * True | Binary data - */ - bool m_isBinary = false; - /** Index of this object in @ref Book.m_nameObjList */ - size_t m_nameIndex = 0; - /** Object name (Unicode string). 
If built-in, decoded as per OOo docs */ - std::string m_name; - /** 8-bit string */ - std::string m_rawFormula; - /** - * Name scope visibility - * Value | Description - * :---: | ----------- - * -1 | Name is global (visible in all calculation sheets) - * -2 | Name belongs to a macro sheet or VBA sheet - * -3 | Name is invalid - * 0 <= scope < sheetCount | Name is local to the sheet whose index is scope - */ - int m_scope = -1; - //** Result of evaluating the formula, if any */ - //Operand m_result; - /** Current sheet index */ - int m_excelSheetIndex; - /** External sheet index */ - int m_externalSheetIndex; - /** if formula is evaluated */ - bool m_evaluated; - /** Stack of formula operations */ - std::vector m_stack; - /** If has relations */ - bool m_hasRelation = false; - /** If has errors */ - bool m_hasError = false; - /** Function flags */ - unsigned short m_optionFlags; - /** VisualBasic formula length */ - unsigned short m_basicFormulaLength; - /** Formula text */ - std::string m_formulaText; -}; - -} // End namespace diff --git a/3rdparty/libs/fileext/excel/excel.cpp b/3rdparty/libs/fileext/excel/excel.cpp index c2b3fd5..860a085 100644 --- a/3rdparty/libs/fileext/excel/excel.cpp +++ b/3rdparty/libs/fileext/excel/excel.cpp @@ -1,78 +1,45 @@ /** - * @brief Excel files (xls/xlsx) into HTML сonverter + * @brief Excel files (xls/xlsx) text extractor * @package excel * @file excel.cpp * @author dmryutov (dmryutov@gmail.com) - * @copyright python-excel (https://github.com/python-excel/xlrd) * @date 02.12.2016 -- 28.01.2018 */ #include #include -#include #include "tools.hpp" -#include "book.hpp" -#include "xlsx.hpp" +#include "excel_xlsxio.hpp" +#include "excel_libxls.hpp" #include "excel.hpp" namespace excel { -/** Inline style */ -const std::string STYLE = "body{background:#fafafa}label{background:#f1f1f1;color:#aaa;" - "font-size:14px;font-weight:600;text-align:center;position:relative;" - "top:3px;margin:0 0 -1px;padding:10px;display:inline-block;" - 
"border:0 solid #ddd;border-width:1px;border-radius:3px 3px 0 0;" - "cursor:pointer}label:hover{color:#888}input{position:absolute;" - "left:-9999px}#tab10:checked~#tabL10,#tab11:checked~#tabL11," - "#tab12:checked~#tabL12,#tab13:checked~#tabL13,#tab14:checked~#tabL14," - "#tab15:checked~#tabL15,#tab16:checked~#tabL16,#tab17:checked~#tabL17," - "#tab18:checked~#tabL18,#tab19:checked~#tabL19,#tab1:checked~#tabL1," - "#tab20:checked~#tabL20,#tab2:checked~#tabL2,#tab3:checked~#tabL3," - "#tab4:checked~#tabL4,#tab5:checked~#tabL5,#tab6:checked~#tabL6," - "#tab7:checked~#tabL7,#tab8:checked~#tabL8,#tab9:checked~#tabL9{" - "width:intrinsic;background:#fff;color:#555;border-top:1px solid #093;" - "border-bottom:1px solid #fff;top:0;z-index:3}.tabContent{" - "background:#fff;position:relative;z-index:2;width:intrinsic}" - ".tabContent div{background:#fff;border:1px solid #ddd;padding:10px;" - "display:none;-webkit-transition:opacity .2s ease-in-out;" - "-moz-transition:opacity .2s ease-in-out;" - "transition:opacity .2s ease-in-out}#tab10:checked~.tabContent #tabC10," - "#tab11:checked~.tabContent #tabC11,#tab12:checked~.tabContent #tabC12," - "#tab13:checked~.tabContent #tabC13,#tab14:checked~.tabContent #tabC14," - "#tab15:checked~.tabContent #tabC15,#tab16:checked~.tabContent #tabC16," - "#tab17:checked~.tabContent #tabC17,#tab18:checked~.tabContent #tabC18," - "#tab19:checked~.tabContent #tabC19,#tab1:checked~.tabContent #tabC1," - "#tab20:checked~.tabContent #tabC20,#tab2:checked~.tabContent #tabC2," - "#tab3:checked~.tabContent #tabC3,#tab4:checked~.tabContent #tabC4," - "#tab5:checked~.tabContent #tabC5,#tab6:checked~.tabContent #tabC6," - "#tab7:checked~.tabContent #tabC7,#tab8:checked~.tabContent #tabC8," - "#tab9:checked~.tabContent #tabC9{display:inline-block}"; - -// public: Excel::Excel(const std::string& fileName, const std::string& extension) : FileExtension(fileName), m_extension(extension) {} int Excel::convert(bool addStyle, bool extractImages, char 
mergingMode) { - // Convert file - Book* book = new Book(m_fileName, m_text, false); - if (!strcasecmp(m_extension.c_str(), "xlsx")) { - Xlsx xlsx(book); - xlsx.openWorkbookXlsx(); - } else { - book->openWorkbookXls(); + int result = -1; + + if (!strcasecmp(m_extension.c_str(), "xlsx")) { + result = parseXlsxWithXlsxio(m_fileName, m_text); + } else { + result = parseXlsWithLibxls(m_fileName, m_text); + } + + if (result != 0) { + m_text.clear(); } - // Apply truncation if enabled if (m_truncationEnabled && m_text.size() > m_maxBytes) { m_text = truncateAtBoundary(m_text, m_maxBytes); m_truncated = true; } - delete book; - return 0; + return result; } } diff --git a/3rdparty/libs/fileext/excel/excel.hpp b/3rdparty/libs/fileext/excel/excel.hpp index 0dd0187..23fdd86 100644 --- a/3rdparty/libs/fileext/excel/excel.hpp +++ b/3rdparty/libs/fileext/excel/excel.hpp @@ -1,61 +1,29 @@ /** - * @brief Excel files (xls/xlsx) into HTML сonverter + * @brief Excel files (xls/xlsx) text extractor * @package excel * @file excel.hpp * @author dmryutov (dmryutov@gmail.com) - * @copyright python-excel (https://github.com/python-excel/xlrd) - * @version 1.1.1 - * @date 02.12.2016 -- 18.10.2017 + * @version 2.0.0 + * @date 02.12.2016 -- 27.04.2026 */ #pragma once #include -#include #include "fileext/fileext.hpp" -/** - * @namespace excel - * @brief - * Excel files (xls/xlsx) into HTML сonverter - */ namespace excel { -/** - * @class Excel - * @brief - * Excel files (xls/xlsx) into HTML сonverter - */ class Excel: public fileext::FileExtension { public: - /** - * @param[in] fileName - * File name - * @param[in] extension - * File extension - * @since 1.0 - */ Excel(const std::string& fileName, const std::string& extension); - /** Destructor */ virtual ~Excel() = default; - /** - * @brief - * Convert file to HTML-tree - * @param[in] addStyle - * Should read and add styles to HTML-tree - * @param[in] extractImages - * True if should extract images - * @param[in] mergingMode - * 
Colspan/rowspan processing mode - * @since 1.0 - */ - int convert(bool addStyle = true, bool extractImages = false, char mergingMode = 0) override; + int convert(bool addStyle = true, bool extractImages = false, char mergingMode = 0) override; private: - /** Input file extension (xls/xlsx) */ const std::string m_extension; }; diff --git a/3rdparty/libs/fileext/excel/excel_libxls.cpp b/3rdparty/libs/fileext/excel/excel_libxls.cpp new file mode 100644 index 0000000..9b12d25 --- /dev/null +++ b/3rdparty/libs/fileext/excel/excel_libxls.cpp @@ -0,0 +1,54 @@ +#include "excel_libxls.hpp" + +#include + +#include + +namespace excel { + +int parseXlsWithLibxls(const std::string& filename, std::string& output) { + xls::xls_error_t err; + xls::xlsWorkBook* wb = xls::xls_open_file(filename.c_str(), "UTF-8", &err); + if (!wb) { + return -1; + } + + err = xls::xls_parseWorkBook(wb); + if (err != xls::LIBXLS_OK) { + xls::xls_close_WB(wb); + return -1; + } + + for (int i = 0; i < wb->sheets.count; i++) { + xls::xlsWorkSheet* ws = xls::xls_getWorkSheet(wb, i); + if (!ws) { + continue; + } + + err = xls::xls_parseWorkSheet(ws); + if (err != xls::LIBXLS_OK) { + xls::xls_close_WS(ws); + continue; + } + + for (int row = 0; row <= ws->rows.lastrow; row++) { + for (int col = 0; col <= ws->rows.lastcol; col++) { + xls::xlsCell* cell = xls::xls_cell(ws, row, col); + if (!cell || cell->isHidden) { + continue; + } + if (cell->str && cell->str[0] != '\0') { + output += cell->str; + output += '\n'; + } + } + } + + xls::xls_close_WS(ws); + } + + xls::xls_close_WB(wb); + return 0; +} + +} // namespace excel diff --git a/3rdparty/libs/fileext/excel/excel_libxls.hpp b/3rdparty/libs/fileext/excel/excel_libxls.hpp new file mode 100644 index 0000000..d6d9f11 --- /dev/null +++ b/3rdparty/libs/fileext/excel/excel_libxls.hpp @@ -0,0 +1,9 @@ +#pragma once + +#include + +namespace excel { + +int parseXlsWithLibxls(const std::string& filename, std::string& output); + +} // namespace excel diff --git 
a/3rdparty/libs/fileext/excel/excel_xlsxio.cpp b/3rdparty/libs/fileext/excel/excel_xlsxio.cpp new file mode 100644 index 0000000..6496225 --- /dev/null +++ b/3rdparty/libs/fileext/excel/excel_xlsxio.cpp @@ -0,0 +1,53 @@ +#include "excel_xlsxio.hpp" + +#include + +#include +#include +#include + +namespace excel { + +static int sheetNameCallback(const char* name, void* callbackdata) { + auto* names = static_cast*>(callbackdata); + if (name) { + names->push_back(name); + } + return 0; +} + +int parseXlsxWithXlsxio(const std::string& filename, std::string& output) { + xlsxioreader handle = xlsxioread_open(filename.c_str()); + if (!handle) { + return -1; + } + + std::vector sheetNames; + xlsxioread_list_sheets(handle, sheetNameCallback, &sheetNames); + + for (const auto& sheetName : sheetNames) { + xlsxioreadersheet sheet = xlsxioread_sheet_open( + handle, sheetName.c_str(), XLSXIOREAD_SKIP_EMPTY_ROWS); + if (!sheet) { + continue; + } + + while (xlsxioread_sheet_next_row(sheet)) { + char* value = nullptr; + while ((value = xlsxioread_sheet_next_cell(sheet)) != nullptr) { + if (value[0] != '\0') { + output += value; + output += '\n'; + } + xlsxioread_free(value); + } + } + + xlsxioread_sheet_close(sheet); + } + + xlsxioread_close(handle); + return 0; +} + +} // namespace excel diff --git a/3rdparty/libs/fileext/excel/excel_xlsxio.hpp b/3rdparty/libs/fileext/excel/excel_xlsxio.hpp new file mode 100644 index 0000000..c56b8cd --- /dev/null +++ b/3rdparty/libs/fileext/excel/excel_xlsxio.hpp @@ -0,0 +1,9 @@ +#pragma once + +#include + +namespace excel { + +int parseXlsxWithXlsxio(const std::string& filename, std::string& output); + +} // namespace excel diff --git a/3rdparty/libs/fileext/excel/format.cpp b/3rdparty/libs/fileext/excel/format.cpp deleted file mode 100644 index ccc9893..0000000 --- a/3rdparty/libs/fileext/excel/format.cpp +++ /dev/null @@ -1,729 +0,0 @@ -/** - * @brief Excel files (xls/xlsx) into HTML сonverter - * @package excel - * @file formatting.cpp - * 
@author dmryutov (dmryutov@gmail.com) - * @copyright python-excel (https://github.com/python-excel/xlrd) - * @date 02.12.2016 -- 28.01.2018 - */ -#include - -#include "biffh.hpp" - -#include "format.hpp" - - -namespace excel { - -/** Standard for US English locale format code types */ -const std::unordered_map STD_FORMAT_CODE_TYPES { - {0, FGE}, {1, FNU}, {2, FNU}, {3, FNU}, {4, FNU}, - {5, FNU}, {6, FNU}, {7, FNU}, {8, FNU}, {9, FNU}, - {10, FNU}, {11, FNU}, {12, FNU}, {13, FNU}, {14, FDT}, - {15, FDT}, {16, FDT}, {17, FDT}, {18, FDT}, {19, FDT}, - {20, FDT}, {21, FDT}, {22, FDT}, {27, FDT}, {28, FDT}, - {29, FDT}, {30, FDT}, {31, FDT}, {32, FDT}, {33, FDT}, - {34, FDT}, {35, FDT}, {36, FDT}, {37, FNU}, {38, FNU}, - {39, FNU}, {40, FNU}, {41, FNU}, {42, FNU}, {43, FNU}, - {44, FNU}, {45, FDT}, {46, FDT}, {47, FDT}, {48, FNU}, - {49, FTX}, {50, FDT}, {51, FDT}, {52, FDT}, {53, FDT}, - {54, FDT}, {55, FDT}, {56, FDT}, {57, FDT}, {58, FDT}, - {59, FNU}, {60, FNU}, {61, FNU}, {62, FNU}, {67, FNU}, - {68, FNU}, {69, FNU}, {70, FNU}, {71, FDT}, {72, FDT}, - {73, FDT}, {74, FDT}, {75, FDT}, {76, FDT}, {77, FDT}, - {78, FDT}, {79, FDT}, {80, FDT}, {81, FDT} -}; -/** Standard for US English locale format string */ -const std::unordered_map STD_FORMAT_STRINGS { - {0x00, "General"}, - {0x01, "0"}, - {0x02, "0.00"}, - {0x03, "#,##0"}, - {0x04, "#,##0.00"}, - {0x05, "$#,##0_);($#,##0)"}, - {0x06, "$#,##0_);[Red]($#,##0)"}, - {0x07, "$#,##0.00_);($#,##0.00)"}, - {0x08, "$#,##0.00_);[Red]($#,##0.00)"}, - {0x09, "0%"}, - {0x0a, "0.00%"}, - {0x0b, "0.00E+00"}, - {0x0c, "# \?/\?"}, - {0x0d, "# \?\?/\?\?"}, - {0x0e, "m/d/yy"}, - {0x0f, "d-mmm-yy"}, - {0x10, "d-mmm"}, - {0x11, "mmm-yy"}, - {0x12, "h:mm AM/PM"}, - {0x13, "h:mm:ss AM/PM"}, - {0x14, "h:mm"}, - {0x15, "h:mm:ss"}, - {0x16, "m/d/yy h:mm"}, - {0x25, "#,##0_);(#,##0)"}, - {0x26, "#,##0_);[Red](#,##0)"}, - {0x27, "#,##0.00_);(#,##0.00)"}, - {0x28, "#,##0.00_);[Red](#,##0.00)"}, - {0x29, "_(* #,##0_);_(* (#,##0);_(* 
\"-\"_);_(@_)"}, - {0x2a, "_($* #,##0_);_($* (#,##0);_($* \"-\"_);_(@_)"}, - {0x2b, "_(* #,##0.00_);_(* (#,##0.00);_(* \"-\"\?\?_);_(@_)"}, - {0x2c, "_($* #,##0.00_);_($* (#,##0.00);_($* \"-\"\?\?_);_(@_)"}, - {0x2d, "mm:ss"}, - {0x2e, "[h]:mm:ss"}, - {0x2f, "mm:ss.0"}, - {0x30, "##0.0E+0"}, - {0x31, "@"} -}; -/** Non date formats */ -const std::unordered_map NON_DATE_FORMATS { - {"0.00E+00", 1}, - {"##0.0E+0", 1}, - {"General", 1}, - {"GENERAL", 1}, // OOo Calc 1.1.4 does this - {"general", 1}, // pyExcelerator 0.6.3 does this - {"@", 1} -}; -/** Built-in style name list */ -const std::vector BUILIT_STYLE_NAMES { - "Normal", - "RowLevel_", - "ColLevel_", - "Comma", - "Currency", - "Percent", - "Comma [0]", - "Currency [0]", - "Hyperlink", - "Followed Hyperlink" -}; -/** Skip charecters dictionary */ -const std::unordered_map SKIP_CHAR_DICT { - {' ', 1}, {'$', 1}, {')', 1}, {'(', 1}, - {'+', 1}, {'-', 1}, {'/', 1}, {':', 1} -}; -/** Date charecters dictionary */ -const std::unordered_map DATE_CHAR_DICT { - {'y', 5}, {'Y', 5}, {'m', 5}, {'M', 5}, - {'d', 5}, {'D', 5}, {'h', 5}, {'H', 5}, - {'s', 5}, {'S', 5} -}; -/** Number charecters dictionary */ -const std::unordered_map NUMBER_CHAR_DICT = { - {'0', 5}, {'#', 5}, {'?', 5} -}; -/** BIFF 5 default palette */ -const std::vector> DEFAULT_PALETTE_B5 { - { 0, 0, 0}, {255, 255, 255}, {255, 0, 0}, { 0, 255, 0}, - { 0, 0, 255}, {255, 255, 0}, {255, 0, 255}, { 0, 255, 255}, - {128, 0, 0}, { 0, 128, 0}, { 0, 0, 128}, {128, 128, 0}, - {128, 0, 128}, { 0, 128, 128}, {192, 192, 192}, {128, 128, 128}, - {153, 153, 255}, {153, 51, 102}, {255, 255, 204}, {204, 255, 255}, - {102, 0, 102}, {255, 128, 128}, { 0, 102, 204}, {204, 204, 255}, - { 0, 0, 128}, {255, 0, 255}, {255, 255, 0}, { 0, 255, 255}, - {128, 0, 128}, {128, 0, 0}, { 0, 128, 128}, { 0, 0, 255}, - { 0, 204, 255}, {204, 255, 255}, {204, 255, 204}, {255, 255, 153}, - {153, 204, 255}, {255, 153, 204}, {204, 153, 255}, {227, 227, 227}, - { 51, 102, 255}, { 51, 204, 204}, 
{153, 204, 0}, {255, 204, 0}, - {255, 153, 0}, {255, 102, 0}, {102, 102, 153}, {150, 150, 150}, - { 0, 51, 102}, { 51, 153, 102}, { 0, 51, 0}, { 51, 51, 0}, - {153, 51, 0}, {153, 51, 102}, { 51, 51, 153}, { 51, 51, 51} -}; -/** BIFF 2 default palette */ -const std::vector> DEFAULT_PALETTE_B2(DEFAULT_PALETTE_B5.begin(), - DEFAULT_PALETTE_B5.begin() + 16); -/** BIFF 8 default palette */ -const std::vector> DEFAULT_PALETTE_B8 { // {red, green, blue} - { 0, 0, 0}, {255, 255, 255}, {255, 0, 0}, { 0, 255, 0}, // 0 - { 0, 0, 255}, {255, 255, 0}, {255, 0, 255}, { 0, 255, 255}, // 4 - {128, 0, 0}, { 0, 128, 0}, { 0, 0, 128}, {128, 128, 0}, // 8 - {128, 0, 128}, { 0, 128, 128}, {192, 192, 192}, {128, 128, 128}, // 12 - {153, 153, 255}, {153, 51, 102}, {255, 255, 204}, {204, 255, 255}, // 16 - {102, 0, 102}, {255, 128, 128}, { 0, 102, 204}, {204, 204, 255}, // 20 - { 0, 0, 128}, {255, 0, 255}, {255, 255, 0}, { 0, 255, 255}, // 24 - {128, 0, 128}, {128, 0, 0}, { 0, 128, 128}, { 0, 0, 255}, // 28 - { 0, 204, 255}, {204, 255, 255}, {204, 255, 204}, {255, 255, 153}, // 32 - {153, 204, 255}, {255, 153, 204}, {204, 153, 255}, {255, 204, 153}, // 36 - { 51, 102, 255}, { 51, 204, 204}, {153, 204, 0}, {255, 204, 0}, // 40 - {255, 153, 0}, {255, 102, 0}, {102, 102, 153}, {150, 150, 150}, // 44 - { 0, 51, 102}, { 51, 153, 102}, { 0, 51, 0}, { 51, 51, 0}, // 48 - {153, 51, 0}, {153, 51, 102}, { 51, 51, 153}, { 51, 51, 51} // 52 -}; -/** XLS Default palette */ -const std::unordered_map>> DEFAULT_PALETTE { - {80, DEFAULT_PALETTE_B8}, - {70, DEFAULT_PALETTE_B5}, - {50, DEFAULT_PALETTE_B5}, - {45, DEFAULT_PALETTE_B2}, - {40, DEFAULT_PALETTE_B2}, - {30, DEFAULT_PALETTE_B2}, - {21, DEFAULT_PALETTE_B2}, - {20, DEFAULT_PALETTE_B2} -}; -/** Format bracketed text regex */ -const std::regex FORMAT_BRACKETED_TEXT("\\[[^\\]]*\\]"); - -// Formatting public: -Formatting::Formatting(Book* book) - : m_book(book) {} - -void Formatting::initializeBook() { - m_book->m_xfEpilogueDone = false; - if 
(!m_book->m_addStyle) - return; - // Add 8 invariant colors - for (int i = 0; i < 8; ++i) - m_book->m_colorMap[i] = DEFAULT_PALETTE_B8[i]; - // Add default palette depending on the version - auto& defaultPalette = DEFAULT_PALETTE.at(m_book->m_biffVersion); - int paletteSize = static_cast(defaultPalette.size()); - for (int i = 0; i < paletteSize; ++i) - m_book->m_colorMap[i+8] = defaultPalette[i]; - // Add the specials -- None means the RGB value is not known - // System window text color for border lines - //m_book->m_colorMap[paletteSize+8] = {255, 255, 255}; - // System window background color for pattern background - //m_book->m_colorMap[paletteSize+8+1] = {255, 255, 255}; - // System ToolTip text color (used in note objects) - m_book->m_colorMap[0x51] = {255, 255, 255}; - // 32767, system window text color for fonts - m_book->m_colorMap[0x7FFF] = {255, 255, 255}; -} - -void Formatting::handleFont(const std::string& data) { - if (!m_book->m_addStyle) - return; - if (m_book->m_encoding.empty()) - m_book->getEncoding(); - - int size = static_cast(m_book->m_fontList.size()); - if (size == 4) { - Font f; - f.m_name = "Dummy Font"; - f.m_fontIndex = size; - m_book->m_fontList.push_back(f); - size += 1; - } - Font f; - f.m_fontIndex = size; - unsigned short flags; - if (m_book->m_biffVersion >= 50) { - f.m_height = m_book->readByte(data, 0, 2); - flags = m_book->readByte(data, 2, 2); - f.m_color.m_index = m_book->readByte(data, 4, 2); - f.m_weight = m_book->readByte(data, 6, 2); - f.m_escapement = m_book->readByte(data, 8, 2); - f.m_underlineType = m_book->readByte(data, 10, 1); - f.m_family = m_book->readByte(data, 11, 1); - f.m_characterSet = m_book->readByte(data, 12, 1); - f.m_isBold = flags & 1; - f.m_isItalic = (flags & 2) >> 1; - f.m_isUnderlined = (flags & 4) >> 2; - f.m_isStruckOut = (flags & 8) >> 3; - f.m_isOutlined = (flags & 16) >> 4; - f.m_isShadowed = (flags & 32) >> 5; - if (m_book->m_biffVersion >= 80) - f.m_name = m_book->unpackUnicode(data, 14, 1); 
- else - f.m_name = m_book->unpackString(data, 14, 1); - } - else if (m_book->m_biffVersion >= 30) { - f.m_height = m_book->readByte(data, 0, 2); - flags = m_book->readByte(data, 2, 2); - f.m_color.m_index = m_book->readByte(data, 4, 2); - f.m_isBold = flags & 1; - f.m_isItalic = (flags & 2) >> 1; - f.m_isUnderlined = (flags & 4) >> 2; - f.m_isStruckOut = (flags & 8) >> 3; - f.m_isOutlined = (flags & 16) >> 4; - f.m_isShadowed = (flags & 32) >> 5; - f.m_name = m_book->unpackString(data, 6, 1); - f.m_weight = f.m_isBold ? 700 : 400; - f.m_escapement = 0; // None - f.m_underlineType = f.m_isUnderlined; // None or Single - f.m_family = 0; // Unknown / don't care - f.m_characterSet = 1; // System default (0 means "ANSI Latin") - } - else { // BIFF2 - f.m_height = m_book->readByte(data, 0, 2); - flags = m_book->readByte(data, 2, 2); - f.m_color.m_index = 0x7FFF; // "System window text color" - f.m_isBold = flags & 1; - f.m_isItalic = (flags & 2) >> 1; - f.m_isUnderlined = (flags & 4) >> 2; - f.m_isStruckOut = (flags & 8) >> 3; - f.m_isOutlined = 0; - f.m_isShadowed = 0; - f.m_name = m_book->unpackString(data, 4, 1); - f.m_weight = f.m_isBold ? 
700 : 400; - f.m_escapement = 0; // None - f.m_underlineType = f.m_isUnderlined; // None or Single - f.m_family = 0; // Unknown / don't care - f.m_characterSet = 1; // System default (0 means "ANSI Latin") - } - m_book->m_fontList.push_back(f); -} - -void Formatting::handleFormat(const std::string& data, int recordType) { - if (!m_book->m_addStyle) - return; - int bv = m_book->m_biffVersion; - if (recordType == XL_FORMAT2) - bv = std::min(bv, 30); - if (!m_book->m_encoding.empty()) - m_book->getEncoding(); - - int position = 2; - unsigned short formatKey; - if (bv >= 50) - formatKey = m_book->readByte(data, 0, 2); - else { - formatKey = m_book->m_actualFormatCount; - if (bv <= 30) - position = 0; - } - m_book->m_actualFormatCount += 1; - - std::string unistrg; - if (bv >= 80) - unistrg = m_book->unpackUnicode(data, 2); - else - unistrg = m_book->unpackString(data, position, 1); - - int type = isDateFormattedString(unistrg) ? FDT : FGE; - Format format(formatKey, type, unistrg); - - m_book->m_formatMap.emplace(formatKey, format); - m_book->m_formatList.push_back(format); -} - -void Formatting::handleXf(const std::string& data) { - if (!m_book->m_addStyle) - return; - - XF xf; - xf.m_alignment.m_indentLevel = 0; - xf.m_alignment.m_isShrinkToFit = 0; - xf.m_alignment.m_textDirection = 0; - xf.m_border.m_diagUp = false; - xf.m_border.m_diagDown = false; - xf.m_border.m_diagColor.m_index = 0; - xf.m_border.m_diagLineStyle = 0; // No line - - // Fill in known standard formats, i.e. do this once before process first XF record - if (m_book->m_biffVersion >= 50 && !m_book->m_xfCount) { - for (const auto& x : STD_FORMAT_CODE_TYPES) { - if (m_book->m_formatMap.find(x.first) == m_book->m_formatMap.end()) { - // Note: many standard format codes (mostly CJK date formats) have format strings that - // vary by locale. 
Type (date or numeric) is recorded but formatString will be `None` - const int& type = STD_FORMAT_CODE_TYPES.at(x.first); - m_book->m_formatMap.emplace(x.first, Format(x.first, type, - STD_FORMAT_STRINGS.at(x.first))); - } - } - } - if (m_book->m_biffVersion >= 80) { - xf.m_fontIndex = m_book->readByte(data, 0, 2); - xf.m_formatKey = m_book->readByte(data, 2, 2); - unsigned short parTypeFlags = m_book->readByte(data, 4, 2); - unsigned char parAlign1 = m_book->readByte(data, 6, 1); - xf.m_alignment.m_rotation = m_book->readByte(data, 7, 1); - unsigned char parAlign2 = m_book->readByte(data, 8, 1); - unsigned char parUsed = m_book->readByte(data, 9, 1); - unsigned int borderBackgr1 = m_book->readByte(data, 10, 4); - int borderBackgr2 = m_book->readByte(data, 14, 4); - unsigned short borderBackgr3 = m_book->readByte(data, 18, 2); - - xf.m_protection.m_isCellLocked = (parTypeFlags & 0x01) >> 0; - xf.m_protection.m_isFormulaHidden = (parTypeFlags & 0x02) >> 1; - // Following is not in OOo docs, but is mentioned in Gnumeric source - xf.m_isStyle = (parTypeFlags & 0x0004) >> 2; - xf.m_lotusPrefix = (parTypeFlags & 0x0008) >> 3; // Meaning is not known - xf.m_parentStyleIndex = (parTypeFlags & 0xFFF0) >> 4; - - xf.m_alignment.m_horizontalAlign = (parAlign1 & 0x07) >> 0; // parAlign1 - xf.m_alignment.m_isTextWrapped = (parAlign1 & 0x08) >> 3; - xf.m_alignment.m_verticalAlign = (parAlign1 & 0x70) >> 4; - xf.m_alignment.m_indentLevel = (parAlign2 & 0x0f) >> 0; // parAlign2 - xf.m_alignment.m_isShrinkToFit = (parAlign2 & 0x10) >> 4; - xf.m_alignment.m_textDirection = (parAlign2 & 0xC0) >> 6; - - unsigned char flags = parUsed >> 2; - xf.m_formatFlag = (flags >> 0) & 1; - xf.m_fontFlag = (flags >> 1) & 1; - xf.m_alignmentFlag = (flags >> 2) & 1; - xf.m_borderFlag = (flags >> 3) & 1; - xf.m_backgroundFlag = (flags >> 4) & 1; - xf.m_protectionFlag = (flags >> 5) & 1; - - xf.m_border.m_leftLineStyle = (borderBackgr1 & 0x0000000f) >> 0; // borderBackgr1 - 
xf.m_border.m_rightLineStyle = (borderBackgr1 & 0x000000f0) >> 4; - xf.m_border.m_topLineStyle = (borderBackgr1 & 0x00000f00) >> 8; - xf.m_border.m_bottomLineStyle = (borderBackgr1 & 0x0000f000) >> 12; - xf.m_border.m_leftColor.m_index = (borderBackgr1 & 0x007f0000) >> 16; - xf.m_border.m_rightColor.m_index = (borderBackgr1 & 0x3f800000) >> 23; - xf.m_border.m_diagDown = (borderBackgr1 & 0x40000000) >> 30; - xf.m_border.m_diagUp = (borderBackgr1 & 0x80000000) >> 31; - xf.m_border.m_topColor.m_index = (borderBackgr2 & 0x0000007F) >> 0; // borderBackgr2 - xf.m_border.m_bottomColor.m_index = (borderBackgr2 & 0x00003F80) >> 7; - xf.m_border.m_diagColor.m_index = (borderBackgr2 & 0x001FC000) >> 14; - xf.m_border.m_diagLineStyle = (borderBackgr2 & 0x01E00000) >> 21; - - xf.m_background.m_fillPattern = (borderBackgr2 & 0xFC000000) >> 26; // borderBackgr2 - xf.m_background.m_patternColor.m_index = (borderBackgr3 & 0x007F) >> 0; // borderBackgr3 - xf.m_background.m_backgroundColor.m_index = (borderBackgr3 & 0x3F80) >> 7; - } - else if (m_book->m_biffVersion >= 50) { - xf.m_fontIndex = m_book->readByte(data, 0, 2); - xf.m_formatKey = m_book->readByte(data, 2, 2); - unsigned short parTypeFlags = m_book->readByte(data, 4, 2); - unsigned char parAlign1 = m_book->readByte(data, 6, 1); - unsigned char orientUsed = m_book->readByte(data, 7, 1); - unsigned int borderBackgr1 = m_book->readByte(data, 8, 4); - int borderBackgr2 = m_book->readByte(data, 12, 4); - - xf.m_protection.m_isCellLocked = (parTypeFlags & 0x01) >> 0; - xf.m_protection.m_isFormulaHidden = (parTypeFlags & 0x02) >> 1; - - xf.m_isStyle = (parTypeFlags & 0x0004) >> 2; - xf.m_lotusPrefix = (parTypeFlags & 0x0008) >> 3; // Meaning is not known - xf.m_parentStyleIndex = (parTypeFlags & 0xFFF0) >> 4; - - xf.m_alignment.m_horizontalAlign = (parAlign1 & 0x07) >> 0; - xf.m_alignment.m_isTextWrapped = (parAlign1 & 0x08) >> 3; - xf.m_alignment.m_verticalAlign = (parAlign1 & 0x70) >> 4; - - int orientation[] = {0, 255, 90, 
180}; - xf.m_alignment.m_rotation = orientation[orientUsed & 0x03]; - - unsigned char flags = orientUsed >> 2; - xf.m_formatFlag = (flags >> 0) & 1; - xf.m_fontFlag = (flags >> 1) & 1; - xf.m_alignmentFlag = (flags >> 2) & 1; - xf.m_borderFlag = (flags >> 3) & 1; - xf.m_backgroundFlag = (flags >> 4) & 1; - xf.m_protectionFlag = (flags >> 5) & 1; - - xf.m_border.m_bottomLineStyle = (borderBackgr1 & 0x01C00000) >> 22; // borderBackgr1 - xf.m_border.m_bottomColor.m_index = (borderBackgr1 & 0xFE000000) >> 25; - xf.m_border.m_topLineStyle = (borderBackgr2 & 0x00000007) >> 0; // borderBackgr2 - xf.m_border.m_leftLineStyle = (borderBackgr2 & 0x00000038) >> 3; - xf.m_border.m_rightLineStyle = (borderBackgr2 & 0x000001C0) >> 6; - xf.m_border.m_topColor.m_index = (borderBackgr2 & 0x0000FE00) >> 9; - xf.m_border.m_leftColor.m_index = (borderBackgr2 & 0x007F0000) >> 16; - xf.m_border.m_rightColor.m_index = (borderBackgr2 & 0x3F800000) >> 23; - - xf.m_background.m_patternColor.m_index = (borderBackgr1 & 0x0000007F) >> 0; - xf.m_background.m_backgroundColor.m_index = (borderBackgr1 & 0x00003F80) >> 7; - xf.m_background.m_fillPattern = (borderBackgr1 & 0x003F0000) >> 16; - } - else if (m_book->m_biffVersion >= 40) { - xf.m_fontIndex = m_book->readByte(data, 0, 1); - xf.m_formatKey = m_book->readByte(data, 1, 1); - unsigned short parTypeFlags = m_book->readByte(data, 2, 2); - unsigned char alignOrient = m_book->readByte(data, 4, 1); - unsigned char parUsed = m_book->readByte(data, 5, 1); - unsigned short background34 = m_book->readByte(data, 6, 2); - unsigned int border34 = m_book->readByte(data, 8, 4); - - xf.m_protection.m_isCellLocked = (parTypeFlags & 0x01) >> 0; - xf.m_protection.m_isFormulaHidden = (parTypeFlags & 0x02) >> 1; - - xf.m_isStyle = (parTypeFlags & 0x0004) >> 2; - xf.m_lotusPrefix = (parTypeFlags & 0x0008) >> 3; // Meaning is not known - xf.m_parentStyleIndex = (parTypeFlags & 0xFFF0) >> 4; - - xf.m_alignment.m_horizontalAlign = (alignOrient & 0x07) >> 0; - 
xf.m_alignment.m_isTextWrapped = (alignOrient & 0x08) >> 3; - xf.m_alignment.m_verticalAlign = (alignOrient & 0x30) >> 4; - - int orientation[] = {0, 255, 90, 180}; - xf.m_alignment.m_rotation = orientation[(alignOrient & 0xC0) >> 6]; - - unsigned char flags = parUsed >> 2; - xf.m_formatFlag = (flags >> 0) & 1; - xf.m_fontFlag = (flags >> 1) & 1; - xf.m_alignmentFlag = (flags >> 2) & 1; - xf.m_borderFlag = (flags >> 3) & 1; - xf.m_backgroundFlag = (flags >> 4) & 1; - xf.m_protectionFlag = (flags >> 5) & 1; - - xf.m_border.m_topLineStyle = (border34 & 0x00000007) >> 0; - xf.m_border.m_topColor.m_index = (border34 & 0x000000F8) >> 3; - xf.m_border.m_leftLineStyle = (border34 & 0x00000700) >> 8; - xf.m_border.m_leftColor.m_index = (border34 & 0x0000F800) >> 11; - xf.m_border.m_bottomLineStyle = (border34 & 0x00070000) >> 16; - xf.m_border.m_bottomColor.m_index = (border34 & 0x00F80000) >> 19; - xf.m_border.m_rightLineStyle = (border34 & 0x07000000) >> 24; - xf.m_border.m_rightColor.m_index = (border34 & 0xF8000000) >> 27; - - xf.m_background.m_fillPattern = (background34 & 0x003F) >> 0; - xf.m_background.m_patternColor.m_index = (background34 & 0x07C0) >> 6; - xf.m_background.m_backgroundColor.m_index = (background34 & 0xF800) >> 11; - } - else if (m_book->m_biffVersion == 30) { - xf.m_fontIndex = m_book->readByte(data, 0, 1); - xf.m_formatKey = m_book->readByte(data, 1, 1); - unsigned char protectType = m_book->readByte(data, 2, 1); - unsigned char parUsed = m_book->readByte(data, 3, 1); - unsigned short parAlign = m_book->readByte(data, 4, 2); - unsigned short background34 = m_book->readByte(data, 6, 2); - unsigned int border34 = m_book->readByte(data, 8, 4); - - xf.m_protection.m_isCellLocked = (protectType & 0x01) >> 0; - xf.m_protection.m_isFormulaHidden = (protectType & 0x02) >> 1; - - xf.m_isStyle = (protectType & 0x0004) >> 2; // protectType - xf.m_lotusPrefix = (protectType & 0x0008) >> 3; // Meaning is not known - xf.m_parentStyleIndex = (parAlign & 0xFFF0) 
>> 4; // parAlign - - xf.m_alignment.m_horizontalAlign = (parAlign & 0x07) >> 0; - xf.m_alignment.m_isTextWrapped = (parAlign & 0x08) >> 3; - xf.m_alignment.m_verticalAlign = 2; // Bottom - xf.m_alignment.m_rotation = 0; - - unsigned char flags = parUsed >> 2; - xf.m_formatFlag = (flags >> 0) & 1; - xf.m_fontFlag = (flags >> 1) & 1; - xf.m_alignmentFlag = (flags >> 2) & 1; - xf.m_borderFlag = (flags >> 3) & 1; - xf.m_backgroundFlag = (flags >> 4) & 1; - xf.m_protectionFlag = (flags >> 5) & 1; - - xf.m_border.m_topLineStyle = (border34 & 0x00000007) >> 0; - xf.m_border.m_topColor.m_index = (border34 & 0x000000F8) >> 3; - xf.m_border.m_leftLineStyle = (border34 & 0x00000700) >> 8; - xf.m_border.m_leftColor.m_index = (border34 & 0x0000F800) >> 11; - xf.m_border.m_bottomLineStyle = (border34 & 0x00070000) >> 16; - xf.m_border.m_bottomColor.m_index = (border34 & 0x00F80000) >> 19; - xf.m_border.m_rightLineStyle = (border34 & 0x07000000) >> 24; - xf.m_border.m_rightColor.m_index = (border34 & 0xF8000000) >> 27; - - xf.m_background.m_fillPattern = (background34 & 0x003F) >> 0; - xf.m_background.m_patternColor.m_index = (background34 & 0x07C0) >> 6; - xf.m_background.m_backgroundColor.m_index = (background34 & 0xF800) >> 11; - } - else if (m_book->m_biffVersion == 21) { - // Warning: incomplete treatment; formatting_info not fully supported. Probably need - // to offset incoming BIFF2 XF[n] to BIFF8-like XF[n+16], and create XF[0:16] like - // the standard ones in BIFF8 *AND* add 16 to all XF references in cell records - xf.m_fontIndex = m_book->readByte(data, 0, 1); - unsigned char format_etc = m_book->readByte(data, 1, 1); - unsigned char halign_etc = m_book->readByte(data, 2, 1); - xf.m_formatKey = format_etc & 0x3F; - - xf.m_protection.m_isCellLocked = (format_etc & 0x40) >> 6; - xf.m_protection.m_isFormulaHidden = (format_etc & 0x80) >> 7; - - xf.m_parentStyleIndex = 0; // ??????????? 
- - xf.m_alignment.m_horizontalAlign = (halign_etc & 0x07) >> 0; - xf.m_alignment.m_verticalAlign = 2; // Bottom - xf.m_alignment.m_rotation = 0; - - xf.m_border.m_leftLineStyle = (halign_etc & 0x08) ? 1 : 0; // 1 - thin - xf.m_border.m_leftColor.m_index = (halign_etc & 0x08) ? 8 : 0; // 8 - black - xf.m_border.m_rightLineStyle = (halign_etc & 0x10) ? 1 : 0; - xf.m_border.m_rightColor.m_index = (halign_etc & 0x10) ? 8 : 0; - xf.m_border.m_topLineStyle = (halign_etc & 0x20) ? 1 : 0; - xf.m_border.m_topColor.m_index = (halign_etc & 0x20) ? 8 : 0; - xf.m_border.m_bottomLineStyle = (halign_etc & 0x40) ? 1 : 0; - xf.m_border.m_bottomColor.m_index = (halign_etc & 0x40) ? 8 : 0; - - xf.m_formatFlag = true; - xf.m_fontFlag = true; - xf.m_alignmentFlag = true; - xf.m_borderFlag = true; - xf.m_backgroundFlag = true; - xf.m_protectionFlag = true; - - xf.m_background.m_fillPattern = (halign_etc & 0x80) ? 17 : 0; - xf.m_background.m_patternColor.m_index = 8; // Black - xf.m_background.m_backgroundColor.m_index = 9; // White - } - else { - throw std::logic_error("programmer stuff-up: bv=" + std::to_string(m_book->m_biffVersion)); - } - - xf.m_xfIndex = static_cast(m_book->m_xfList.size()); - int cellType = XL_CELL_NUMBER; - if (m_book->m_formatMap.find(xf.m_formatKey) != m_book->m_formatMap.end()) { - unsigned char type = m_book->m_formatMap[xf.m_formatKey].m_type; - cellType = CELL_TYPE_FROM_FORMAT_TYPE.at(type); - } - m_book->m_xfIndexXlTypeMap[xf.m_xfIndex] = cellType; - - if (m_book->m_formatMap.find(xf.m_formatKey) == m_book->m_formatMap.end()) - xf.m_formatKey = 0; - - m_book->m_xfList.push_back(xf); - m_book->m_xfCount++; -} - -void Formatting::handlePalette(const std::string& data) { - if (!m_book->m_addStyle) - return; - unsigned short colorCount = m_book->readByte(data, 0, 2); - //int expectedColorCount = (m_book->m_biffVersion >= 50) ? 
56 : 16; - int expectedSize = 4 * colorCount + 2; - int actualSize = static_cast(data.size()); - int tolerance = 4; - if (expectedSize > actualSize || actualSize > expectedSize + tolerance) - throw std::logic_error( - "PALETTE record: expected size "+ std::to_string(expectedSize) + - ", actual size "+ std::to_string(actualSize) - ); - - // Color will be 0xbbggrr. IOW, red is at the little end - for (int i = 0; i < colorCount; ++i) { - int color = m_book->readByte(data, 4*i + 2, 4); - unsigned char red = color & 0xff; - unsigned char green = (color >> 8) & 0xff; - unsigned char blue = (color >> 16) & 0xff; - m_book->m_paletteRecord.push_back({red, green, blue}); - m_book->m_colorMap[8+i] = {red, green, blue}; - } -} - -void Formatting::handleStyle(const std::string& data) { - if (!m_book->m_addStyle) - return; - unsigned short flagAndXfx = m_book->readByte(data, 0, 2); - unsigned char builtinId = m_book->readByte(data, 2, 1); - unsigned char level = m_book->readByte(data, 3, 1); - unsigned short xfIndex = flagAndXfx & 0x0fff; - - bool builtIn; - std::string name; - // Erroneous record (doesn't have built-in bit set) - if (data == "\0\0\0\0" && m_book->m_styleNameMap.find("Normal") == m_book->m_styleNameMap.end()) { - builtIn = true; - xfIndex = 0; - name = "Normal"; - } - // Built-in style - else if (flagAndXfx & 0x8000) { - builtIn = true; - name = BUILIT_STYLE_NAMES[builtinId]; - if (1 <= builtinId && builtinId <= 2) - name += std::to_string(level + 1); - } - // User-defined style - else { - builtIn = false; - builtinId = 0; - level = 0; - if (m_book->m_biffVersion >= 80) - name = m_book->unpackUnicode(data, 2, 2); - else - name = m_book->unpackString(data, 2, 1); - } - m_book->m_styleNameMap[name] = {builtIn, xfIndex}; -} - -void Formatting::xfEpilogue() { - if (!m_book->m_addStyle) - return; - - m_book->m_xfEpilogueDone = true; - size_t xfCount = m_book->m_xfList.size(); - - for (size_t i = 0; i < xfCount; ++i) { - XF& xf = m_book->m_xfList[i]; - - int cellType 
= XL_CELL_TEXT; - if (m_book->m_formatMap.find(xf.m_formatKey) != m_book->m_formatMap.end()) { - unsigned char type = m_book->m_formatMap[xf.m_formatKey].m_type; - cellType = CELL_TYPE_FROM_FORMAT_TYPE.at(type); - } - m_book->m_xfIndexXlTypeMap[xf.m_xfIndex] = cellType; - - // Now for some assertions - if (!m_book->m_addStyle || xf.m_isStyle) - continue; - if (xf.m_parentStyleIndex < 0 || xf.m_parentStyleIndex >= static_cast(xfCount)) - xf.m_parentStyleIndex = 0; - } -} - -void Formatting::paletteEpilogue() { - for (const auto& font : m_book->m_fontList) { - if (font.m_fontIndex == 4 || font.m_color.m_index == 0x7fff) // Missing font record - continue; - if (m_book->m_colorMap.find(font.m_color.m_index) != m_book->m_colorMap.end()) - m_book->m_colorIndexUsed[font.m_color.m_index] = 1; - } -} - -bool Formatting::isDateFormattedString(const std::string& format) { - int state = 0; - std::string str; - std::string ch = "\\_*"; - - for (const auto& c : format) { - if (state == 0) { - if (c == '"') - state = 1; - else if (find(ch.begin(), ch.end(), c) == ch.end()) - state = 2; - else if (SKIP_CHAR_DICT.find(c) == SKIP_CHAR_DICT.end()) - str += c; - } - else if (state == 1) { - if (c == '"') - state = 0; - } - // Ignore char after backslash, underscore or asterisk - else if (state == 2) { - state = 0; - } - } - - str = std::regex_replace(str, FORMAT_BRACKETED_TEXT, ""); - if (NON_DATE_FORMATS.find(str) != NON_DATE_FORMATS.end()) - return false; - - state = 0; - //int gotSep = 0; - int dateCount = 0; - int numCount = 0; - for (const auto& c : str) { - if (DATE_CHAR_DICT.find(c) == DATE_CHAR_DICT.end()) - dateCount += DATE_CHAR_DICT.at(c); - else if (NUMBER_CHAR_DICT.find(c) == NUMBER_CHAR_DICT.end()) - numCount += NUMBER_CHAR_DICT.at(c); - //else if (c == ';') - // got_sep = 1; - } - - if (dateCount && !numCount) - return true; - if (numCount && !dateCount) - return false; - return (dateCount > numCount); -} - -int Formatting::getNearestColorIndex(std::unordered_map>& 
colorMap, - std::vector& rgb) -{ - int bestMetric = 196608; // 3*256*256 - int bestColor = 0; - for (auto const & map : colorMap) { - if (map.second.empty()) - continue; - int metric = 0; - for (size_t i = 0; i < rgb.size(); ++i) - metric += (rgb[i] - map.second[i]) * (rgb[i] - map.second[i]); - if (metric < bestMetric) { - bestMetric = metric; - bestColor = map.first; - if (metric == 0) - break; - } - } - return bestColor; -} - - -// Format public: -Format::Format(unsigned short formatKey, unsigned char type, std::string formatString) -: m_formatKey(formatKey), m_type(type), m_formatString(formatString) {} - -} // End namespace diff --git a/3rdparty/libs/fileext/excel/format.hpp b/3rdparty/libs/fileext/excel/format.hpp deleted file mode 100644 index 72376d6..0000000 --- a/3rdparty/libs/fileext/excel/format.hpp +++ /dev/null @@ -1,573 +0,0 @@ -/** - * @brief Excel files (xls/xlsx) into HTML сonverter - * @package excel - * @file format.hpp - * @author dmryutov (dmryutov@gmail.com) - * @copyright python-excel (https://github.com/python-excel/xlrd) - * @date 02.12.2016 -- 18.10.2017 - */ -#pragma once - -#include -#include -#include - -#include "biffh.hpp" -#include "book.hpp" -#include "frmt.hpp" - -namespace excel { - -class Book; - -/** - * @class Formatting - * @brief - * Workbook formatting information - */ -class Formatting { -public: - /** - * @param[in] book - * Pointer to parent Book object - * @since 1.0 - */ - Formatting(Book* book); - - /** - * @brief - * Initialize formatting for workbook - * @since 1.0 - */ - void initializeBook(); - - /** - * @brief - * Read FONT record - * @param[in] data - * Binary data - * @since 1.0 - */ - void handleFont(const std::string& data); - - /** - * @brief - * Read FORMAT record - * @param[in] data - * Binary data - * @param[in] recordType - * Record type - * @since 1.0 - */ - void handleFormat(const std::string& data, int recordType = XL_FORMAT); - - /** - * @brief - * Read XF record - * @param[in] data - * Binary data - 
* @throw std::logic_error - * Programmer stuff-up: bv=%1 - * @since 1.0 - */ - void handleXf(const std::string& data); - - /** - * @brief - * Read palette and color records - * @param[in] data - * Binary data - * @throw std::logic_error - * PALETTE record: expected size %1, actual size %2 - * @since 1.0 - */ - void handlePalette(const std::string& data); - - /** - * @brief - * Read style indexes records - * @param[in] data - * Binary data - * @since 1.0 - */ - void handleStyle(const std::string& data); - - /** - * @brief - * Finalize XF records - * @since 1.0 - */ - void xfEpilogue(); - - /** - * @brief - * Check color indexes in fonts. FONT records must come before PALETTE record - * @since 1.0 - */ - void paletteEpilogue(); - - /** - * @brief - * Check if formatted string is date - * @details - * Heuristics. Handle backslashed-escaped chars properly (`hh\hmm\mss\s` -> `23h59m59s`). - * Date formats have one or more of ymdhs (caseless) in them. Numeric formats have # and 0. - * @param[in] format - * Formatted string - * @return - * True if format string is date - * @since 1.0 - */ - static bool isDateFormattedString(const std::string& format); - - /** - * @brief - * Find nearest object color index in palette - * @details - * Uses Euclidean distance. Used only for pre-BIFF8 `WINDOW2` record. - * Doesn't fast and fancy. 
- * @param[in] colorMap - * Color map - * @param[in] rgb - * Object color like `(red, green, blue)` - * @return - * Color index in palette - * @since 1.0 - */ - static int getNearestColorIndex(std::unordered_map>& colorMap, - std::vector& rgb); - - /** Pointer to parent Book object */ - Book* m_book; -}; - - -/** - * @class XFColor - * @brief - * Color information for font, background, borders - */ -class XFColor { -public: - XFColor() = default; - - /** - * Color representing form - * Value | Description - * :---: | ----------- - * False | Color is represented by index in color map - * True | Color is represented in raw RGB form - */ - bool m_isRgb = false; - /** Color index */ - int m_index = -1; - /** - * Color tint - * Value | Formula - * :---: | ------- - * < 0 | color = color * (1 + tint) - * > 0 | color = color * (1 - tint) + (255 - 255 * (1 - tint)) - */ - double m_tint = 0; - /** Object color like `(red, green, blue)` */ - std::vector m_rgb; -}; - - -/** - * @class Font - * @brief - * Contains details of not only what is normally considered a font, but also several - * other display attributes - * @details - * Items correspond to those in the Excel UI's `Format` -> `Cells` -> `Font` tab. - */ -class Font { -public: - Font() = default; - - /** - * If text is bold. Redundant, see "weight" attribute - * Value | Description - * :---: | ----------- - * False | Normal text - * True | Characters are bold - */ - bool m_isBold = false; - /** - * If text is italic - * Value | Description - * :---: | ----------- - * False | Normal text - * True | Characters are italic - */ - bool m_isItalic = false; - /** - * Underline types - * Value | Description - * :---: | ----------- - * 0 | None - * 1 | Single. 0x21 (33) = Single accounting - * 2 | Double. 0x22 (34) = Double accounting - */ - unsigned char m_underlineType = 0; - /** - * If text is underlined. 
Redundant - * Value | Description - * :---: | ----------- - * False | Normal text - * True | Characters are underlined - */ - bool m_isUnderlined = false; - /** - * If text is struck out - * Value | Description - * :---: | ----------- - * False | Normal text - * True | Characters are struck out - */ - bool m_isStruckOut = false; - /** - * If text is outlined (Macintosh only) - * Value | Description - * :---: | ----------- - * False | Normal text - * True | Characters are outlined - */ - bool m_isOutlined = false; - /** - * If text is shadowed (Macintosh only) - * Value | Description - * :---: | ----------- - * False | Normal text - * True | Characters are shadowed - */ - bool m_isShadowed = false; - /** Font weight (100-1000). Standard values are 400 for normal text and 700 for bold text */ - unsigned short m_weight = 400; - /** - * Character set values - * Value | Description - * :---: | ----------- - * 0 | ANSI Latin - * 1 | System default - * 2 | Symbol - * 77 | Apple Roman - * 128 | ANSI Japanese Shift-JIS - * 129 | ANSI Korean (Hangul) - * 130 | ANSI Korean (Johab) - * 134 | ANSI Chinese Simplified GBK - * 136 | ANSI Chinese Traditional BIG5 - * 161 | ANSI Greek - * 162 | ANSI Turkish - * 163 | ANSI Vietnamese - * 177 | ANSI Hebrew - * 178 | ANSI Arabic - * 186 | ANSI Baltic - * 204 | ANSI Cyrillic - * 222 | ANSI Thai - * 238 | ANSI Latin II (Central European) - * 255 | OEM Latin I - */ - unsigned char m_characterSet = 0; - /** Font color */ - XFColor m_color; - /** - * Text escapment - * Value | Description - * :---: | ----------- - * 1 | Superscript - * 2 | Subscript - */ - unsigned short m_escapement = 0; - /** - * Font family - * Value | Description - * :---: | ----------- - * 0 | None (unknown or don't care) - * 1 | Roman (variable width, serifed) - * 2 | Swiss (variable width, sans-serifed) - * 3 | Modern (fixed width, serifed or sans-serifed) - * 4 | Script (cursive) - * 5 | Decorative (specialised, e.g. 
Old English, Fraktur) - */ - unsigned char m_family = 0; - /** Font name */ - std::string m_name; - /** 0-based font index used to refer to this object. Note that index 4 is never used */ - int m_fontIndex = 0; - /** Font height (in twips). A twip = 1/20 of a point */ - unsigned short m_height = 0; -}; - - -/** - * @class XFAlignment - * @brief - * A collection of alignment and similar attributes of XF record - * @details - * Items correspond to those in the Excel UI's `Format` -> `Cells` -> `Alignment` tab. - */ -class XFAlignment { -public: - XFAlignment() = default; - - /** - * Cell horizontal alignment. Section 6.115 (p 214) of OOo docs - * Value | Description - * :---: | ----------- - * 0 | General - * 1 | Left - * 2 | Centred - * 3 | Right - * 4 | Filled - * 5 | Justified (BIFF4-BIFF8X) - * 6 | Centred across selection (BIFF4-BIFF8X) - * 7 | Distributed (BIFF8X) - */ - int m_horizontalAlign = 0; - /** - * Cell vertical alignment. Section 6.115 (p 215) of OOo docs - * Value | Description - * :---: | ----------- - * 0 | Top - * 1 | Centred - * 2 | Bottom - * 3 | Justified (BIFF5-BIFF8X) - * 4 | Distributed (BIFF8X) - */ - int m_verticalAlign = 0; - /** - * Text rotation. Section 6.115 (p 215) of OOo docs. 
- * File versions BIFF7 and earlier use the documented `orientation` attribute - */ - unsigned char m_rotation = 0; - /** - * If text is wrapped - * Value | Description - * :---: | ----------- - * 0 | Normal text - * 1 | Text is wrapped at right margin - */ - int m_isTextWrapped = 0; - /** Indent level */ - int m_indentLevel = 0; - /** - * If font size is shrinked-to-fit - * Value | Description - * :---: | ----------- - * False | Normal font size - * True | Shrink font size to fit text into cell - */ - bool m_isShrinkToFit = false; - /** - * Text direction - * Value | Description - * :---: | ----------- - * 0 | According to context - * 1 | Left-to-right - * 2 | Right-to-left - */ - unsigned char m_textDirection = 0; -}; - - -/** - * @class XFBorder - * @brief - * A collection of border-related attributes of XF record - * @details - * Items correspond to those in the Excel UI's `Format` -> `Cells` -> `Border` tab - * There are five line style attributes; possible values and associated meanings: - * Value | Description - * :---: | ----------- - * 0 | No line - * 1 | Thin - * 2 | Medium - * 3 | Dashed - * 4 | Dotted - * 5 | Thick - * 6 | Double - * 7 | Hair - * 8 | Medium dashed - * 9 | Thin dash-dotted - * 10 | Medium dash-dotted - * 11 | Thin dash-dot-dotted - * 12 | Medium dash-dot-dotted - * 13 | Slanted medium dash-dotted - * Line styles 8 to 13 appear in BIFF8 files (Excel 97 and later) only - */ -class XFBorder { -public: - XFBorder() = default; - - /** Top line color */ - XFColor m_topColor; - /** Bottom line color */ - XFColor m_bottomColor; - /** Left line color */ - XFColor m_leftColor; - /** Right line color */ - XFColor m_rightColor; - /** Diagonal line color */ - XFColor m_diagColor; - /** Top line style */ - int m_topLineStyle = 0; - /** Bottom line style */ - int m_bottomLineStyle = 0; - /** Left line style */ - int m_leftLineStyle = 0; - /** Right line style */ - int m_rightLineStyle = 0; - /** Diagonal line style */ - int m_diagLineStyle = 0; - /** 
Draw diagonal from top left to bottom right */ - bool m_diagDown = false; - /** Draw diagonal from bottom left to top right */ - bool m_diagUp = false; -}; - - -/** - * @class XFBackground - * @brief - * A collection of background-related attributes of XF record - * @details - * Items correspond to those in the Excel UI's `Format` -> `Cells` -> `Patterns` tab. - */ -class XFBackground { -public: - XFBackground() = default; - - /** - * Background fill pattern. Section 3.11 of the OOo docs - * Value | Description - * :---: | ----------- - * 0 | None - * 1 | Solid - * 2 | Medium gray - * 3 | Dark gray - * 4 | Light gray - * 5 | Dark horizontal - * 6 | Dark vertical - * 7 | Dark down - * 8 | Dark up - * 9 | Dark grid - * 10 | Dark trellis - * 11 | Light horizontal - * 12 | Light vertical - * 13 | Light down - * 14 | Light up - * 15 | Lightg rid - * 16 | Light trellis - * 17 | Gray 125 - * 18 | Gray 0625 - */ - int m_fillPattern = 0; - /** Background color */ - XFColor m_backgroundColor; - /** Pattern color */ - XFColor m_patternColor; -}; - - -/** - * @class XFProtection - * @brief - * A collection of protection-related attributes of XF record - * @details - * Items correspond to those in the Excel UI's `Format` -> `Cells` -> `Protection` tab. - * @note - * The OOo docs include the "cell or style" bit in this bundle of attributes. - * This is incorrect; the bit is used in determining which bundles to use. 
- */ -class XFProtection { -public: - XFProtection() = default; - - /** - * If cell is locked (only if the sheet is protected) - * Value | Description - * :---: | ----------- - * False | Normal cell - * True | Cell is prevented from being changed, moved, resized, or deleted - */ - bool m_isCellLocked = false; - /** - * If formula is hidden (only if the sheet is protected) - * Value | Description - * :---: | ----------- - * False | Normal formula - * True | Hide formula so that it doesn't appear in formula bar when cell is selected - */ - bool m_isFormulaHidden = false; -}; - - -/** - * @class XF - * @brief - * eXtended Formatting information for cells, rows, columns and styles - */ -class XF { -public: - XF() = default; - - /** - * XF parent type - * Value | Description - * :---: | ----------- - * False | Cell XF - * False | Style XF - */ - bool m_isStyle = false; - /** - * Parent style index - * Description | Value - * ----------- | :---: - * Cell XF | Index into @ref Book::m_xfList of this XF's style XF - * Style XF | 0xFFF - */ - int m_parentStyleIndex = 0; - /** - * Each of flags describes the validity of specific group of attributes - * In cell XFs: - * Value | Description - * :---: | ----------- - * 0 | Attributes of parent style XF are used, (but only if attributes are valid there) - * 1 | Attributes of this XF are used - * In style XFs: - * Value | Description - * :---: | ----------- - * 0 | Attribute setting is valid - * 1 | Attribute should be ignored - */ - bool m_formatFlag = false; - bool m_fontFlag = false; - bool m_alignmentFlag = false; - bool m_borderFlag = false; - bool m_backgroundFlag = false; - bool m_protectionFlag = false; - bool m_lotusPrefix = false; - /** XF index in @ref Book::m_xfList */ - int m_xfIndex = 0; - /** Font index in @ref Book::m_fontList */ - unsigned short m_fontIndex = 0; - /** - * Key into @ref Book::m_formatMap - * OOo docs on XF record call this "Index to FORMAT record". It is a key to map. 
- * It is true only for Excel 4.0 and earlier files that the key into formatMap from XF - * instance is the same as the index into formatList, and only if index is less than 164 - */ - unsigned short m_formatKey = 0; - /** XFAlignment object */ - XFAlignment m_alignment; - /** XFBorder object */ - XFBorder m_border; - /** XFBackground object */ - XFBackground m_background; - /** XFProtection object */ - XFProtection m_protection; -}; - -} // End namespace \ No newline at end of file diff --git a/3rdparty/libs/fileext/excel/formula.cpp b/3rdparty/libs/fileext/excel/formula.cpp deleted file mode 100644 index 8ae8763..0000000 --- a/3rdparty/libs/fileext/excel/formula.cpp +++ /dev/null @@ -1,1550 +0,0 @@ -/** - * @brief Excel files (xls/xlsx) into HTML сonverter - * @package excel - * @file formula.cpp - * @author dmryutov (dmryutov@gmail.com) - * @copyright python-excel (https://github.com/python-excel/xlrd) - * @date 02.12.2016 -- 29.01.2018 - */ -#include -#include -#include - -#include "tools.hpp" - -#include "biffh.hpp" - -#include "formula.hpp" -#include - -namespace excel { - -/** List separator. 
Probably should depend on locale */ -const std::string LIST_SEPARATOR = ","; -/** Default leaf rank */ -const char LEAF_RANK = 90; -/** Default dunction rank */ -const char FUNC_RANK = 90; -/** index, {name, min#args, max#args, flags, #known_args, return_type, kargs) */ -const std::unordered_map> FUNC_DEFINITIONS { - {0, std::tuple("COUNT", 0, 30)}, - {1, std::tuple("IF", 2, 3)}, - {2, std::tuple("ISNA", 1, 1)}, - {3, std::tuple("ISERROR", 1, 1)}, - {4, std::tuple("SUM", 0, 30)}, - {5, std::tuple("AVERAGE", 1, 30)}, - {6, std::tuple("MIN", 1, 30)}, - {7, std::tuple("MAX", 1, 30)}, - {8, std::tuple("ROW", 0, 1)}, - {9, std::tuple("COLUMN", 0, 1)}, - {10, std::tuple("NA", 0, 0)}, - {11, std::tuple("NPV", 2, 30)}, - {12, std::tuple("STDEV", 1, 30)}, - {13, std::tuple("DOLLAR", 1, 2)}, - {14, std::tuple("FIXED", 2, 3)}, - {15, std::tuple("SIN", 1, 1)}, - {16, std::tuple("COS", 1, 1)}, - {17, std::tuple("TAN", 1, 1)}, - {18, std::tuple("ATAN", 1, 1)}, - {19, std::tuple("PI", 0, 0)}, - {20, std::tuple("SQRT", 1, 1)}, - {21, std::tuple("EXP", 1, 1)}, - {22, std::tuple("LN", 1, 1)}, - {23, std::tuple("LOG10", 1, 1)}, - {24, std::tuple("ABS", 1, 1)}, - {25, std::tuple("INT", 1, 1)}, - {26, std::tuple("SIGN", 1, 1)}, - {27, std::tuple("ROUND", 2, 2)}, - {28, std::tuple("LOOKUP", 2, 3)}, - {29, std::tuple("INDEX", 2, 4)}, - {30, std::tuple("REPT", 2, 2)}, - {31, std::tuple("MID", 3, 3)}, - {32, std::tuple("LEN", 1, 1)}, - {33, std::tuple("VALUE", 1, 1)}, - {34, std::tuple("TRUE", 0, 0)}, - {35, std::tuple("FALSE", 0, 0)}, - {36, std::tuple("AND", 1, 30)}, - {37, std::tuple("OR", 1, 30)}, - {38, std::tuple("NOT", 1, 1)}, - {39, std::tuple("MOD", 2, 2)}, - {40, std::tuple("DCOUNT", 3, 3)}, - {41, std::tuple("DSUM", 3, 3)}, - {42, std::tuple("DAVERAGE", 3, 3)}, - {43, std::tuple("DMIN", 3, 3)}, - {44, std::tuple("DMAX", 3, 3)}, - {45, std::tuple("DSTDEV", 3, 3)}, - {46, std::tuple("VAR", 1, 30)}, - {47, std::tuple("DVAR", 3, 3)}, - {48, std::tuple("TEXT", 2, 2)}, - {49, 
std::tuple("LINEST", 1, 4)}, - {50, std::tuple("TREND", 1, 4)}, - {51, std::tuple("LOGEST", 1, 4)}, - {52, std::tuple("GROWTH", 1, 4)}, - {56, std::tuple("PV", 3, 5)}, - {57, std::tuple("FV", 3, 5)}, - {58, std::tuple("NPER", 3, 5)}, - {59, std::tuple("PMT", 3, 5)}, - {60, std::tuple("RATE", 3, 6)}, - {61, std::tuple("MIRR", 3, 3)}, - {62, std::tuple("IRR", 1, 2)}, - {63, std::tuple("RAND", 0, 0)}, - {64, std::tuple("MATCH", 2, 3)}, - {65, std::tuple("DATE", 3, 3)}, - {66, std::tuple("TIME", 3, 3)}, - {67, std::tuple("DAY", 1, 1)}, - {68, std::tuple("MONTH", 1, 1)}, - {69, std::tuple("YEAR", 1, 1)}, - {70, std::tuple("WEEKDAY", 1, 2)}, - {71, std::tuple("HOUR", 1, 1)}, - {72, std::tuple("MINUTE", 1, 1)}, - {73, std::tuple("SECOND", 1, 1)}, - {74, std::tuple("NOW", 0, 0)}, - {75, std::tuple("AREAS", 1, 1)}, - {76, std::tuple("ROWS", 1, 1)}, - {77, std::tuple("COLUMNS", 1, 1)}, - {78, std::tuple("OFFSET", 3, 5)}, - {82, std::tuple("SEARCH", 2, 3)}, - {83, std::tuple("TRANSPOSE", 1, 1)}, - {86, std::tuple("TYPE", 1, 1)}, - {92, std::tuple("SERIESSUM", 4, 4)}, - {97, std::tuple("ATAN2", 2, 2)}, - {98, std::tuple("ASIN", 1, 1)}, - {99, std::tuple("ACOS", 1, 1)}, - {100, std::tuple("CHOOSE", 2, 30)}, - {101, std::tuple("HLOOKUP", 3, 4)}, - {102, std::tuple("VLOOKUP", 3, 4)}, - {105, std::tuple("ISREF", 1, 1)}, - {109, std::tuple("LOG", 1, 2)}, - {111, std::tuple("CHAR", 1, 1)}, - {112, std::tuple("LOWER", 1, 1)}, - {113, std::tuple("UPPER", 1, 1)}, - {114, std::tuple("PROPER", 1, 1)}, - {115, std::tuple("LEFT", 1, 2)}, - {116, std::tuple("RIGHT", 1, 2)}, - {117, std::tuple("EXACT", 2, 2)}, - {118, std::tuple("TRIM", 1, 1)}, - {119, std::tuple("REPLACE", 4, 4)}, - {120, std::tuple("SUBSTITUTE", 3, 4)}, - {121, std::tuple("CODE", 1, 1)}, - {124, std::tuple("FIND", 2, 3)}, - {125, std::tuple("CELL", 1, 2)}, - {126, std::tuple("ISERR", 1, 1)}, - {127, std::tuple("ISTEXT", 1, 1)}, - {128, std::tuple("ISNUMBER", 1, 1)}, - {129, std::tuple("ISBLANK", 1, 1)}, - {130, 
std::tuple("T", 1, 1)}, - {131, std::tuple("N", 1, 1)}, - {140, std::tuple("DATEVALUE", 1, 1)}, - {141, std::tuple("TIMEVALUE", 1, 1)}, - {142, std::tuple("SLN", 3, 3)}, - {143, std::tuple("SYD", 4, 4)}, - {144, std::tuple("DDB", 4, 5)}, - {148, std::tuple("INDIRECT", 1, 2)}, - {162, std::tuple("CLEAN", 1, 1)}, - {163, std::tuple("MDETERM", 1, 1)}, - {164, std::tuple("MINVERSE", 1, 1)}, - {165, std::tuple("MMULT", 2, 2)}, - {167, std::tuple("IPMT", 4, 6)}, - {168, std::tuple("PPMT", 4, 6)}, - {169, std::tuple("COUNTA", 0, 30)}, - {183, std::tuple("PRODUCT", 0, 30)}, - {184, std::tuple("FACT", 1, 1)}, - {189, std::tuple("DPRODUCT", 3, 3)}, - {190, std::tuple("ISNONTEXT", 1, 1)}, - {193, std::tuple("STDEVP", 1, 30)}, - {194, std::tuple("VARP", 1, 30)}, - {195, std::tuple("DSTDEVP", 3, 3)}, - {196, std::tuple("DVARP", 3, 3)}, - {197, std::tuple("TRUNC", 1, 2)}, - {198, std::tuple("ISLOGICAL", 1, 1)}, - {199, std::tuple("DCOUNTA", 3, 3)}, - {204, std::tuple("USDOLLAR", 1, 2)}, - {205, std::tuple("FINDB", 2, 3)}, - {206, std::tuple("SEARCHB", 2, 3)}, - {207, std::tuple("REPLACEB", 4, 4)}, - {208, std::tuple("LEFTB", 1, 2)}, - {209, std::tuple("RIGHTB", 1, 2)}, - {210, std::tuple("MIDB", 3, 3)}, - {211, std::tuple("LENB", 1, 1)}, - {212, std::tuple("ROUNDUP", 2, 2)}, - {213, std::tuple("ROUNDDOWN", 2, 2)}, - {214, std::tuple("ASC", 1, 1)}, - {215, std::tuple("DBCS", 1, 1)}, - {216, std::tuple("RANK", 2, 3)}, - {219, std::tuple("ADDRESS", 2, 5)}, - {220, std::tuple("DAYS360", 2, 3)}, - {221, std::tuple("TODAY", 0, 0)}, - {222, std::tuple("VDB", 5, 7)}, - {227, std::tuple("MEDIAN", 1, 30)}, - {228, std::tuple("SUMPRODUCT", 1, 30)}, - {229, std::tuple("SINH", 1, 1)}, - {230, std::tuple("COSH", 1, 1)}, - {231, std::tuple("TANH", 1, 1)}, - {232, std::tuple("ASINH", 1, 1)}, - {233, std::tuple("ACOSH", 1, 1)}, - {234, std::tuple("ATANH", 1, 1)}, - {235, std::tuple("DGET", 3, 3)}, - {244, std::tuple("INFO", 1, 1)}, - {247, std::tuple("DB", 4, 5)}, - {252, std::tuple("FREQUENCY", 
2, 2)}, - {261, std::tuple("ERROR.TYPE", 1, 1)}, - {269, std::tuple("AVEDEV", 1, 30)}, - {270, std::tuple("BETADIST", 3, 5)}, - {271, std::tuple("GAMMALN", 1, 1)}, - {272, std::tuple("BETAINV", 3, 5)}, - {273, std::tuple("BINOMDIST", 4, 4)}, - {274, std::tuple("CHIDIST", 2, 2)}, - {275, std::tuple("CHIINV", 2, 2)}, - {276, std::tuple("COMBIN", 2, 2)}, - {277, std::tuple("CONFIDENCE", 3, 3)}, - {278, std::tuple("CRITBINOM", 3, 3)}, - {279, std::tuple("EVEN", 1, 1)}, - {280, std::tuple("EXPONDIST", 3, 3)}, - {281, std::tuple("FDIST", 3, 3)}, - {282, std::tuple("FINV", 3, 3)}, - {283, std::tuple("FISHER", 1, 1)}, - {284, std::tuple("FISHERINV", 1, 1)}, - {285, std::tuple("FLOOR", 2, 2)}, - {286, std::tuple("GAMMADIST", 4, 4)}, - {287, std::tuple("GAMMAINV", 3, 3)}, - {288, std::tuple("CEILING", 2, 2)}, - {289, std::tuple("HYPGEOMDIST", 4, 4)}, - {290, std::tuple("LOGNORMDIST", 3, 3)}, - {291, std::tuple("LOGINV", 3, 3)}, - {292, std::tuple("NEGBINOMDIST", 3, 3)}, - {293, std::tuple("NORMDIST", 4, 4)}, - {294, std::tuple("NORMSDIST", 1, 1)}, - {295, std::tuple("NORMINV", 3, 3)}, - {296, std::tuple("NORMSINV", 1, 1)}, - {297, std::tuple("STANDARDIZE", 3, 3)}, - {298, std::tuple("ODD", 1, 1)}, - {299, std::tuple("PERMUT", 2, 2)}, - {300, std::tuple("POISSON", 3, 3)}, - {301, std::tuple("TDIST", 3, 3)}, - {302, std::tuple("WEIBULL", 4, 4)}, - {303, std::tuple("SUMXMY2", 2, 2)}, - {304, std::tuple("SUMX2MY2", 2, 2)}, - {305, std::tuple("SUMX2PY2", 2, 2)}, - {306, std::tuple("CHITEST", 2, 2)}, - {307, std::tuple("CORREL", 2, 2)}, - {308, std::tuple("COVAR", 2, 2)}, - {309, std::tuple("FORECAST", 3, 3)}, - {310, std::tuple("FTEST", 2, 2)}, - {311, std::tuple("INTERCEPT", 2, 2)}, - {312, std::tuple("PEARSON", 2, 2)}, - {313, std::tuple("RSQ", 2, 2)}, - {314, std::tuple("STEYX", 2, 2)}, - {315, std::tuple("SLOPE", 2, 2)}, - {316, std::tuple("TTEST", 4, 4)}, - {317, std::tuple("PROB", 3, 4)}, - {318, std::tuple("DEVSQ", 1, 30)}, - {319, std::tuple("GEOMEAN", 1, 30)}, - {320, 
std::tuple("HARMEAN", 1, 30)}, - {321, std::tuple("SUMSQ", 0, 30)}, - {322, std::tuple("KURT", 1, 30)}, - {323, std::tuple("SKEW", 1, 30)}, - {324, std::tuple("ZTEST", 2, 3)}, - {325, std::tuple("LARGE", 2, 2)}, - {326, std::tuple("SMALL", 2, 2)}, - {327, std::tuple("QUARTILE", 2, 2)}, - {328, std::tuple("PERCENTILE", 2, 2)}, - {329, std::tuple("PERCENTRANK", 2, 3)}, - {330, std::tuple("MODE", 1, 30)}, - {331, std::tuple("TRIMMEAN", 2, 2)}, - {332, std::tuple("TINV", 2, 2)}, - {336, std::tuple("CONCATENATE", 0, 30)}, - {337, std::tuple("POWER", 2, 2)}, - {342, std::tuple("RADIANS", 1, 1)}, - {343, std::tuple("DEGREES", 1, 1)}, - {344, std::tuple("SUBTOTAL", 2, 30)}, - {345, std::tuple("SUMIF", 2, 3)}, - {346, std::tuple("COUNTIF", 2, 2)}, - {347, std::tuple("COUNTBLANK", 1, 1)}, - {350, std::tuple("ISPMT", 4, 4)}, - {351, std::tuple("DATEDIF", 3, 3)}, - {352, std::tuple("DATESTRING", 1, 1)}, - {353, std::tuple("NUMBERSTRING", 2, 2)}, - {354, std::tuple("ROMAN", 1, 2)}, - {358, std::tuple("GETPIVOTDATA", 2, 2)}, - {359, std::tuple("HYPERLINK", 1, 2)}, - {360, std::tuple("PHONETIC", 1, 1)}, - {361, std::tuple("AVERAGEA", 1, 30)}, - {362, std::tuple("MAXA", 1, 30)}, - {363, std::tuple("MINA", 1, 30)}, - {364, std::tuple("STDEVPA", 1, 30)}, - {365, std::tuple("VARPA", 1, 30)}, - {366, std::tuple("STDEVA", 1, 30)}, - {367, std::tuple("VARA", 1, 30)}, - {368, std::tuple("BAHTTEXT", 1, 1)}, - {369, std::tuple("THAIDAYOFWEEK", 1, 1)}, - {370, std::tuple("THAIDIGIT", 1, 1)}, - {371, std::tuple("THAIMONTHOFYEAR", 1, 1)}, - {372, std::tuple("THAINUMSOUND", 1, 1)}, - {373, std::tuple("THAINUMSTRING", 1, 1)}, - {374, std::tuple("THAISTRINGLENGTH", 1, 1)}, - {375, std::tuple("ISTHAIDIGIT", 1, 1)}, - {376, std::tuple("ROUNDBAHTDOWN", 1, 1)}, - {377, std::tuple("ROUNDBAHTUP", 1, 1)}, - {378, std::tuple("THAIYEAR", 1, 1)}, - {379, std::tuple("RTD", 2, 5)} -}; -/** - * SZTABN[code] -> the number of bytes to consume. Which N to use? Depends on biff_version. 
- * Value | Description - * :---: | ----------- - * -1 | Variable - * -2 | Code not implemented in this version - */ -const std::vector SZTAB0 { - -2, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2,-1, 8, - 4, 2, 2, 3, 9, 8, 2, 3, 8, 4, 7, 5, 5, 5, 2, 4, 7, 4, 7, 2, 2, -2, -2, -2, -2, -2, -2, - -2, -2, 3, -2, -2, -2, -2, -2, -2, -2 -}; -const std::vector SZTAB1 { - -2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, 11, 5, - 2, 2, 3, 9, 9, 2, 3, 11, 4, 7, 7, 7, 7, 3, 4, 7, 4, 7, 3, 3, -2, -2, -2, -2, -2, -2, -2, - -2, 3, -2, -2, -2, -2, -2, -2, -2 -}; -const std::vector SZTAB2 { - -2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, 11, 5, - 2, 2, 3, 9, 9, 3, 4, 11, 4, 7, 7, 7, 7, 3, 4, 7, 4, 7, 3, 3, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2 -}; -const std::vector SZTAB3 { - -2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, -2, -2, - 2, 2, 3, 9, 9, 3, 4, 15, 4, 7, 7, 7, 7, 3, 4, 7, 4, 7, 3, 3, -2, -2, -2, -2, -2, -2, -2, - -2, -2, 25, 18, 21, 18, 21, -2, -2 -}; -const std::vector SZTAB4 { - -2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -2, -2, - 2, 2, 3, 9, 9, 3, 4, 5, 5, 9, 7, 7, 7, 3, 5, 9, 5, 9, 3, 3, -2, -2, -2, -2, -2, -2, -2, - -2, -2, 7, 7, 11, 7, 11, -2, -2 -}; -const std::unordered_map> SZDICT { - {20, SZTAB0}, - {21, SZTAB0}, - {30, SZTAB1}, - {40, SZTAB2}, - {45, SZTAB2}, - {50, SZTAB3}, - {70, SZTAB3}, - {80, SZTAB4} -}; -/** - * This dictionary can be used to produce text version of internal codes - * that Excel uses for error cells - */ -const std::vector ERROR_CODES {0x07, 0x08, 0x0A, 0x0B, 0x1C, 0x1D, 0x2F}; -/** Operation name list */ -const std::vector OPERATION_NAMES { - "Unk00", "Exp", "Tbl", "Add", "Sub", "Mul", "Div", "Power", "Concat", "LT", "LE", "EQ", - "GE", "GT", "NE", "Isect", "List", "Range", "Uplus", "Uminus", "Percent", "Paren", "MissArg", - "Str", "Extended", 
"Attr", "Sheet", "EndSheet", "Err", "Bool", "Int", "Num", "Array", "Func", - "FuncVar", "Name", "Ref", "Area", "MemArea", "MemErr", "MemNoMem", "MemFunc", "RefErr", - "AreaErr", "RefN", "AreaN", "MemAreaN", "MemNoMemN", "", "", "", "", "", "", "", "", "FuncCE", - "NameX", "Ref3d", "Area3d", "RefErr3d", "AreaErr3d", "", "" -}; - -// Formula public: -Formula::Formula(Book* book) - : m_book(book) {} - -void Formula::evaluateFormula(Name& name, int nameIndex, int level) { - try { - if (level > 10) - throw std::logic_error("Excessive indirect references in NAME formula"); - const std::vector& sztab = SZDICT.at(m_book->m_biffVersion); - std::string data = name.m_rawFormula; - bool relDelta = true; // All defined name formulas use "Method B" [OOo docs] - bool hasRelation = false; - bool hasError = false; - int pos = 0; - std::vector stack; - Operand unkOp(oUNK); - Operand errorOp(oERR); - - if (name.m_basicFormulaLength == 0) - stack = {unkOp}; - - while (0 <= pos && pos < name.m_basicFormulaLength) { - char op = data[pos]; - char opCode = op & 0x1f; - char opType = (op & 0x60) >> 5; - int opIndex = opType ? 
opCode : opCode + 32; - char size = sztab[opIndex]; - const std::string& opName = OPERATION_NAMES.at(opIndex); - - if (size == -2) - throw std::logic_error( - "ERROR *** Unexpected token "+ std::to_string(op) +" ("+ opName + - "); biff_version=" + std::to_string(m_book->m_biffVersion)); - if (!opType) { - // unkOp, tExp, tTbl - if (0x00 <= opCode && opCode <= 0x02) { - throw std::logic_error( - "ERROR *** Token "+ std::to_string(op) +" ("+ opName + - ") found in NAME formula" - ); - } - // Add, Sub, Mul, Div, Power, tConcat, tLT, ..., tNE - else if (0x03 <= opCode && opCode <= 0x0E) { - if (stack.empty()) - throw std::logic_error("stack is empty"); - binOperation(opCode, stack); - } - // tIsect - else if (opCode == 0x0F) { - if (stack.empty()) - throw std::logic_error("stack is empty"); - - Operand rightOp = stack.back(); - stack.pop_back(); - if (stack.empty()) - throw std::logic_error("stack is empty"); - Operand leftOp = stack.back(); - stack.pop_back(); - int rank = 80; // #Check# - - std::string opText; - opText += (leftOp.m_rank < rank) ? "(" : ""; - opText += leftOp.m_text; - opText += (leftOp.m_rank < rank) ? ")" : ""; - opText += " "; - opText += (rightOp.m_rank < rank) ? "(" : ""; - opText += rightOp.m_text; - opText += (rightOp.m_rank < rank) ? 
")" : ""; - - Operand res(oREF); - res.m_text = opText; - if (leftOp.m_kind == oERR || rightOp.m_kind == oERR) { - res.m_kind = oERR; - } - else if (leftOp.m_kind == oREF && rightOp.m_kind == oREF) { - if (!leftOp.m_value.empty() && !rightOp.m_value.empty()) { - std::vector coords; - rangeOperation(coords, leftOp.m_value[0], rightOp.m_value[0], 0); - res.m_value = {Ref3D(coords)}; - } - } - else if (leftOp.m_kind == oREL && rightOp.m_kind == oREL) { - res.m_kind = oREL; - if (!leftOp.m_value.empty() && !rightOp.m_value.empty()) { - std::vector coords; - rangeOperation(coords, leftOp.m_value[0], rightOp.m_value[0], 0); - if (leftOp.m_value[0].m_relationFlags == rightOp.m_value[0].m_relationFlags) { - auto ref(leftOp.m_value[0].m_relationFlags); - coords.insert(coords.end(), ref.begin(), ref.end()); - res.m_value = {Ref3D(coords)}; - } - } - } - stack.push_back(res); - } - // tList - else if (opCode == 0x10) { - if (stack.empty()) - throw std::logic_error("stack is empty"); - Operand rightOp = stack.back(); - stack.pop_back(); - if (stack.empty()) - throw std::logic_error("stack is empty"); - Operand leftOp = stack.back(); - stack.pop_back(); - int rank = 80; // #Check# - - std::string opText; - opText += (leftOp.m_rank < rank) ? "(" : ""; - opText += leftOp.m_text; - opText += (leftOp.m_rank < rank) ? ")" : ""; - opText += ","; - opText += (rightOp.m_rank < rank) ? "(" : ""; - opText += rightOp.m_text; - opText += (rightOp.m_rank < rank) ? 
")" : ""; - - Operand res(oREF, {}, rank, opText); - if (leftOp.m_kind == oERR || rightOp.m_kind == oERR) { - res.m_kind = oERR; - } - else if ( - (leftOp.m_kind == oREF || leftOp.m_kind == oREL) && - (rightOp.m_kind == oREF || rightOp.m_kind == oREL) - ) { - res.m_kind = oREF; - if (leftOp.m_kind == oREL || rightOp.m_kind == oREL) - res.m_kind = oREL; - - if (!leftOp.m_value.empty() && !rightOp.m_value.empty()) { - auto val(leftOp.m_value); - val.insert(val.end(), rightOp.m_value.begin(), rightOp.m_value.end()); - res.m_value = val; - } - } - stack.push_back(res); - } - // tRange - else if (opCode == 0x11) { - if (stack.empty()) - throw std::logic_error("stack is empty"); - Operand rightOp = stack.back(); - stack.pop_back(); - if (stack.empty()) - throw std::logic_error("stack is empty"); - Operand leftOp = stack.back(); - stack.pop_back(); - int rank = 80; // #Check# - - std::string opText; - opText += (leftOp.m_rank < rank) ? "(" : ""; - opText += leftOp.m_text; - opText += (leftOp.m_rank < rank) ? ")" : ""; - opText += ":"; - opText += (rightOp.m_rank < rank) ? "(" : ""; - opText += rightOp.m_text; - opText += (rightOp.m_rank < rank) ? 
")" : ""; - - Operand res(oREF, {}, rank, opText); - if (leftOp.m_kind == oERR || rightOp.m_kind == oERR) { - res.m_kind = oERR; - } - else if (leftOp.m_kind == oREF && rightOp.m_kind == oREF) { - if (!leftOp.m_value.empty() && !rightOp.m_value.empty()) { - std::vector coords; - rangeOperation(coords, leftOp.m_value[0], rightOp.m_value[0], 1); - res.m_value = {Ref3D(coords)}; - } - } - else if (leftOp.m_kind == oREL && rightOp.m_kind == oREL) { - res.m_kind = oREL; - if (!leftOp.m_value.empty() && !rightOp.m_value.empty()) { - std::vector coords; - rangeOperation(coords, leftOp.m_value[0], rightOp.m_value[0], 1); - if (leftOp.m_value[0].m_relationFlags == rightOp.m_value[0].m_relationFlags) { - auto ref(leftOp.m_value[0].m_relationFlags); - coords.insert(coords.end(), ref.begin(), ref.end()); - res.m_value = {Ref3D(coords)}; - } - } - } - stack.push_back(res); - } - // tUplus, tUminus, tPercent - else if (0x12 <= opCode && opCode <= 0x14) { - if (stack.empty()) - throw std::logic_error("stack is empty"); - unaryOperation(opCode, stack, oNUM); - } - // tParen - // else if (opCode == 0x15) - // tMissArg - else if (opCode == 0x16) { - stack.push_back(Operand(oMSNG, {}, LEAF_RANK)); - } - // tStr - else if (opCode == 0x17) { - int newPos = pos + 1; - std::string str = m_book->unpackStringUpdatePos(data, newPos, 1); - size = newPos - pos; - - std::string text = str; - tools::replaceAll(text, "\"", "\"\""); - text = "\"" + str + "\""; - // Operand(oSTRG, strg, LEAF_RANK, text) - stack.push_back(Operand(oSTRG, {}, LEAF_RANK, text, str)); - } - // tExtended (New with BIFF 8) - else if (opCode == 0x18) { - throw std::domain_error("tExtended token is not implemented"); - } - // tAttr - else if (opCode == 0x19) { - unsigned char subop = m_book->readByte(data, pos+1, 1); - unsigned short nc = m_book->readByte(data, pos+2, 2); - - if (subop == 0x04) // Choose - size = nc * 2 + 6; - else if (subop == 0x10) { // Sum (single arg) - size = 4; - if (stack.empty()) - throw 
std::logic_error("stack is empty"); - Operand leftOp = stack.back(); - stack[stack.size()-1] = Operand(oNUM, {}, FUNC_RANK, "SUM("+ leftOp.m_text +")"); - } - else - size = 4; - } - // tSheet, tEndSheet - else if (0x1A <= opCode && opCode <= 0x1B) { - throw std::domain_error("tSheet and tEndsheet tokens are not implemented"); - } - // tErr, tBool, tInt, tNum - else if (0x1C <= opCode && opCode <= 0x1F) { - int index = opCode - 0x1C; - int kind; - double value; - std::string text; - // tBool - if (index == 1) { - kind = oBOOL; - value = m_book->readByte(data, pos+1, 1); - text = value ? "TRUE" : "FALSE"; - } - // tInt - else if (index == 2) { - kind = oNUM; - value = m_book->readByte(data, pos+1, 2); - text = std::to_string((float)value); - } - // tNum - else if (index == 3) { - kind = oNUM; - value = m_book->readByte(data, pos+1, 8); - text = std::to_string(value); - } - else { - kind = oERR; - value = m_book->readByte(data, pos+1, 1); - text = "\"" + ERROR_TEXT_FROM_CODE.at(static_cast(value)) + "\""; - } - // Operand(kind, value, LEAF_RANK, text) - stack.push_back(Operand(kind, {}, LEAF_RANK, text, std::to_string(value))); - } - else { - throw std::logic_error("Unhandled opCode: " + std::to_string(opCode)); - } - if (size <= 0) { - throw std::logic_error("Size not set for opCode " + std::to_string(opCode)); - } - pos += size; - continue; - } - // tArray - if (opCode == 0x00) { - stack.push_back(unkOp); - } - // tFunc - else if (opCode == 0x01) { - int recordSize = 1 + (m_book->m_biffVersion >= 40); - unsigned short funcx = m_book->readByte(data, pos+1, recordSize); - - if (FUNC_DEFINITIONS.find(funcx) == FUNC_DEFINITIONS.end()) { - stack.push_back(unkOp); - } - else { - std::string opText; - std::string funcName = std::get<0>(FUNC_DEFINITIONS.at(funcx)); - char argCount = std::get<1>(FUNC_DEFINITIONS.at(funcx)); - if (argCount) { - std::string argtext; - for (int i = 0; i < argCount; ++i) { - if (i != 0) - argtext += LIST_SEPARATOR; - if (stack.empty()) - throw 
std::logic_error("stack is empty"); - argtext += stack.back().m_text; - stack.pop_back(); - } - opText = funcName +"("+ argtext +")"; - } - else { - opText = funcName + "()"; - } - stack.push_back(Operand(oUNK, {}, FUNC_RANK, opText)); - } - } - // tFuncVar - else if (opCode == 0x02) { - int recordSize = 1 + (m_book->m_biffVersion >= 40); - unsigned char argCount = m_book->readByte(data, pos+1, 1) % 128; - unsigned short funcx = m_book->readByte(data, pos+2, recordSize) % 32768; - - if (FUNC_DEFINITIONS.find(funcx) == FUNC_DEFINITIONS.end()) { - stack.push_back(unkOp); - } - else { - std::string funcName = std::get<0>(FUNC_DEFINITIONS.at(funcx)); - //char minArgCount = std::get<1>(FUNC_DEFINITIONS[funcx]); - //char maxArgCount = std::get<2>(FUNC_DEFINITIONS[funcx]); - - std::string argtext; - for (int i = 0; i < argCount; ++i) { - if (i != 0) - argtext += LIST_SEPARATOR; - if (stack.empty()) - throw std::logic_error("stack is empty"); - argtext += stack.back().m_text; - } - std::string opText = funcName +"("+ argtext +")"; - Operand res(oUNK, {}, FUNC_RANK, opText); - - if (stack.empty()) - throw std::logic_error("stack is empty"); - auto& testOp = stack[stack.size() - argCount]; - int testValue = 0; - try { - testValue = std::stoi(testOp.m_textValue); - } - catch (...) 
{} - - // IF - if (funcx == 1) { - if ( - (testOp.m_kind == oNUM || testOp.m_kind == oBOOL) && - (testValue == 0 || testValue == 1) - ) { - if (argCount == 2 && !testValue) { - // IF(FALSE, tv) => FALSE - res.m_kind = oBOOL; - res.m_textValue = "0"; - } - else { - int respos = -argCount + 2 - testValue; - if (respos < 0) - respos = (int)stack.size() - respos; - if (respos > stack.size()) - return; - auto& chosen = stack[respos]; - if (chosen.m_kind == oMSNG) { - res.m_kind = oNUM; - res.m_textValue = "0"; - } - else { - res.m_kind = chosen.m_kind; - res.m_textValue = chosen.m_textValue; - } - } - } - } - // CHOOSE - else if (funcx == 100) { - if (testOp.m_kind == oNUM && (1 <= testValue && testValue < argCount)) { - int respos = -argCount - testValue; - if (respos < 0) - respos = (int)stack.size() - respos; - - if (respos > stack.size()) - return; - auto& chosen = stack[respos]; - if (chosen.m_kind == oMSNG){ - res.m_kind = oNUM; - res.m_textValue = "0"; - } - else { - res.m_kind = chosen.m_kind; - res.m_textValue = chosen.m_textValue; - } - } - } - - for (int i = 0; i < argCount; ++i) - stack.pop_back(); - stack.push_back(res); - } - } - // tName - else if (opCode == 0x03) { - unsigned short targetNameIndex = m_book->readByte(data, pos+1, 2) - 1; - - // 添加边界检查,防止访问无效内存 - if (targetNameIndex >= m_book->m_nameObjList.size()) { - hasError = true; - stack.push_back(errorOp); - } - else { - // Only change with BIFF version is number of trailing UNUSED bytes! 
- Name& targetName = m_book->m_nameObjList[targetNameIndex]; - // Recursive - if (!targetName.m_evaluated) - evaluateFormula(targetName, targetNameIndex, level+1); - - Operand res(oUNK); - if (!targetName.m_stack.empty() && !(targetName.m_macro || targetName.m_isBinary || targetName.m_hasError)) - res = targetName.m_stack[0]; - res.m_rank = LEAF_RANK; - - if (targetName.m_scope == -1) { - res.m_text = targetName.m_name; - hasError = (hasError || targetName.m_macro || targetName.m_isBinary || targetName.m_hasError); - hasRelation = (hasRelation || targetName.m_hasRelation); - } - else { - res.m_text = m_book->m_sheetNames[targetName.m_scope] + "%s!" + targetName.m_name; - } - stack.push_back(res); - } - } - // tRef - else if (opCode == 0x04) { - hasRelation = true; - std::vector address; - getCellAddress(address, data, pos+1, relDelta); - std::vector coords {0, 1, address[0], address[0] + 1, address[1], address[1] + 1}; - - Operand res(oUNK); - if (opType == 1) { - std::vector relflags {1, 1, address[2], address[2], address[3], address[3]}; - coords.insert(coords.end(), relflags.begin(), relflags.end()); - res = Operand(oREL, {Ref3D(coords)}); - } - stack.push_back(res); - } - // tArea - else if (opCode == 0x05) { - hasRelation = true; - std::vector address1, address2; - getCellRangeAddress(address1, address2, data, pos+1, relDelta); - std::vector coords {0, 1, address1[0], address2[0] + 1, address1[1], address2[1] + 1}; - - Operand res(oUNK); - if (opType == 1) { - std::vector relflags {1, 1, address1[2], address2[2], address1[3], address2[3]}; - coords.insert(coords.end(), relflags.begin(), relflags.end()); - res = Operand(oREL, {Ref3D(coords)}); - } - stack.push_back(res); - } - // tMemArea - else if (opCode == 0x06) { - throw std::logic_error( - "ERROR *** Token "+ std::to_string(op) +" ("+ opName + - ") found in NAME formula" - ); - } - // tMemFunc - //else if (opCode == 0x09) { - // unsigned short recordSize = m_book->readByte(data, pos+1, 2); - //} - // tRefN 
- else if (opCode == 0x0C) { - throw std::logic_error( - "ERROR *** Token "+ std::to_string(op) +" ("+ opName + - ") found in NAME formula" - ); - } - // tAreaN - else if (opCode == 0x0D) { - throw std::logic_error( - "ERROR *** Token "+ std::to_string(op) +" ("+ opName + - ") found in NAME formula" - ); - } - // tRef3d - else if (opCode == 0x1A) { - int sheetIndex1, sheetIndex2; - std::vector address; - if (m_book->m_biffVersion >= 80) { - getCellAddress(address, data, pos+3, relDelta); - unsigned short refIndex = m_book->readByte(data, pos+1, 2); - getExternalSheetLocalRange(sheetIndex1, sheetIndex2, refIndex); - } - else { - getCellAddress(address, data, pos+15, relDelta); - short rawExternalSheetIndex = m_book->readByte(data, pos+1, 2); - short refFirstSheetIndex = m_book->readByte(data, pos+11, 2); - short refLastSheetIndex = m_book->readByte(data, pos+13, 2); - getExternalSheetLocalRangeB57( - sheetIndex1, sheetIndex2, rawExternalSheetIndex, - refFirstSheetIndex, refLastSheetIndex - ); - } - std::vector coords { - sheetIndex1, sheetIndex2+1, address[0], - address[0] + 1, address[1], address[1] + 1 - }; - - bool isRelation = (address[2] || address[3]); - hasRelation = (hasRelation || isRelation); - hasError |= sheetIndex1 < -1; - - Operand res(oUNK); - if (isRelation) { - res.m_kind = oREL; - std::vector relflags {0, 0, address[2], address[2], address[3], address[3]}; - res.m_text = rangeName3DRel(coords, relflags, 0, 0, true); - - coords.insert(coords.end(), relflags.begin(), relflags.end()); - } - else { - res.m_kind = oREF; - res.m_text = rangeName3D(coords); - } - res.m_rank = LEAF_RANK; - if (opType == 1) - res.m_value = {Ref3D(coords)}; - - stack.push_back(res); - } - // tArea3d - else if (opCode == 0x1B) { - int sheetIndex1, sheetIndex2; - std::vector address1, address2; - if (m_book->m_biffVersion >= 80) { - getCellRangeAddress(address1, address2, data, pos+3, relDelta); - unsigned short refIndex = m_book->readByte(data, pos+1, 2); - 
getExternalSheetLocalRange(sheetIndex1, sheetIndex2, refIndex); - } - else { - getCellRangeAddress(address1, address2, data, pos+15, relDelta); - short rawExternalSheetIndex = m_book->readByte(data, pos+1, 2); - short refFirstSheetIndex = m_book->readByte(data, pos+11, 2); - short refLastSheetIndex = m_book->readByte(data, pos+13, 2); - getExternalSheetLocalRangeB57(sheetIndex1, sheetIndex2, rawExternalSheetIndex, refFirstSheetIndex, refLastSheetIndex); - } - std::vector coords { - sheetIndex1, sheetIndex2+1, address1[0], - address2[0] + 1, address1[1], address2[1] + 1 - }; - - bool isRelation = (address1[2] || address1[3] || address2[2] || address2[3]); - hasRelation = (hasRelation || isRelation); - hasError |= sheetIndex1 < -1; - - Operand res(oUNK); - if (isRelation) { - res.m_kind = oREL; - std::vector relflags {0, 0, address1[2], address2[2], address1[3], address2[3]}; - res.m_text = rangeName3DRel(coords, relflags, 0, 0, true); - - coords.insert(coords.end(), relflags.begin(), relflags.end()); - } - else { - res.m_kind = oREF; - res.m_text = rangeName3D(coords); - } - res.m_rank = LEAF_RANK; - if (opType == 1) - res.m_value = {Ref3D(coords)}; - - stack.push_back(res); - } - // tNameX - else if (opCode == 0x19) { - bool dodgy = false; - int refIndex; - int originalRefIndex; - unsigned short targetNameIndex; - - Operand res(oUNK); - if (m_book->m_biffVersion >= 80) { - refIndex = m_book->readByte(data, pos+1, 2); - targetNameIndex = m_book->readByte(data, pos+3, 2) - 1; - originalRefIndex = refIndex; - } - else { - refIndex = m_book->readByte(data, pos+1, 2); - targetNameIndex = m_book->readByte(data, pos+11, 2) - 1; - originalRefIndex = refIndex; - if (refIndex > 0) - refIndex--; - else if (refIndex < 0) - refIndex = -refIndex - 1; - else - dodgy = true; - } - if (targetNameIndex == nameIndex) { - dodgy = true; - hasError = true; - } - - int sheetIndex1, sheetIndex2; - if (!dodgy) { - if (m_book->m_biffVersion >= 80) { - getExternalSheetLocalRange(sheetIndex1, 
sheetIndex2, refIndex); - } - // External ref - else if (originalRefIndex > 0) { - sheetIndex1 = -4; - sheetIndex2 = -4; - } - else { - int type = m_book->m_externalSheetTypes[refIndex]; - // Internal, any sheet - if (type == 4) { - sheetIndex1 = -1; - sheetIndex2 = -1; - } - else { - sheetIndex1 = -666; - sheetIndex2 = -666; - } - } - } - - if (dodgy || sheetIndex1 < -1) { - res = Operand( - oUNK, {}, LEAF_RANK, - "<>" - ); - } - else { - Name& targetName = m_book->m_nameObjList[targetNameIndex]; - // Recursive - if (!targetName.m_evaluated) - evaluateFormula(targetName, targetNameIndex, level+1); - - if (targetName.m_macro || targetName.m_isBinary || targetName.m_hasError) { - res = Operand(oUNK); - hasError = (hasError || targetName.m_macro || targetName.m_isBinary || targetName.m_hasError); - hasRelation = (hasRelation || targetName.m_hasRelation); - } - else { - // 防御性检查:确保 stack 不为空 - if (!targetName.m_stack.empty()) - res = targetName.m_stack[0]; - else { - res = Operand(oERR); - hasError = true; - } - } - - res.m_rank = LEAF_RANK; - if (targetName.m_scope == -1) - res.m_text = targetName.m_name; - else - res.m_text = m_book->m_sheetNames[targetName.m_scope] + "!" 
+ targetName.m_name; - } - stack.push_back(res); - } - else if (find(ERROR_CODES.begin(), ERROR_CODES.end(), opCode) != ERROR_CODES.end()) { - hasError = true; - stack.push_back(errorOp); - } - else { - hasError = true; - } - - if (size <= 0) - throw std::logic_error("Fatal: token size is not positive"); - pos += size; - } - name.m_stack = stack; - //if (stack.size() != 1) - // name.m_result = {}; - //else - // name.m_result = stack[0]; - name.m_hasRelation = hasRelation; - name.m_hasError = hasError; - name.m_evaluated = true; - } catch (const std::exception &e) { - std::cout << e.what() << std::endl; - // 确保异常后 m_stack 不为空,避免后续访问崩溃 - if (name.m_stack.empty()) { - name.m_stack.push_back(Operand(oERR)); - } - name.m_hasError = true; - name.m_evaluated = true; - } -} - - -// Formula private: -void Formula::binOperation(int code, std::vector& stack) const { - Operand rightOp = stack.back(); - stack.pop_back(); - Operand leftOp = stack.back(); - stack.pop_back(); - - double leftValue = 0; - double rightValue = 0; - double result; - int funcKind; - int rank; - std::string symbol; - std::string opText; - std::string stringResult; - try { - leftValue = std::stod(leftOp.m_textValue); - rightValue = std::stod(rightOp.m_textValue); - } - catch (...) 
{} - - if (code == 0x03) { - funcKind = oNUM; - rank = 30; - symbol = "+"; - result = leftValue + rightValue; - } - else if (code == 0x04) { - funcKind = oNUM; - rank = 30; - symbol = "-"; - result = leftValue - rightValue; - } - else if (code == 0x05) { - funcKind = oNUM; - rank = 40; - symbol = "*"; - result = leftValue * rightValue; - } - else if (code == 0x06) { - funcKind = oNUM; - rank = 40; - symbol = "/"; - result = leftValue / rightValue; - } - else if (code == 0x07) { - funcKind = oNUM; - rank = 50; - symbol = "^"; - result = pow(leftValue, rightValue); - } - else if (code == 0x08) { - funcKind = oSTRG; - rank = 20; - symbol = "&"; - stringResult = leftOp.m_textValue + rightOp.m_textValue; - } - else if (code == 0x09) { - funcKind = oBOOL; - rank = 10; - symbol = "<"; - result = (leftValue < rightValue); - } - else if (code == 0x0A) { - funcKind = oBOOL; - rank = 10; - symbol = " <= "; - result = (leftValue <= rightValue); - } - else if (code == 0x0B) { - funcKind = oBOOL; - rank = 10; - symbol = "="; - result = (leftValue == rightValue); - } - else if (code == 0x0C) { - funcKind = oBOOL; - rank = 10; - symbol = " >= "; - result = (leftValue >= rightValue); - } - else if (code == 0x0D) { - funcKind = oBOOL; - rank = 10; - symbol = ">"; - result = (leftValue > rightValue); - } - else { // if (code == 0x0E) - funcKind = oBOOL; - rank = 10; - symbol = "<>"; - result = (leftValue != rightValue); - } - - opText += (leftOp.m_rank < rank) ? "(" : ""; - opText += leftOp.m_text; - opText += (leftOp.m_rank < rank) ? ")" : ""; - opText += symbol; - opText += (rightOp.m_rank < rank) ? "(" : ""; - opText += rightOp.m_text; - opText += (rightOp.m_rank < rank) ? ")" : ""; - - Operand resultOp(funcKind, {}, rank, opText); - if (!leftValue || !rightValue) { - stack.push_back(resultOp); - return; - } - - resultOp.m_textValue = (funcKind == oSTRG) ? 
stringResult : std::to_string(result); - stack.push_back(resultOp); -} - -void Formula::unaryOperation(int code, std::vector& stack, int resultKind) const { - Operand leftOp = stack.back(); - stack.pop_back(); - - double leftValue = std::stod(leftOp.m_textValue); - int rank; - std::string symbol1, symbol2; - std::string opText; - - if (code == 0x12) { - rank = 70; - symbol1 = "+"; - } - else if (code == 0x13) { - rank = 70; - symbol1 = "-"; - leftValue = -leftValue; - } - else { // if (code == 0x14) - rank = 60; - symbol2 = "%%"; - leftValue = leftValue / 100.0; - } - - opText += symbol1; - opText += (leftOp.m_rank < rank) ? "(" : ""; - opText += leftOp.m_text; - opText += (leftOp.m_rank < rank) ? ")" : ""; - opText += symbol2; - - if (!leftOp.m_textValue.empty()) - leftOp.m_textValue = std::to_string(leftValue); - // Operand(result_kind, val, rank, opText) - stack.push_back(Operand(resultKind, {}, rank, opText, leftOp.m_textValue)); -} - -void Formula::rangeOperation(std::vector& coords, Ref3D& leftValue, - Ref3D& rightValue, int functionType) const -{ - size_t size = leftValue.m_coords.size(); - for (size_t i = 0; i < size; ++i) { - if (((i + functionType) & 1) == 1) - coords.push_back(std::max(leftValue.m_coords[i], rightValue.m_coords[i])); - else - coords.push_back(std::min(leftValue.m_coords[i], rightValue.m_coords[i])); - } -} - -void Formula::getCellAddress(std::vector& address, const std::string& data, - int pos, bool relDelta, int rowIndex, int colIndex) const -{ - if (m_book->m_biffVersion >= 80) { - unsigned short rowValue = m_book->readByte(data, pos, 2); - unsigned short colValue = m_book->readByte(data, pos+2, 2); - adjustCellAddressBiff8(address, rowValue, colValue, relDelta, rowIndex, colIndex); - } - else { - unsigned short rowValue = m_book->readByte(data, pos, 2); - unsigned char colValue = m_book->readByte(data, pos+2, 1); - adjustCellAddressBiff7(address, rowValue, colValue, relDelta, rowIndex, colIndex); - } -} - -void 
Formula::getCellRangeAddress(std::vector& address1, std::vector& address2, - const std::string& data, int pos, bool relDelta, - int rowIndex, int colIndex) const -{ - if (m_book->m_biffVersion >= 80) { - unsigned short row1Value = m_book->readByte(data, pos, 2); - unsigned short row2Value = m_book->readByte(data, pos+2, 2); - unsigned short col1Value = m_book->readByte(data, pos+4, 2); - unsigned short col2Value = m_book->readByte(data, pos+6, 2); - adjustCellAddressBiff8(address1, row1Value, col1Value, relDelta, rowIndex, colIndex); - adjustCellAddressBiff8(address2, row2Value, col2Value, relDelta, rowIndex, colIndex); - } - else { - unsigned short row1Value = m_book->readByte(data, pos, 2); - unsigned short row2Value = m_book->readByte(data, pos+2, 2); - unsigned char col1Value = m_book->readByte(data, pos+4, 1); - unsigned char col2Value = m_book->readByte(data, pos+5, 1); - adjustCellAddressBiff7(address1, row1Value, col1Value, relDelta, rowIndex, colIndex); - adjustCellAddressBiff7(address2, row2Value, col2Value, relDelta, rowIndex, colIndex); - } -} - -void Formula::adjustCellAddressBiff8(std::vector& address, int rowValue, int colValue, - bool relDelta, int rowIndex, int colIndex) const -{ - int rowRel = (colValue >> 15) & 1; - int colRel = (colValue >> 14) & 1; - int rIndex = rowValue; - int cIndex = colValue & 0xff; - if (relDelta) { - if (rowRel && rIndex >= 32768) - rIndex -= 65536; - if (colRel && cIndex >= 128) - cIndex -= 256; - } - else { - if (rowRel) - rIndex -= rowIndex; - if (colRel) - cIndex -= colIndex; - } - address = {rIndex, cIndex, rowRel, colRel}; -} - -void Formula::adjustCellAddressBiff7(std::vector& address, int rowValue, int colValue, - bool relDelta, int rowIndex, int colIndex) const -{ - int rowRel = (rowValue >> 15) & 1; - int colRel = (rowValue >> 14) & 1; - int rIndex = rowValue & 0x3fff; - int cIndex = colValue; - if (relDelta) { - if (rowRel && rIndex >= 8192) - rIndex -= 16384; - if (colRel && cIndex >= 128) - cIndex -= 256; - 
} - else { - if (rowRel) - rIndex -= rowIndex; - if (colRel) - cIndex -= colIndex; - } - address = {rIndex, cIndex, rowRel, colRel}; -} - -void Formula::getExternalSheetLocalRange(int& sheetIndex1, int& sheetIndex2, int refIndex) const { - try { - if (refIndex >= m_book->m_externalSheetInfo.size()) - throw ""; - - int refRecordIndex = m_book->m_externalSheetInfo[refIndex][0]; - int refFirstSheetIndex = m_book->m_externalSheetInfo[refIndex][1]; - int refLastSheetIndex = m_book->m_externalSheetInfo[refIndex][2]; - if (std::max(refFirstSheetIndex, refLastSheetIndex) >= m_book->m_sheetMap.size()) - throw ""; - - int xlSheetIndex1 = m_book->m_sheetMap[refFirstSheetIndex]; - int xlSheetIndex2 = m_book->m_sheetMap[refLastSheetIndex]; - - if (refRecordIndex == m_book->m_supbookAddinIndex) { - sheetIndex1 = -5; - sheetIndex2 = -5; - } - // External reference - else if (refRecordIndex != m_book->m_supbookLocalIndex) { - sheetIndex1 = -4; - sheetIndex2 = -4; - } - // Internal reference, any sheet - else if (refFirstSheetIndex == 0xFFFE && refLastSheetIndex == 0xFFFE) { - sheetIndex1 = -1; - sheetIndex2 = -1; - } - // Internal reference, deleted sheet(s) - else if (refFirstSheetIndex == 0xFFFF && refLastSheetIndex == 0xFFFF) { - sheetIndex1 = -2; - sheetIndex2 = -2; - } - // Stuffed up somewhere - else if ( - 0 > refFirstSheetIndex || refFirstSheetIndex > refLastSheetIndex || - refLastSheetIndex >= static_cast(m_book->m_sheetMap.size()) - ) { - sheetIndex1 = -102; - sheetIndex2 = -102; - } - // Internal reference, but to a macro sheet - else if (0 > xlSheetIndex1 || xlSheetIndex1 > xlSheetIndex2) { - sheetIndex1 = -3; - sheetIndex2 = -3; - } - else { - sheetIndex1 = xlSheetIndex1; - sheetIndex2 = xlSheetIndex2; - } - } - catch (...) 
{ - sheetIndex1 = -101; - sheetIndex2 = -101; - } -} - -void Formula::getExternalSheetLocalRangeB57(int& sheetIndex1, int& sheetIndex2, - int rawExternalSheetIndex, int refFirstSheetIndex, - int refLastSheetIndex) const -{ - int xlSheetIndex1 = m_book->m_sheetMap[refFirstSheetIndex]; - int xlSheetIndex2 = m_book->m_sheetMap[refLastSheetIndex]; - - // External reference - if (rawExternalSheetIndex > 0) { - sheetIndex1 = -4; - sheetIndex2 = -4; - } - // Internal reference, deleted sheet(s) - else if (refFirstSheetIndex == -1 && refLastSheetIndex == -1) { - sheetIndex1 = -2; - sheetIndex2 = -2; - } - // Stuffed up somewhere - else if ( - 0 > refFirstSheetIndex || refFirstSheetIndex > refLastSheetIndex || - refLastSheetIndex >= static_cast(m_book->m_sheetMap.size()) - ) { - sheetIndex1 = -103; - sheetIndex2 = -103; - } - // Internal reference, but to a macro sheet - else if (0 > xlSheetIndex1 || xlSheetIndex1 > xlSheetIndex2) { - sheetIndex1 = -3; - sheetIndex2 = -3; - } - else { - sheetIndex1 = xlSheetIndex1; - sheetIndex2 = xlSheetIndex2; - } -} - -std::string Formula::rangeName3D(const std::vector& coords) const { - return sheetRange(coords[0], coords[1]) + "!" 
+ - rangeName2D(coords[2], coords[3], coords[4], coords[5]); -} - -std::string Formula::rangeName3DRel(const std::vector& coords, - const std::vector& relationFlags, int rowIndex, - int colIndex, bool isR1C1) const -{ - auto cBegin = coords.begin(); - auto relBegin = relationFlags.begin(); - std::string shdesc; - - if (!relationFlags[0] && !relationFlags[1]) - shdesc = sheetRange(coords[0], coords[1]); - - std::string rngdesc = rangeName2DRel( - std::vector(cBegin + 2, cBegin + 6), - std::vector(relBegin + 2, relBegin + 6), - rowIndex, colIndex, isR1C1 - ); - if (shdesc.empty()) - return rngdesc; - return shdesc +"!"+ rngdesc; -} - -std::string Formula::rangeName2D(int rlo, int rhi, int clo, int chi, bool isR1C1) const { - if (isR1C1) - return ""; - if (rhi == rlo+1 && chi == clo+1) - return absoluteCellName(rlo, clo, isR1C1); - return absoluteCellName(rlo, clo, isR1C1) +":"+ absoluteCellName(rhi-1, chi-1, isR1C1); -} - -std::string Formula::rangeName2DRel(const std::vector& coords, - const std::vector& relationFlags, int rowIndex, - int colIndex, bool isR1C1) const -{ - if ((relationFlags[0] || relationFlags[1]) && rowIndex == 0) - isR1C1 = true; - if ((relationFlags[2] || relationFlags[3]) && colIndex == 0) - isR1C1 = true; - return relativeCellName( - coords[0], coords[2], - relationFlags[0], relationFlags[2], - rowIndex, colIndex, isR1C1 - ) - + ":" + - relativeCellName( - coords[1]-1, coords[3]-1, - relationFlags[1], relationFlags[3], - rowIndex, colIndex, isR1C1 - ); -} - -std::string Formula::sheetRange(int sheetIndex1, int sheetIndex2) const { - std::string sheetDesc = quotedSheetName(sheetIndex1); - if (sheetIndex1 != sheetIndex2 - 1) - sheetDesc += ":" + quotedSheetName(sheetIndex2-1); - return sheetDesc; -} - -std::string Formula::quotedSheetName(int sheetIndex) const { - std::string sheetName; - if (sheetIndex >= 0) - sheetName = m_book->m_sheetNames[sheetIndex]; - else if (sheetIndex == -1) - sheetName = "?internal; any sheet?"; - else if (sheetIndex 
== -2) - sheetName = "internal; deleted sheet"; - else if (sheetIndex == -3) - sheetName = "internal; macro sheet"; - else if (sheetIndex == -4) - sheetName = "<>"; - else - sheetName = "?error "+ std::to_string(sheetIndex) +"?"; - - if (sheetName.find("'") != std::string::npos) { - tools::replaceAll(sheetName, "'", "''"); - return "'" + sheetName + "'"; - } - if (sheetName.find(" ") != std::string::npos) - return "'" + sheetName + "'"; - return sheetName; -} - -std::string Formula::relativeCellName(int rowIndex, int colIndex, int relRowIndex, int relColIndex, - int bRowIndex, int bColIndex, bool isR1C1) const -{ - if (!relRowIndex && !relColIndex) - return absoluteCellName(rowIndex, colIndex, isR1C1); - // Must flip whole cell into R1C1 mode - if ((relRowIndex && !bRowIndex) || (relColIndex && !bColIndex)) - isR1C1 = true; - std::string rowName = relativeRowName(rowIndex, relRowIndex, bRowIndex, isR1C1); - std::string colName = relativeColName(colIndex, relColIndex, bColIndex, isR1C1); - if (isR1C1) - return rowName + colName; - return colName + rowName; -} - -std::string Formula::absoluteCellName(int rowIndex, int colIndex, bool isR1C1) const { - if (isR1C1) - return "R"+ std::to_string(rowIndex+1) +"C"+ std::to_string(colIndex+1); - return "$"+ colName(colIndex) +"$"+ std::to_string(rowIndex+1); -} - -std::string Formula::relativeRowName(int rowIndex, int relRowIndex, int bRowIndex, bool isR1C1) const { - // If no base rowIndex is provided, we have to return R1C1 - if (!bRowIndex) - isR1C1 = true; - if (!relRowIndex) { - if (isR1C1) - return "R" + std::to_string(rowIndex+1); - return "$" + std::to_string(rowIndex+1); - } - if (isR1C1) { - if (rowIndex) - return "R["+ std::to_string(rowIndex) +"]"; - return "R"; - } - return std::to_string((bRowIndex + rowIndex) % 65536 + 1); -} - -std::string Formula::relativeColName(int colIndex, int relColIndex, int bColIndex, bool isR1C1) const { - // If no base colIndex is provided, we have to return R1C1 - if (!bColIndex) - 
isR1C1 = true; - if (!relColIndex) { - if (isR1C1) - return "C" + std::to_string(colIndex+1); - return "$" + colName(colIndex); - } - if (isR1C1) { - if (colIndex) - return "C["+ std::to_string(colIndex) +"]"; - return "C"; - } - return colName((bColIndex + colIndex) % 256); -} - -std::string Formula::colName(int colIndex) const { - // Utility function: ``7`` => ``'H'``, ``27`` => ``'AB'`` - std::string alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - if (colIndex <= 25) - return {alphabet[colIndex]}; - else - return {alphabet[colIndex / 26 - 1], alphabet[colIndex % 26]}; -} - - -// Operand public: -Operand::Operand(int kind, const std::vector& value, int rank, - const std::string& text, const std::string& textValue) -{ - if (kind != -5) - m_kind = kind; - if (!value.empty()) - m_value = value; - m_text = text; - m_textValue = textValue; - // Rank is internal gizmo (operator precedence). It's used in reconstructing formula text - m_rank = rank; -} - - -// Ref3D public: -Ref3D::Ref3D(const std::vector& value) { - for (int i = 0; i < 6; i++) - m_coords.emplace_back(value[i]); - for (int i = 6; i < 12; i++) - m_relationFlags.emplace_back(value[i]); - if (m_relationFlags.empty()) - m_relationFlags = {0, 0, 0, 0, 0, 0}; -} - -} // End namespace diff --git a/3rdparty/libs/fileext/excel/formula.hpp b/3rdparty/libs/fileext/excel/formula.hpp deleted file mode 100644 index fa0aafa..0000000 --- a/3rdparty/libs/fileext/excel/formula.hpp +++ /dev/null @@ -1,529 +0,0 @@ -/** - * @brief Excel files (xls/xlsx) into HTML сonverter - * @package excel - * @file formula.hpp - * @author dmryutov (dmryutov@gmail.com) - * @copyright python-excel (https://github.com/python-excel/xlrd) - * @date 02.12.2016 -- 18.10.2017 - */ -#pragma once - -#include -#include - -#include "book.hpp" -#include "format.hpp" - - -namespace excel { - -/** Operand kind list */ -enum { - oBOOL = 3, ///< Boolean - oERR = 4, ///< Error - oMSNG = 5, ///< Message - oNUM = 2, ///< Number - oREF = -1, ///< Reference - oREL 
= -2, ///< Relative - oSTRG = 1, ///< String - oUNK = 0 ///< Unknown -}; - -class Name; -class Book; -class Operand; -class Ref3D; - -/** - * @class Formula - * @brief - * Sheet formula information - */ -class Formula { -public: - /** - * @param[in] book - * Pointer to parent Book object - * @since 1.0 - */ - Formula(Book* book); - - /** - * @brief - * Process formulas for NAME object - * @param[in] name - * Reference to NAME object - * @param[in] nameIndex - * Name index - * @param[in] level - * Recursion nesting level - * @since 1.0 - */ - void evaluateFormula(Name& name, int nameIndex, int level = 0); - - /** Pointer to parent Book object */ - Book* m_book; - -private: - /** - * @brief - * Execute binary operation - * @details - * Value | Representation - * :-----: | -------------- - * tAdd | `0x03: (_arith_argdict, oNUM, opr.add, 30, '+')` - * tSub | `0x04: (_arith_argdict, oNUM, opr.sub, 30, '-')` - * tMul | `0x05: (_arith_argdict, oNUM, opr.mul, 40, '*')` - * tDiv | `0x06: (_arith_argdict, oNUM, opr.truediv, 40, '/')` - * tPower | `0x07: (_arith_argdict, oNUM, _opr_pow, 50, '^')` - * tConcat | `0x08: (_strg_argdict, oSTRG, opr.add, 20, '&')` - * tLT | `0x09: (_cmp_argdict, oBOOL, _opr_lt, 10, '<')` - * tLE | `0x0A: (_cmp_argdict, oBOOL, _opr_le, 10, '<=')` - * tEQ | `0x0B: (_cmp_argdict, oBOOL, _opr_eq, 10, '=')` - * tGE | `0x0C: (_cmp_argdict, oBOOL, _opr_ge, 10, '>=')` - * tGT | `0x0D: (_cmp_argdict, oBOOL, _opr_gt, 10, '>')` - * tNE | `0x0E: (_cmp_argdict, oBOOL, _opr_ne, 10, '<>')` - * @param[in] code - * Opeartion code - * @param[in,out] stack - * Stack of operands - * @since 1.0 - */ - void binOperation(int code, std::vector& stack) const; - - /** - * @brief - * Execute unary operation - * @details - * Value | Representation - * :---------: | -------------- - * Unary plus | `0x12: (lambda x: x, 70, '+', '')` - * Unary minus | `0x13: (lambda x: -x, 70, '-', '')` - * Percent | `0x14: (lambda x: x / 100.0, 60, '', '%')` - * @param[in] code - * Opeartion 
code - * @param[in,out] stack - * Stack of operands - * @param[in] resultKind - * Kind of result operand - * @since 1.0 - */ - void unaryOperation(int code, std::vector& stack, int resultKind) const; - - /** - * @brief - * Execute range operations - * @details - * Value | Representation - * :---------: | -------------- - * tIsectFuncs | `0: (max, min, max, min, max, min)` - * tRangeFuncs | `1: (min, max, min, max, min, max)` - * @param[out] coords - * Cell coords - * @param[in] leftValue - * Left-top end of range of cells - * @param[in] rightValue - * Right-bottom end of range of cells - * @param[in] functionType - * Function type (tIsectFuncs/tRangeFuncs) - * @since 1.0 - */ - void rangeOperation(std::vector& coords, Ref3D& leftValue, - Ref3D& rightValue, int functionType) const; - - /** - * @brief - * Get cell address - * @param[out] address - * Array in which address will be saved - * @param[in] data - * Binary data - * @param[in] pos - * Record start position - * @param[in] relDelta - * Function method - * @param[in] rowIndex - * Row index - * @param[in] colIndex - * Column index - * @since 1.0 - */ - void getCellAddress(std::vector& address, const std::string& data, int pos, - bool relDelta, int rowIndex = 0, int colIndex = 0) const; - - /** - * @brief - * Get cell range address - * @param[out] address1 - * Array in which address of left-top end of range will be saved - * @param[out] address2 - * Array in which address of right-bottom end of range will be saved - * @param[in] data - * Binary data - * @param[in] pos - * Record start position - * @param[in] relDelta - * Function method - * @param[in] rowIndex - * Row index - * @param[in] colIndex - * Column index - * @since 1.0 - */ - void getCellRangeAddress(std::vector& address1, std::vector& address2, - const std::string& data, int pos, bool relDelta, - int rowIndex = 0, int colIndex = 0) const; - - /** - * @brief - * Adjust cell address (BIFF 8) - * @param[out] address - * Array in which address will be 
saved - * @param[in] rowValue - * Row value - * @param[in] colValue - * Column value - * @param[in] relDelta - * Function method - * @param[in] rowIndex - * Row index - * @param[in] colIndex - * Column index - * @since 1.0 - */ - void adjustCellAddressBiff8(std::vector& address, int rowValue, int colValue, - bool relDelta, int rowIndex = 0, int colIndex = 0) const; - - /** - * @brief - * Adjust cell address (BIFF 7 or earlier) - * @param[out] address - * Array in which address will be saved - * @param[in] rowValue - * Row value - * @param[in] colValue - * Column value - * @param[in] relDelta - * Function method - * @param[in] rowIndex - * Row index - * @param[in] colIndex - * Column index - * @since 1.0 - */ - void adjustCellAddressBiff7(std::vector& address, int rowValue, int colValue, - bool relDelta, int rowIndex = 0, int colIndex = 0) const; - - /** - * @brief - * Get external sheet local range of cells - * @param[out] sheetIndex1 - * Sheet left-top index - * @param[out] sheetIndex2 - * Sheet right-bottom index - * @param[in] refIndex - * Reference index - * @since 1.0 - */ - void getExternalSheetLocalRange(int& sheetIndex1, int& sheetIndex2, int refIndex) const; - - /** - * @brief - * Get external sheet local range of cells (BIFF 7/5) - * @param[out] sheetIndex1 - * Sheet left-top index - * @param[out] sheetIndex2 - * Sheet right-bottom index - * @param[in] rawExternalSheetIndex - * External sheet raw index - * @param[in] refFirstSheetIndex - * Reference sheet left-top index - * @param[in] refLastSheetIndex - * Reference sheet right-bottom index - * @since 1.0 - */ - void getExternalSheetLocalRangeB57(int& sheetIndex1, int& sheetIndex2, - int rawExternalSheetIndex, int refFirstSheetIndex, - int refLastSheetIndex) const; - - /** - * @brief - * Get 3-dimensional range name. 
Utility function (assuming Excel's default sheetnames) - * @details - * Example: - * @code `Ref3D(1, 4, 5, 20, 7, 10)` -> `'Sheet2:Sheet3!$H$6:$J$20'` @endcode - * @param[in] coords - * Range coords - * @return - * Range name - * @since 1.0 - */ - std::string rangeName3D(const std::vector& coords) const; - - /** - * @brief - * Get relative 3-dimensional range name. Utility function - * @details - * Example: - * @code `Ref3D(coords=(0, 1, -32, -22, -13, 13), relflags=(0, 0, 1, 1, 1, 1))` - * In R1C1 mode -> `'Sheet1!R[-32]C[-13]:R[-23]C[12]'` - * In A1 mode -> Depends on base cell `(rowIndex, colIndex)` @endcode - * @param[in] coords - * Range coords - * @param[in] relationFlags - * Relation flags. Shows if address is relative (1) or absolute (0) - * @param[in] rowIndex - * Row index - * @param[in] colIndex - * Column index - * @param[in] isR1C1 - * If cell address id in R1C1 mode - * @return - * Range name - * @since 1.0 - */ - std::string rangeName3DRel(const std::vector& coords, const std::vector& relationFlags, - int rowIndex = 0, int colIndex = 0, bool isR1C1 = false) const; - - /** - * @brief - * Get 2-dimensional range name. Utility function - * @details - * Example: - * @code `(5, 20, 7, 10)` -> `'$H$6:$J$20'` @endcode - * @param[in] rlo - * Relation first end - * @param[in] rhi - * Relation second end - * @param[in] clo - * Coords first end - * @param[in] chi - * Coords second end - * @param[in] isR1C1 - * If cell address id in R1C1 mode - * @return - * Range name - * @since 1.0 - */ - std::string rangeName2D(int rlo, int rhi, int clo, int chi, bool isR1C1 = false) const; - - /** - * @brief - * Get relative 2-dimensional range name. Utility function - * @param[in] coords - * Range coords - * @param[in] relationFlags - * Relation flags. 
Shows if address is relative (1) or absolute (0) - * @param[in] rowIndex - * Row index - * @param[in] colIndex - * Column index - * @param[in] isR1C1 - * If cell address id in R1C1 mode - * @return - * Range name - * @since 1.0 - */ - std::string rangeName2DRel(const std::vector& coords, const std::vector& relationFlags, - int rowIndex = 0, int colIndex = 0, bool isR1C1 = false) const; - - /** - * @brief - * Get sheet range name - * @param[in] sheetIndex1 - * Sheet left-top index - * @param[in] sheetIndex2 - * Sheet right-bottom index - * @return - * Sheet range name - * @since 1.0 - */ - std::string sheetRange(int sheetIndex1, int sheetIndex2) const; - - /** - * @brief - * Get sheet name - * @param[in] sheetIndex - * Sheet index - * @return - * Sheet name - * @since 1.0 - */ - std::string quotedSheetName(int sheetIndex) const; - - /** - * @brief - * Get absolute cell name - * @param[in] rowIndex - * Row index - * @param[in] colIndex - * Column index - * @param[in] relRowIndex - * Relative row index - * @param[in] relColIndex - * Relative column index - * @param[in] bRowIndex - * bRow index - * @param[in] bColIndex - * bColumn index - * @param[in] isR1C1 - * If cell address id in R1C1 mode - * @return - * Absolute cell name - * @since 1.0 - */ - std::string relativeCellName(int rowIndex, int colIndex, int relRowIndex, int relColIndex, - int bRowIndex = 0, int bColIndex = 0, bool isR1C1 = false) const; - - /** - * @brief - * Get absolute cell name - * @details - * Example: - * @code `(5, 7)` -> `'$H$6'` or `'R8C6'` @endcode - * @param[in] rowIndex - * Row index - * @param[in] colIndex - * Column index - * @param[in] isR1C1 - * If cell address id in R1C1 mode - * @return - * Absolute cell name - */ - std::string absoluteCellName(int rowIndex, int colIndex, bool isR1C1 = false) const; - - /** - * @brief - * Get relative row name - * @param[in] rowIndex - * Row index - * @param[in] relRowIndex - * Relative row index - * @param[in] bRowIndex - * bRow index - * 
@param[in] isR1C1 - * If cell address id in R1C1 mode - * @return - * Relative row name - * @since 1.0 - */ - std::string relativeRowName(int rowIndex, int relRowIndex, int bRowIndex = 0, bool isR1C1 = false) const; - - /** - * @brief - * Get relative column name - * @param[in] colIndex - * Column index - * @param[in] relColIndex - * Relative column index - * @param[in] bColIndex - * bColumn index - * @param[in] isR1C1 - * If cell address id in R1C1 mode - * @return - * Relative column name - * @since 1.0 - */ - std::string relativeColName(int colIndex, int relColIndex, int bColIndex = 0, bool isR1C1 = false) const; - - /** - * @brief - * Get column name - * @param[in] colIndex - * Column index - * @return - * Column name - * @since 1.0 - */ - std::string colName(int colIndex) const; -}; - - -/** - * @class Operand - * @brief - * Evaluating formulas operands - * @details - * The following table describes kinds and how their values are represented - * Kind symbol | Kind number | Value representation - * :---------: | :---------: | -------------------- - * oBOOL | 3 | Boolean: 0 => False; 1 => True - * oERR | 4 | None, or error code (same as XL_CELL_ERROR in CELL class) - * oMSNG | 5 | Used by Excel as placeholder for missing function argument - * oNUM | 2 | Float. Note that there is no way of distinguishing dates - * oREF | -1 | Value is either None or non-empty list of absolute Ref3D instances - * oREL | -2 | Value is None or non-empty list of fully/partially relative Ref3D instances - * oSTRG | 1 | Unicode string - * oUNK | 0 | Kind is unknown or ambiguous. 
Value is None - */ -class Operand { -public: - /** - * @param[in] kind - * Operand kind - * @param[in] value - * Operand value - * @param[in] rank - * Operand rank - * @param[in] text - * Reconstituted text of original formula - * @param[in] textValue - * Operand constant text value - * @since 1.0 - */ - Operand(int kind = -5, const std::vector& value = {}, int rank = 0, - const std::string& text = "?", const std::string& textValue = ""); - - /** - * Operand value. None means that actual value of operand is a variable - * (depends on cell data), not a constant - */ - std::vector m_value; - /** Operand constant text value */ - std::string m_textValue; - /** Operand kind. oUNK means that kind of operand is not known unambiguously */ - int m_kind = oUNK; - /** - * Reconstituted text of original formula. Function names will be in English irrespective of - * original language, which doesn't seem to be recorded anywhere. - * Separator is ",", not ";" or whatever else might be more appropriate for end-user's locale - */ - std::string m_text = "?"; - /** Operand rank */ - int m_rank; -}; - - -/** - * @class Ref3D - * @brief - * Represents an absolute or relative 3-dimensional reference to box of one or more cells. - * @details - * There is necessarily no information available as to what cell(s) the reference could possibly - * be relative to. The caller must decide what if any use to make of `oREL` operands. - * Partially relative reference may well be a typo. So far, only one possibility of sheet-relative - * component in reference has been noticed: 2D reference located in the "current sheet". - * This will appear as `coords = (0, 1, ...)` and `relationFlags = (1, 1, ...)` - */ -class Ref3D { -public: - /** - * @param value - * Coordinates vector - * @since 1.0 - */ - Ref3D(const std::vector& value); - - /** - * Tuple of form `(shtxlo, shtxhi, rowxlo, rowxhi, colxlo, colxhi)` where - * `0 <= thingxlo <= thingx < thingxhi`. 
It is quite possible to have `thingx > nthings` - */ - std::vector m_coords; - /** - * 6-tuple of flags which indicate whether the corresponding (sheet|row|col)(lo|hi) - * is relative (1) or absolute (0) - */ - std::vector m_relationFlags; -}; - -} // End namespace \ No newline at end of file diff --git a/3rdparty/libs/fileext/excel/frmt.hpp b/3rdparty/libs/fileext/excel/frmt.hpp deleted file mode 100644 index daa90de..0000000 --- a/3rdparty/libs/fileext/excel/frmt.hpp +++ /dev/null @@ -1,54 +0,0 @@ -/** - * @brief Excel files (xls/xlsx) into HTML сonverter - * @package excel - * @file frmt.hpp - * @author dmryutov (dmryutov@gmail.com) - * @copyright python-excel (https://github.com/python-excel/xlrd) - * @date 02.12.2016 -- 18.10.2017 - */ -#pragma once - -#include - - -namespace excel { - -/** - * @class Format - * @brief - * Number format information from FORMAT record - */ -class Format { -public: - Format() = default; - - /** - * @param[in] formatKey - * Key into @ref Book::m_formatMap - * @param[in] type - * String type - * @param[in] formatString - * Format string - * @since 1.0 - */ - Format(unsigned short formatKey, unsigned char type, std::string formatString); - - /** Key into @ref Book::m_formatMap */ - unsigned short m_formatKey = 0; - /** - * Classification that has been inferred from format string. 
Currently, this is used - * only to distinguish between numbers and dates - * Value | Description - * :---: | ----------- - * 0 | FUN (Unknown) - * 1 | FDT (Date) - * 2 | FNU (Number) - * 3 | FGE (General) - * 4 | FTX (Text) - */ - unsigned char m_type = FUN; - /** Format string */ - std::string m_formatString; -}; - -} // End namespace \ No newline at end of file diff --git a/3rdparty/libs/fileext/excel/libxls/endian.c b/3rdparty/libs/fileext/excel/libxls/endian.c new file mode 100644 index 0000000..3863bf1 --- /dev/null +++ b/3rdparty/libs/fileext/excel/libxls/endian.c @@ -0,0 +1,288 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * + * Copyright 2013 Bob Colbert + * + * This file is part of libxls -- A multiplatform, C/C++ library for parsing + * Excel(TM) files. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS''AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include + +#include "../include/libxls/xlstypes.h" +#include "../include/libxls/endian.h" +#include "../include/libxls/ole.h" + +int xls_is_bigendian(void) +{ +#if defined (__BIG_ENDIAN__) + return 1; +#elif defined (__LITTLE_ENDIAN__) + return 0; +#else + static int n = 1; + + return (*(char *)&n == 0); +#endif +} + +DWORD xlsIntVal (DWORD i) +{ + unsigned char c1, c2, c3, c4; + + if (xls_is_bigendian()) { + c1 = i & 255; + c2 = (i >> 8) & 255; + c3 = (i >> 16) & 255; + c4 = (i >> 24) & 255; + + return ((int)c1 << 24) + ((int)c2 << 16) + ((int)c3 << 8) + c4; + } else { + return i; + } +} + +unsigned short xlsShortVal (short s) +{ + unsigned char c1, c2; + + if (xls_is_bigendian()) { + c1 = s & 255; + c2 = (s >> 8) & 255; + + return (c1 << 8) + c2; + } else { + return s; + } +} + +void xlsConvertDouble(unsigned char *d) +{ + unsigned char t; + int i; + + if (xls_is_bigendian()) { + for (i=0; i<4; i++) + { + t = d[7-i]; + d[7-i] = d[i]; + d[i] = t; + } + } +} + +void xlsConvertBof(BOF *b) +{ + b->id = xlsShortVal(b->id); + b->size = xlsShortVal(b->size); +} + +void xlsConvertBiff(BIFF *b) +{ + b->ver = xlsShortVal(b->ver); + b->type = xlsShortVal(b->type); + b->id_make = xlsShortVal(b->id_make); + b->year = xlsShortVal(b->year); + b->flags = xlsIntVal(b->flags); + b->min_ver = xlsIntVal(b->min_ver); +} + +void xlsConvertWindow(WIND1 *w) +{ + w->xWn = xlsShortVal(w->xWn); + w->yWn = xlsShortVal(w->yWn); + w->dxWn = 
xlsShortVal(w->dxWn); + w->dyWn = xlsShortVal(w->dyWn); + w->grbit = xlsShortVal(w->grbit); + w->itabCur = xlsShortVal(w->itabCur); + w->itabFirst = xlsShortVal(w->itabFirst); + w->ctabSel = xlsShortVal(w->ctabSel); + w->wTabRatio = xlsShortVal(w->wTabRatio); +} + +void xlsConvertSst(SST *s) +{ + s->num = xlsIntVal(s->num); + s->numofstr = xlsIntVal(s->numofstr); +} + +void xlsConvertXf5(XF5 *x) +{ + x->font=xlsShortVal(x->font); + x->format=xlsShortVal(x->format); + x->type=xlsShortVal(x->type); + x->align=xlsShortVal(x->align); + x->color=xlsShortVal(x->color); + x->fill=xlsShortVal(x->fill); + x->border=xlsShortVal(x->border); + x->linestyle=xlsShortVal(x->linestyle); +} + +void xlsConvertXf8(XF8 *x) +{ + W_ENDIAN(x->font); + W_ENDIAN(x->format); + W_ENDIAN(x->type); + D_ENDIAN(x->linestyle); + D_ENDIAN(x->linecolor); + W_ENDIAN(x->groundcolor); +} + +void xlsConvertFont(FONT *f) +{ + W_ENDIAN(f->height); + W_ENDIAN(f->flag); + W_ENDIAN(f->color); + W_ENDIAN(f->bold); + W_ENDIAN(f->escapement); +} + +void xlsConvertFormat(FORMAT *f) +{ + W_ENDIAN(f->index); +} + +void xlsConvertBoundsheet(BOUNDSHEET *b) +{ + D_ENDIAN(b->filepos); +} + +void xlsConvertColinfo(COLINFO *c) +{ + W_ENDIAN(c->first); + W_ENDIAN(c->last); + W_ENDIAN(c->width); + W_ENDIAN(c->xf); + W_ENDIAN(c->flags); +} + +void xlsConvertRow(ROW *r) +{ + W_ENDIAN(r->index); + W_ENDIAN(r->fcell); + W_ENDIAN(r->lcell); + W_ENDIAN(r->height); + W_ENDIAN(r->notused); + W_ENDIAN(r->notused2); + W_ENDIAN(r->flags); + W_ENDIAN(r->xf); +} + +void xlsConvertMergedcells(MERGEDCELLS *m) +{ + W_ENDIAN(m->rowf); + W_ENDIAN(m->rowl); + W_ENDIAN(m->colf); + W_ENDIAN(m->coll); +} + +void xlsConvertCol(COL *c) +{ + W_ENDIAN(c->row); + W_ENDIAN(c->col); + W_ENDIAN(c->xf); +} + +void xlsConvertFormula(FORMULA *f) +{ + W_ENDIAN(f->row); + W_ENDIAN(f->col); + W_ENDIAN(f->xf); + if(f->res == 0xFFFF) { + switch(f->resid) { + case 0: // string + case 1: // bool + case 2: // error + case 3: // empty string + break; + default: 
+ xlsConvertDouble(&f->resid); + break; + } + } else { + xlsConvertDouble(&f->resid); + } + + W_ENDIAN(f->flags); + W_ENDIAN(f->len); + //fflush(stdout); left over from debugging? +} + +void xlsConvertFormulaArray(FARRAY *f) +{ + W_ENDIAN(f->row1); + W_ENDIAN(f->row2); + W_ENDIAN(f->col1); + W_ENDIAN(f->col2); + W_ENDIAN(f->flags); + W_ENDIAN(f->len); +} + +void xlsConvertHeader(OLE2Header *h) +{ + unsigned long i; + for (i=0; i<sizeof(h->id)/sizeof(h->id[0]); i++) + h->id[i] = xlsIntVal(h->id[i]); + for (i=0; i<sizeof(h->clid)/sizeof(h->clid[0]); i++) + h->clid[i] = xlsIntVal(h->clid[i]); + h->verminor = xlsShortVal(h->verminor); + h->verdll = xlsShortVal(h->verdll); + h->byteorder = xlsShortVal(h->byteorder); + h->lsectorB = xlsShortVal(h->lsectorB); + h->lssectorB = xlsShortVal(h->lssectorB); + h->reserved1 = xlsShortVal(h->reserved1); + h->reserved2 = xlsIntVal(h->reserved2); + h->reserved3 = xlsIntVal(h->reserved3); + + h->cfat = xlsIntVal(h->cfat); + h->dirstart = xlsIntVal(h->dirstart); + + h->reserved4 = xlsIntVal(h->reserved4); + + h->sectorcutoff = xlsIntVal(h->sectorcutoff); + h->sfatstart = xlsIntVal(h->sfatstart); + h->csfat = xlsIntVal(h->csfat); + h->difstart = xlsIntVal(h->difstart); + h->cdif = xlsIntVal(h->cdif); + for (i=0; i<sizeof(h->MSAT)/sizeof(h->MSAT[0]); i++) + h->MSAT[i] = xlsIntVal(h->MSAT[i]); +} + +void xlsConvertPss(PSS* pss) +{ + int i; + pss->bsize = xlsShortVal(pss->bsize); + pss->left = xlsIntVal(pss->left); + pss->right = xlsIntVal(pss->right); + pss->child = xlsIntVal(pss->child); + + for(i=0; i<8; i++) + pss->guid[i]=xlsShortVal(pss->guid[i]); + pss->userflags = xlsIntVal(pss->userflags); +/* TIME_T time[2]; */ + pss->sstart = xlsIntVal(pss->sstart); + pss->size = xlsIntVal(pss->size); + pss->proptype = xlsIntVal(pss->proptype); +} diff --git a/3rdparty/libs/fileext/excel/libxls/include/libxls/brdb.c.h b/3rdparty/libs/fileext/excel/libxls/include/libxls/brdb.c.h new file mode 100644 index 0000000..d47cea4 --- /dev/null +++ 
b/3rdparty/libs/fileext/excel/libxls/include/libxls/brdb.c.h @@ -0,0 +1,217 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * + * Copyright 2004 Komarov Valery + * Copyright 2006 Christophe Leitienne + * Copyright 2008-2017 David Hoerl + * Copyright 2013 Bob Colbert + * Copyright 2013-2018 Evan Miller + * + * This file is part of libxls -- A multiplatform, C/C++ library for parsing + * Excel(TM) files. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +{ 0x00, "Unknown", ""}, +{ 0x06, "FORMULA", "Cell Formula" }, +{ 0x09, "BOF-BIFF2", "Beginning of File" }, +{ 0x0A, "EOF", "End of File" }, +{ 0x0C, "CALCCOUNT", "Iteration Count" }, +{ 0x0D, "CALCMODE", "Calculation Mode" }, +{ 0x0E, "PRECISION", "Precision" }, +{ 0x0F, "REFMODE", "Reference Mode" }, +{ 0x10, "DELTA", "Iteration Increment" }, +{ 0x11, "ITERATION", "Iteration Mode" }, +{ 0x12, "PROTECT", "Protection Flag" }, +{ 0x13, "PASSWORD", "Protection Password" }, +{ 0x14, "HEADER", "Print Header on Each Page" }, +{ 0x15, "FOOTER", "Print Footer on Each Page" }, +{ 0x16, "EXTERNCOUNT", "Number of External References" }, +{ 0x17, "EXTERNSHEET", "External Reference" }, +{ 0x18, "DEFINEDNAME", "User Defined Formulas (and others)" }, +{ 0x19, "?WINDOWPROTECT", " (biffview guessed)" }, +{ 0x1A, "VERTICALPAGEBREAKS", "Explicit Column Page Breaks" }, +{ 0x1B, "HORIZONTALPAGEBREAKS", "Explicit Row Page Breaks" }, +{ 0x1C, "NOTE", "Comment Associated with a Cell" }, +{ 0x1D, "SELECTION", "Current Selection" }, +{ 0x22, "DATEMODE", "1904 Date System" }, +{ 0x26, "LEFTMARGIN", "Left Margin Measurement" }, +{ 0x27, "RIGHTMARGIN", "Right Margin Measurement" }, +{ 0x28, "TOPMARGIN", "Top Margin Measurement" }, +{ 0x29, "BOTTOMMARGIN", "Bottom Margin Measurement" }, +{ 0x2A, "PRINTHEADERS", "Print Row/Column Labels" }, +{ 0x2B, "PRINTGRIDLINES", "Print Gridlines Flag" }, +{ 0x2F, "FILEPASS", "File Is Password-Protected" }, +{ 0x31, "FONT", "Font Description" }, +{ 0x3C, "CONTINUE", "Continues Long Records" }, +{ 0x3D, "WINDOW1", "Window Information" }, +{ 0x40, "BACKUP", "Save Backup Version of the File" }, +{ 0x41, "PANE", "Number of Panes and Their Position" }, +{ 0x42, "CODEPAGE", "Default Code Page" }, +{ 0x4D, "PLS", "Environment-Specific Print Record" }, +{ 0x50, "DCON", "Data Consolidation Information" }, +{ 0x51, "DCONREF", "Data Consolidation References" }, +{ 0x52, "DCONNAME", "Data Consolidation Named References" }, +{ 0x55, "DEFCOLWIDTH", "Default 
Width for Columns" }, +{ 0x59, "XCT", "CRN Record Count" }, +{ 0x5A, "CRN", "Nonresident Operands" }, +{ 0x5B, "FILESHARING", "File-Sharing Information" }, +{ 0x5C, "WRITEACCESS", "Write Access User Name" }, +{ 0x5D, "OBJ", "Describes a Graphic Object" }, +{ 0x5E, "UNCALCED", "Recalculation Status" }, +{ 0x5F, "SAVERECALC", "Recalculate Before Save" }, +{ 0x60, "TEMPLATE", "Workbook Is a Template" }, +{ 0x63, "OBJPROTECT", "Objects Are Protected" }, +{ 0x7D, "COLINFO", "Column Formatting Information" }, +{ 0x7F, "IMDATA", "Image Data" }, +{ 0x80, "GUTS", "Size of Row and Column Gutters" }, +{ 0x81, "WSBOOL", "Additional Workspace Information" }, +{ 0x82, "GRIDSET", "State Change of Gridlines Option" }, +{ 0x83, "HCENTER", "Center Between Horizontal Margins" }, +{ 0x84, "VCENTER", "Center Between Vertical Margins" }, +{ 0x85, "BOUNDSHEET", "Sheet Information" }, +{ 0x86, "WRITEPROT", "Workbook Is Write-Protected" }, +{ 0x87, "ADDIN", "Workbook Is an Add-in Macro" }, +{ 0x88, "EDG", "Edition Globals" }, +{ 0x89, "PUB", "Publisher" }, +{ 0x8C, "COUNTRY", "Default Country and WIN.INI Country" }, +{ 0x8D, "HIDEOBJ", "Object Display Options" }, +{ 0x90, "SORT", "Sorting Options" }, +{ 0x91, "SUB", "Subscriber" }, +{ 0x92, "PALETTE", "Color Palette Definition" }, +{ 0x94, "LHRECORD", ".WK? 
File Conversion Information" }, +{ 0x95, "LHNGRAPH", "Named Graph Information" }, +{ 0x96, "SOUND", "Sound Note" }, +{ 0x99, "STANDARDWIDTH", "Standard Column Width" }, +{ 0x98, "LPR", "Sheet Was Printed Using LINE.PRINT" }, +{ 0x9A, "FNGROUPNAME", "Function Group Name" }, +{ 0x9B, "FILTERMODE", "Sheet Contains Filtered List" }, +{ 0x9C, "FNGROUPCOUNT", "Built-in Function Group Count" }, +{ 0x9D, "AUTOFILTERINFO", "Drop-Down Arrow Count" }, +{ 0x9E, "AUTOFILTER", "AutoFilter Data" }, +{ 0xA0, "SCL", "Window Zoom Magnification" }, +{ 0xA1, "SETUP", "Page Setup" }, +{ 0xA9, "COORDLIST", "Polygon Object Vertex Coordinates" }, +{ 0xAB, "GCW", "Global Column-Width Flags" }, +{ 0xAE, "SCENMAN", "Scenario Output Data" }, +{ 0xAF, "SCENARIO", "Scenario Data" }, +{ 0xB0, "SXVIEW", "View Definition" }, +{ 0xB1, "SXVD", "View Fields" }, +{ 0xB2, "SXVI", "View Item" }, +{ 0xB4, "SXIVD", "Row/Column Field IDs" }, +{ 0xB5, "SXLI", "Line Item Array" }, +{ 0xB6, "SXPI", "Page Item" }, +{ 0xB8, "DOCROUTE", "Routing Slip Information" }, +{ 0xB9, "RECIPNAME", "Recipient Name" }, +{ 0xBC, "SHRFMLA", "Shared Formula" }, +{ 0xBD, "MULRK", "Multiple RK Cells" }, +{ 0xBE, "MULBLANK", "Multiple Blank Cells" }, +{ 0xC1, "MMS", "ADDMENU/DELMENU Record Group Count" }, +{ 0xC2, "ADDMENU", "Menu Addition" }, +{ 0xC3, "DELMENU", "Menu Deletion" }, +{ 0xC5, "SXDI", "Data Item" }, +{ 0xC6, "SXDB", "PivotTable Cache Data" }, +{ 0xCD, "SXSTRING", "String" }, +{ 0xD0, "SXTBL", "Multiple Consolidation Source Info" }, +{ 0xD1, "SXTBRGIITM", "Page Item Name Count" }, +{ 0xD2, "SXTBPG", "Page Item Indexes" }, +{ 0xD3, "OBPROJ", "Visual Basic Project" }, +{ 0xD5, "SXIDSTM", "Stream ID" }, +{ 0xD6, "RSTRING", "Cell with Character Formatting" }, +{ 0xD7, "DBCELL", "Stream Offsets" }, +{ 0xDA, "BOOKBOOL", "Workbook Option Flag" }, +{ 0xDC, "PARAMQRY-SXEXT", "Query Parameters-External Source Information" }, +{ 0xDD, "SCENPROTECT", "Scenario Protection" }, +{ 0xDE, "OLESIZE", "Size of OLE Object" }, +{ 0xDF, 
"UDDESC", "Description String for Chart Autoformat" }, +{ 0xE0, "XF", "Extended Format" }, +{ 0xE1, "INTERFACEHDR", "Beginning of User Interface Records" }, +{ 0xE2, "INTERFACEEND", "End of User Interface Records" }, +{ 0xE3, "SXVS", "View Source" }, +{ 0xE5, "CSPAN", "Cells span" }, +{ 0xEA, "TABIDCONF", "Sheet Tab ID of Conflict History" }, +{ 0xEB, "MSODRAWINGGROUP", "Microsoft Office Drawing Group" }, +{ 0xEC, "MSODRAWING", "Microsoft Office Drawing" }, +{ 0xED, "MSODRAWINGSELECTION", "Microsoft Office Drawing Selection" }, +{ 0xEF, "PHONETIC-INFO", "Specifies the default format for phonetic strings " }, +{ 0xF0, "SXRULE", "PivotTable Rule Data" }, +{ 0xF1, "SXEX", "PivotTable View Extended Information" }, +{ 0xF2, "SXFILT", "PivotTable Rule Filter" }, +{ 0xF6, "SXNAME", "PivotTable Name" }, +{ 0xF7, "SXSELECT", "PivotTable Selection Information" }, +{ 0xF8, "SXPAIR", "PivotTable Name Pair" }, +{ 0xF9, "SXFMLA", "PivotTable Parsed Expression" }, +{ 0xFB, "SXFORMAT", "PivotTable Format Record" }, +{ 0xFC, "SST", "Shared String Table" }, +{ 0xFD, "LABELSST", "Cell Value, String Constant/SST" }, +{ 0xFF, "EXTSST", "Extended Shared String Table" }, +{ 0x100, "SXVDEX", "Extended PivotTable View Fields" }, +{ 0x103, "SXFORMULA", "PivotTable Formula Record" }, +{ 0x122, "SXDBEX", "PivotTable Cache Data" }, +{ 0x13D, "TABID", "Sheet Tab Index Array" }, +{ 0x160, "USESELFS", "Natural Language Formulas Flag" }, +{ 0x161, "DSF", "Double Stream File" }, +{ 0x162, "XL5MODIFY", "Flag for DSF" }, +{ 0x1A5, "FILESHARING2", "File-Sharing Information for Shared Lists" }, +{ 0x1A9, "USERBVIEW", "Workbook Custom View Settings" }, +{ 0x1AA, "USERSVIEWBEGIN", "Custom View Settings" }, +{ 0x1AB, "USERSVIEWEND", "End of Custom View Records" }, +{ 0x1AD, "QSI", "External Data Range" }, +{ 0x1AE, "SUPBOOK", "Supporting Workbook" }, +{ 0x1AF, "PROT4REV", "Shared Workbook Protection Flag" }, +{ 0x1B0, "CONDFMT", "Conditional Formatting Range Information" }, +{ 0x1B1, "CF", "Conditional 
Formatting Conditions" }, +{ 0x1B2, "DVAL", "Data Validation Information" }, +{ 0x1B5, "DCONBIN", "Data Consolidation Information" }, +{ 0x1B6, "TXO", "Text Object" }, +{ 0x1B7, "REFRESHALL", "Refresh Flag" }, +{ 0x1B8, "HLINK", "Hyperlink" }, +{ 0x1BA, "CODENAME", "Name of a workbook object," }, +{ 0x1BB, "SXFDBTYPE", "SQL Datatype Identifier" }, +{ 0x1BC, "PROT4REVPASS", "Shared Workbook Protection Password" }, +{ 0x1BE, "DV", "Data Validation Criteria" }, +{ 0x1C1, "RECALC_ID", "identifier of the recalculation engine" }, +{ 0x200, "DIMENSIONS", "Cell Table Size" }, +{ 0x201, "BLANK", "Cell Value, Blank Cell" }, +{ 0x203, "NUMBER", "Cell Value, Floating-Point Number" }, +{ 0x204, "LABEL", "Cell Value, String Constant" }, +{ 0x205, "BOOLERR", "Cell Value, Boolean or Error" }, +{ 0x207, "STRING", "String Value of a Formula" }, +{ 0x208, "ROW", "Describes a Row" }, +{ 0x209, "BOF-BIFF3", "Beginning of File" }, +{ 0x20B, "INDEX", "Index Record" }, +{ 0x218, "NAME", "Defined Name" }, +{ 0x221, "ARRAY", "Array-Entered Formula" }, +{ 0x223, "EXTERNNAME", "Externally Referenced Name" }, +{ 0x225, "DEFAULTROWHEIGHT", "Default Row Height" }, +{ 0x236, "TABLE", "Data Table" }, +{ 0x23E, "WINDOW2", "Sheet Window Information" }, +{ 0x27E, "RK", "Cell Value, RK Number" }, +{ 0x293, "STYLE", "Style Information" }, +{ 0x409, "BOF-BIFF4", "Beginning of File" }, +{ 0x41E, "FORMAT", "Number Format" }, +{ 0x4BC, "?FORMULA-RELATED=?(BC=SHRFMLA))", "Formula related, always before there are 0x06 (FORMULA)" }, +{ 0x809, "BOF-BIFF5/7/8", "Beginning of File" }, +{ 0x863, "BOOKEXT", "Specifies properties of a workbook file." 
}, +{ 0xFFF, "", "" }, diff --git a/3rdparty/libs/fileext/excel/libxls/include/libxls/brdb.h b/3rdparty/libs/fileext/excel/libxls/include/libxls/brdb.h new file mode 100644 index 0000000..a84b7d6 --- /dev/null +++ b/3rdparty/libs/fileext/excel/libxls/include/libxls/brdb.h @@ -0,0 +1,62 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * + * Copyright 2004 Komarov Valery + * Copyright 2006 Christophe Leitienne + * Copyright 2008-2017 David Hoerl + * Copyright 2013 Bob Colbert + * Copyright 2013-2018 Evan Miller + * + * This file is part of libxls -- A multiplatform, C/C++ library for parsing + * Excel(TM) files. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +struct str_brdb +{ + WORD opcode; + char * name; /* printable name */ + char * desc; /* printable description */ +}; +typedef struct str_brdb record_brdb; + +record_brdb brdb[] = + { +#include "../libxls/brdb.c.h" + }; + +static int get_brbdnum(int id) +{ + + int i; + i=0; + do + { + if (brdb[i].opcode==id) + return i; + i++; + } + while (brdb[i].opcode!=0xFFF); + return 0; +} diff --git a/3rdparty/libs/fileext/excel/libxls/include/libxls/endian.h b/3rdparty/libs/fileext/excel/libxls/include/libxls/endian.h new file mode 100644 index 0000000..68a7c75 --- /dev/null +++ b/3rdparty/libs/fileext/excel/libxls/include/libxls/endian.h @@ -0,0 +1,61 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * + * Copyright 2013 Bob Colbert + * + * This file is part of libxls -- A multiplatform, C/C++ library for parsing + * Excel(TM) files. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "../libxls/xlsstruct.h" + +int xls_is_bigendian(void); +DWORD xlsIntVal (DWORD i); +unsigned short xlsShortVal (short s); + +void xlsConvertHeader(OLE2Header *h); +void xlsConvertPss(PSS* pss); + +void xlsConvertDouble(BYTE *d); +void xlsConvertBof(BOF *b); +void xlsConvertBiff(BIFF *b); +void xlsConvertWindow(WIND1 *w); +void xlsConvertSst(SST *s); +void xlsConvertXf5(XF5 *x); +void xlsConvertXf8(XF8 *x); +void xlsConvertFont(FONT *f); +void xlsConvertFormat(FORMAT *f); +void xlsConvertBoundsheet(BOUNDSHEET *b); +void xlsConvertColinfo(COLINFO *c); +void xlsConvertRow(ROW *r); +void xlsConvertMergedcells(MERGEDCELLS *m); +void xlsConvertCol(COL *c); +void xlsConvertFormula(FORMULA *f); +void xlsConvertFormulaArray(FARRAY *f); +void xlsConvertHeader(OLE2Header *h); +void xlsConvertPss(PSS* pss); + +#define W_ENDIAN(a) a=xlsShortVal(a) +#define D_ENDIAN(a) a=xlsIntVal(a) diff --git a/3rdparty/libs/fileext/excel/libxls/include/libxls/locale.h b/3rdparty/libs/fileext/excel/libxls/include/libxls/locale.h new file mode 100644 index 0000000..3ae3911 --- /dev/null +++ b/3rdparty/libs/fileext/excel/libxls/include/libxls/locale.h @@ -0,0 +1,44 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * + * Copyright 2020 Evan Miller + * + * This file is part of libxls -- A multiplatform, C/C++ library for parsing + * Excel(TM) files. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ +#ifdef HAVE_XLOCALE_H +#include <xlocale.h> +#endif +#include <locale.h> + +#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64) || defined(WINDOWS) +typedef _locale_t xls_locale_t; +#else +typedef locale_t xls_locale_t; +#endif + +xls_locale_t xls_createlocale(void); +void xls_freelocale(xls_locale_t locale); +size_t xls_wcstombs_l(char *restrict s, const wchar_t *restrict pwcs, size_t n, xls_locale_t loc); diff --git a/3rdparty/libs/fileext/excel/libxls/include/libxls/ole.h b/3rdparty/libs/fileext/excel/libxls/include/libxls/ole.h new file mode 100644 index 0000000..e648723 --- /dev/null +++ b/3rdparty/libs/fileext/excel/libxls/include/libxls/ole.h @@ -0,0 +1,191 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * + * Copyright 2004 Komarov Valery + * Copyright 2006 Christophe Leitienne + * Copyright 2008-2017 David Hoerl + * Copyright 2013 Bob Colbert + * Copyright 2013-2018 Evan Miller + * + * This file is part of libxls -- A multiplatform, C/C++ library for parsing + * Excel(TM) files. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef OLE_INCLUDE +#define OLE_INCLUDE + +#ifdef _MSC_VER +#include <BaseTsd.h> +typedef SSIZE_T ssize_t; +#endif + +#include <stdio.h> // FILE * + +#include "../libxls/xlstypes.h" + +#if defined(_AIX) || defined(__sun) +#pragma pack(1) +#else +#pragma pack(push, 1) +#endif + +typedef struct TIME_T +{ + DWORD LowDate; + DWORD HighDate; +} +TIME_T; + +typedef struct OLE2Header +{ + DWORD id[2]; //D0CF11E0 A1B11AE1 + DWORD clid[4]; + WORD verminor; //0x3e + WORD verdll; //0x03 + WORD byteorder; + WORD lsectorB; + WORD lssectorB; + + WORD reserved1; + DWORD reserved2; + DWORD reserved3; + + DWORD cfat; // count full sectors + DWORD dirstart; + + DWORD reserved4; + + DWORD sectorcutoff; // min size of a standard stream ; if less than this then it uses short-streams + DWORD sfatstart; // first short-sector or EOC + DWORD csfat; // count short sectors + DWORD difstart; // first sector master sector table or EOC + DWORD cdif; // total count + DWORD MSAT[109]; // First 109 MSAT +} +OLE2Header; + +#pragma pack(pop) + +//----------------------------------------------------------------------------------- +typedef struct st_olefiles +{ + long count; + struct st_olefiles_data + { + char* name; + DWORD start; + DWORD size; + } + * file; +} +st_olefiles; + +typedef struct OLE2 +{ + FILE* file; + const void *buffer; + size_t buffer_len; + size_t buffer_pos; + + WORD lsector; + WORD lssector; + DWORD cfat; + DWORD dirstart; + + DWORD sectorcutoff; + DWORD 
sfatstart; + DWORD csfat; + DWORD difstart; + DWORD cdif; + + DWORD* SecID; // regular sector data + DWORD SecIDCount; + + DWORD* SSecID; // short sector data + DWORD SSecIDCount; + + BYTE* SSAT; // directory of short sectors + DWORD SSATCount; + + st_olefiles files; +} +OLE2; + +typedef struct OLE2Stream +{ + OLE2* ole; + DWORD start; + size_t pos; + size_t cfat; + size_t size; + size_t fatpos; + BYTE* buf; + DWORD bufsize; + BYTE eof; + BYTE sfat; // short +} +OLE2Stream; + +#if defined(_AIX) || defined(__sun) +#pragma pack(1) +#else +#pragma pack(push, 1) +#endif + +typedef struct PSS +{ + char name[64]; + WORD bsize; + BYTE type; //STGTY +#define PS_EMPTY 00 +#define PS_USER_STORAGE 01 +#define PS_USER_STREAM 02 +#define PS_USER_ROOT 05 + BYTE flag; //COLOR +#define BLACK 1 + DWORD left; + DWORD right; + DWORD child; + WORD guid[8]; + DWORD userflags; + TIME_T time[2]; + DWORD sstart; + DWORD size; + DWORD proptype; +} +PSS; + +#pragma pack(pop) + +ssize_t ole2_read(void* buf,size_t size,size_t count,OLE2Stream* olest); +OLE2Stream* ole2_sopen(OLE2* ole,DWORD start, size_t size); +int ole2_seek(OLE2Stream* olest,DWORD ofs); +OLE2Stream* ole2_fopen(OLE2* ole, const char *file); +void ole2_fclose(OLE2Stream* ole2st); +OLE2* ole2_open_file(const char *file); +OLE2* ole2_open_buffer(const void *buffer, size_t len); +void ole2_close(OLE2* ole2); + +#endif diff --git a/3rdparty/libs/fileext/excel/libxls/include/libxls/xlsstruct.h b/3rdparty/libs/fileext/excel/libxls/include/libxls/xlsstruct.h new file mode 100644 index 0000000..56f343c --- /dev/null +++ b/3rdparty/libs/fileext/excel/libxls/include/libxls/xlsstruct.h @@ -0,0 +1,546 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * + * Copyright 2004 Komarov Valery + * Copyright 2006 Christophe Leitienne + * Copyright 2008-2017 David Hoerl + * Copyright 2013 Bob Colbert + * Copyright 2013-2018 Evan Miller + * + * This file is part of libxls -- A multiplatform, C/C++ library for 
parsing + * Excel(TM) files. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef XLS_STRUCT_INC +#define XLS_STRUCT_INC + +#include "../libxls/ole.h" + +#define XLS_RECORD_EOF 0x000A +#define XLS_RECORD_DEFINEDNAME 0x0018 +#define XLS_RECORD_NOTE 0x001C +#define XLS_RECORD_1904 0x0022 +#define XLS_RECORD_FILEPASS 0x002F +#define XLS_RECORD_CONTINUE 0x003C +#define XLS_RECORD_WINDOW1 0x003D +#define XLS_RECORD_CODEPAGE 0x0042 +#define XLS_RECORD_OBJ 0x005D +#define XLS_RECORD_MERGEDCELLS 0x00E5 +#define XLS_RECORD_DEFCOLWIDTH 0x0055 +#define XLS_RECORD_COLINFO 0x007D +#define XLS_RECORD_BOUNDSHEET 0x0085 +#define XLS_RECORD_PALETTE 0x0092 +#define XLS_RECORD_MULRK 0x00BD +#define XLS_RECORD_MULBLANK 0x00BE +#define XLS_RECORD_RSTRING 0x00D6 +#define XLS_RECORD_DBCELL 0x00D7 +#define XLS_RECORD_XF 0x00E0 +#define XLS_RECORD_MSODRAWINGGROUP 0x00EB +#define XLS_RECORD_MSODRAWING 0x00EC +#define XLS_RECORD_SST 0x00FC +#define XLS_RECORD_LABELSST 0x00FD +#define XLS_RECORD_EXTSST 0x00FF +#define XLS_RECORD_TXO 0x01B6 +#define XLS_RECORD_HYPERREF 0x01B8 +#define XLS_RECORD_BLANK 0x0201 +#define XLS_RECORD_NUMBER 0x0203 +#define XLS_RECORD_LABEL 0x0204 +#define XLS_RECORD_BOOLERR 0x0205 +#define XLS_RECORD_STRING 0x0207 // only follows a formula +#define XLS_RECORD_ROW 0x0208 +#define XLS_RECORD_INDEX 0x020B +#define XLS_RECORD_ARRAY 0x0221 // Array-entered formula +#define XLS_RECORD_DEFAULTROWHEIGHT 0x0225 +#define XLS_RECORD_FONT 0x0031 // spec says 0x0231 but Excel expects 0x0031 +#define XLS_RECORD_FONT_ALT 0x0231 +#define XLS_RECORD_WINDOW2 0x023E +#define XLS_RECORD_RK 0x027E +#define XLS_RECORD_STYLE 0x0293 +#define XLS_RECORD_FORMULA 0x0006 +#define XLS_RECORD_FORMULA_ALT 0x0406 // Apple Numbers bug +#define XLS_RECORD_FORMAT 0x041E +#define XLS_RECORD_BOF 0x0809 + +#define BLANK_CELL XLS_RECORD_BLANK // compat + +#if defined(_AIX) || defined(__sun) +#pragma pack(1) +#else +#pragma pack(push, 1) +#endif + +typedef struct BOF +{ + WORD id; + WORD size; +} +BOF; + +typedef struct BIFF +{ + WORD ver; + WORD type; + WORD 
id_make; + WORD year; + DWORD flags; + DWORD min_ver; +} +BIFF; + +typedef struct WIND1 +{ + WORD xWn; + WORD yWn; + WORD dxWn; + WORD dyWn; + WORD grbit; + WORD itabCur; + WORD itabFirst; + WORD ctabSel; + WORD wTabRatio; +} +WIND1; + +typedef struct BOUNDSHEET +{ + DWORD filepos; + BYTE type; + BYTE visible; + char name[1]; +} +BOUNDSHEET; + +typedef struct ROW +{ + WORD index; + WORD fcell; // first cell, 0-indexed + WORD lcell; // last cell, 1-indexed + WORD height; + WORD notused; + WORD notused2; //used only for BIFF3-4 + WORD flags; + WORD xf; +} +ROW; + +typedef struct COL +{ + WORD row; + WORD col; + WORD xf; +} +COL; + + +typedef struct FORMULA // BIFF8 +{ + WORD row; + WORD col; + WORD xf; + // next 8 bytes either a IEEE double, or encoded on a byte basis + BYTE resid; + BYTE resdata[5]; + WORD res; + WORD flags; + BYTE chn[4]; // BIFF8 + WORD len; + BYTE value[1]; //var +} +FORMULA; + +typedef struct FARRAY // BIFF8 +{ + WORD row1; + WORD row2; + BYTE col1; + BYTE col2; + WORD flags; + BYTE chn[4]; // BIFF8 + WORD len; + BYTE value[1]; //var +} +FARRAY; + +typedef struct RK +{ + WORD row; + WORD col; + WORD xf; + DWORD value; +} +RK; + +typedef struct MULRK +{ + WORD row; + WORD col; + struct { + WORD xf; + DWORD value; + } rk[1]; + //WORD last_col; +} +MULRK; + +typedef struct MULBLANK +{ + WORD row; + WORD col; + WORD xf[1]; + //WORD last_col; +} +MULBLANK; + +typedef struct BLANK +{ + WORD row; + WORD col; + WORD xf; +} +BLANK; + +typedef struct LABEL +{ + WORD row; + WORD col; + WORD xf; + BYTE value[1]; // var +} +LABEL; + +typedef struct BOOLERR +{ + WORD row; + WORD col; + WORD xf; + BYTE value; + BYTE iserror; +} +BOOLERR; + +typedef struct SST +{ + DWORD num; + DWORD numofstr; + BYTE strings[1]; +} +SST; + +typedef struct XF5 +{ + WORD font; + WORD format; + WORD type; + WORD align; + WORD color; + WORD fill; + WORD border; + WORD linestyle; +} +XF5; + +typedef struct XF8 +{ + WORD font; + WORD format; + WORD type; + BYTE align; + BYTE 
rotation; + BYTE ident; + BYTE usedattr; + DWORD linestyle; + DWORD linecolor; + WORD groundcolor; +} +XF8; + +typedef struct BR_NUMBER +{ + WORD row; + WORD col; + WORD xf; + double value; +} +BR_NUMBER; + +typedef struct COLINFO +{ + WORD first; + WORD last; + WORD width; + WORD xf; + WORD flags; +/* There should be an unused WORD field at the end here. However, some files in + * the wild report it as a BYTE, which results in a boundary-check parse error. + * Since the value is ignored anyway, we'll just pretend it was never there. + * + * See issue https://github.com/evanmiller/libxls/issues/27 + */ +} +COLINFO; + +typedef struct MERGEDCELLS +{ + WORD rowf; + WORD rowl; + WORD colf; + WORD coll; +} +MERGEDCELLS; + +typedef struct FONT +{ + WORD height; + WORD flag; + WORD color; + WORD bold; + WORD escapement; + BYTE underline; + BYTE family; + BYTE charset; + BYTE notused; + char name[1]; +} +FONT; + +typedef struct FORMAT +{ + WORD index; + char value[1]; +} +FORMAT; + +#pragma pack(pop) + +//--------------------------------------------------------- + +typedef struct st_sheet +{ + DWORD count; // Count of sheets + struct st_sheet_data + { + DWORD filepos; + BYTE visibility; + BYTE type; + char * name; + } + * sheet; +} +st_sheet; + +typedef struct st_font +{ + DWORD count; // Count of FONT's + struct st_font_data + { + WORD height; + WORD flag; + WORD color; + WORD bold; + WORD escapement; + BYTE underline; + BYTE family; + BYTE charset; + char * name; + } + * font; +} +st_font; + +typedef struct st_format +{ + DWORD count; // Count of FORMAT's + struct st_format_data + { + WORD index; + char *value; + } + * format; +} +st_format; + +typedef struct st_xf +{ + DWORD count; // Count of XF + // XF** xf; + struct st_xf_data + { + WORD font; + WORD format; + WORD type; + BYTE align; + BYTE rotation; + BYTE ident; + BYTE usedattr; + DWORD linestyle; + DWORD linecolor; + WORD groundcolor; + } + * xf; +} +st_xf; + + +typedef struct st_sst +{ + DWORD count; + DWORD 
lastid; + DWORD continued; + DWORD lastln; + DWORD lastrt; + DWORD lastsz; + struct str_sst_string + { + char * str; + } + * string; +} +st_sst; + + +typedef struct st_cell +{ + DWORD count; + struct st_cell_data + { + WORD id; + WORD row; + WORD col; + WORD xf; + char * str; // String value; + double d; + int32_t l; + WORD width; // Width of col + WORD colspan; + WORD rowspan; + BYTE isHidden; // Is cell hidden + } + * cell; +} +st_cell; + + +typedef struct st_row +{ + // DWORD count; + WORD lastcol; // numCols - 1 + WORD lastrow; // numRows - 1 + struct st_row_data + { + WORD index; + WORD fcell; + WORD lcell; + WORD height; + WORD flags; + WORD xf; + BYTE xfflags; + st_cell cells; + } + * row; +} +st_row; + + +typedef struct st_colinfo +{ + DWORD count; // Count of COLINFO + struct st_colinfo_data + { + WORD first; + WORD last; + WORD width; + WORD xf; + WORD flags; + } + * col; +} +st_colinfo; + +typedef struct xlsWorkBook +{ + //FILE* file; + OLE2Stream* olestr; + int32_t filepos; // position in file + + //From Header (BIFF) + BYTE is5ver; + BYTE is1904; + WORD type; + WORD activeSheetIdx; // index of the active sheet + + //Other data + WORD codepage; // Charset codepage + char* charset; + st_sheet sheets; + st_sst sst; // SST table + st_xf xfs; // XF table + st_font fonts; + st_format formats; // FORMAT table + + char *summary; // ole file + char *docSummary; // ole file + + void *converter; + void *utf16_converter; + void *utf8_locale; +} +xlsWorkBook; + +typedef struct xlsWorkSheet +{ + DWORD filepos; + WORD defcolwidth; + st_row rows; + xlsWorkBook *workbook; + st_colinfo colinfo; +} +xlsWorkSheet; + +#ifdef __cplusplus +typedef struct st_cell::st_cell_data xlsCell; +typedef struct st_row::st_row_data xlsRow; +#else +typedef struct st_cell_data xlsCell; +typedef struct st_row_data xlsRow; +#endif + +typedef struct xls_summaryInfo +{ + BYTE *title; + BYTE *subject; + BYTE *author; + BYTE *keywords; + BYTE *comment; + BYTE *lastAuthor; + BYTE *appName; + 
BYTE *category; + BYTE *manager; + BYTE *company; +} +xlsSummaryInfo; + +typedef void (*xls_formula_handler)(WORD bof, WORD len, BYTE *formula); + +#endif diff --git a/3rdparty/libs/fileext/excel/libxls/include/libxls/xlstool.h b/3rdparty/libs/fileext/excel/libxls/include/libxls/xlstool.h new file mode 100644 index 0000000..5c91529 --- /dev/null +++ b/3rdparty/libs/fileext/excel/libxls/include/libxls/xlstool.h @@ -0,0 +1,55 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * + * Copyright 2004 Komarov Valery + * Copyright 2006 Christophe Leitienne + * Copyright 2008-2017 David Hoerl + * Copyright 2013 Bob Colbert + * Copyright 2013-2018 Evan Miller + * + * This file is part of libxls -- A multiplatform, C/C++ library for parsing + * Excel(TM) files. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "../libxls/xlsstruct.h" + +void verbose(char* str); + +char *codepage_decode(const char *s, size_t len, xlsWorkBook *pWB); +char *unicode_decode(const char *s, size_t len, xlsWorkBook *pWB); +char *transcode_utf16_to_utf8(const char *s, size_t len); +char *get_string(const char *s, size_t len, BYTE is2, xlsWorkBook *pWB); +DWORD xls_getColor(const WORD color,WORD def); + +void xls_showBookInfo(xlsWorkBook* pWB); +void xls_showROW(struct st_row_data* row); +void xls_showColinfo(struct st_colinfo_data* col); +void xls_showCell(struct st_cell_data* cell); +void xls_showFont(struct st_font_data* font); +void xls_showXF(XF8* xf); +void xls_showFormat(struct st_format_data* format); +char* xls_getfcell(xlsWorkBook* pWB, struct st_cell_data* cell, BYTE *label); +char* xls_getCSS(xlsWorkBook* pWB); +void xls_showBOF(BOF* bof); diff --git a/3rdparty/libs/fileext/excel/libxls/include/libxls/xlstypes.h b/3rdparty/libs/fileext/excel/libxls/include/libxls/xlstypes.h new file mode 100644 index 0000000..84bd7cd --- /dev/null +++ b/3rdparty/libs/fileext/excel/libxls/include/libxls/xlstypes.h @@ -0,0 +1,52 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * + * Copyright 2004 Komarov Valery + * Copyright 2006 Christophe Leitienne + * Copyright 2008-2017 David Hoerl + * Copyright 2013 Bob Colbert + * Copyright 2013-2018 Evan Miller + * + * This file is part of libxls -- A multiplatform, C/C++ 
library for parsing + * Excel(TM) files. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef XLS_TYPES_INC +#define XLS_TYPES_INC + +#include +#include + +typedef unsigned char BYTE; +typedef uint16_t WORD; +typedef uint32_t DWORD; + +#ifdef _WIN32 +typedef unsigned __int64 unsigned64_t; +#else +typedef uint64_t unsigned64_t; +#endif + +#endif diff --git a/3rdparty/libs/fileext/excel/libxls/include/xls.h b/3rdparty/libs/fileext/excel/libxls/include/xls.h new file mode 100644 index 0000000..89b8b92 --- /dev/null +++ b/3rdparty/libs/fileext/excel/libxls/include/xls.h @@ -0,0 +1,93 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * + * Copyright 2004 Komarov Valery + * Copyright 2006 Christophe Leitienne + * Copyright 2008-2017 David Hoerl + * Copyright 2013-2018 Evan Miller + * + * This file is part of libxls -- A multiplatform, C/C++ library for parsing + * Excel(TM) files. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY Evan Miller ''AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef XLS_INCLUDE +#define XLS_INCLUDE + +#ifdef __cplusplus +namespace xls { +extern "C" { +#endif + +#include "libxls/xlstypes.h" +#include "libxls/xlsstruct.h" +#include "libxls/xlstool.h" + +typedef enum { + LIBXLS_OK, + LIBXLS_ERROR_OPEN, + LIBXLS_ERROR_SEEK, + LIBXLS_ERROR_READ, + LIBXLS_ERROR_PARSE, + LIBXLS_ERROR_MALLOC, + LIBXLS_ERROR_UNSUPPORTED_ENCRYPTION, + LIBXLS_ERROR_NULL_ARGUMENT +} xls_error_t; + +const char* xls_getVersion(void); +const char* xls_getError(xls_error_t code); + +int xls(int debug); // Set debug. Force library to load? 
+void xls_set_formula_hander(xls_formula_handler handler); + +xls_error_t xls_parseWorkBook(xlsWorkBook* pWB); +xls_error_t xls_parseWorkSheet(xlsWorkSheet* pWS); + +// Preferred API +// charset - convert 16bit strings within the spread sheet to this 8-bit encoding (UTF-8 default) +xlsWorkBook *xls_open_file(const char *file, const char *charset, xls_error_t *outError); +xlsWorkBook *xls_open_buffer(const unsigned char *data, size_t data_len, + const char *charset, xls_error_t *outError); +void xls_close_WB(xlsWorkBook* pWB); + +// Historical API +xlsWorkBook* xls_open(const char *file,const char *charset); +#define xls_close xls_close_WB + +xlsWorkSheet * xls_getWorkSheet(xlsWorkBook* pWB,int num); +void xls_close_WS(xlsWorkSheet* pWS); + +xlsSummaryInfo *xls_summaryInfo(xlsWorkBook* pWB); +void xls_close_summaryInfo(xlsSummaryInfo *pSI); + +// utility function +xlsRow *xls_row(xlsWorkSheet* pWS, WORD cellRow); +xlsCell *xls_cell(xlsWorkSheet* pWS, WORD cellRow, WORD cellCol); + +#ifdef __cplusplus +} // extern c block +} // namespace +#endif + +#endif + diff --git a/3rdparty/libs/fileext/excel/libxls/locale.c b/3rdparty/libs/fileext/excel/libxls/locale.c new file mode 100644 index 0000000..9ee5d95 --- /dev/null +++ b/3rdparty/libs/fileext/excel/libxls/locale.c @@ -0,0 +1,64 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * + * Copyright 2020 Evan Miller + * + * This file is part of libxls -- A multiplatform, C/C++ library for parsing + * Excel(TM) files. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ +#include "config.h" +#include +#include "../include/libxls/locale.h" + +xls_locale_t xls_createlocale(void) { +#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64) || defined(WINDOWS) + return _create_locale(LC_CTYPE, ".65001"); +#else + return newlocale(LC_CTYPE_MASK, "C.UTF-8", NULL); +#endif +} + +void xls_freelocale(xls_locale_t locale) { + if (!locale) + return; +#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64) || defined(WINDOWS) + _free_locale(locale); +#else + freelocale(locale); +#endif +} + +size_t xls_wcstombs_l(char *restrict s, const wchar_t *restrict pwcs, size_t n, xls_locale_t loc) { +#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64) || defined(WINDOWS) + return _wcstombs_l(s, pwcs, n, loc); +#elif defined(HAVE_WCSTOMBS_L) + return wcstombs_l(s, pwcs, n, loc); +#else + locale_t oldlocale = uselocale(loc); + size_t result = wcstombs(s, pwcs, n); + uselocale(oldlocale); + return result; +#endif +} diff --git a/3rdparty/libs/fileext/excel/libxls/ole.c b/3rdparty/libs/fileext/excel/libxls/ole.c new file mode 100644 index 0000000..4a75be4 --- /dev/null +++ b/3rdparty/libs/fileext/excel/libxls/ole.c @@ -0,0 +1,863 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * + * Copyright 2004 Komarov Valery + * Copyright 2006 Christophe Leitienne + * Copyright 2008-2017 David Hoerl + * Copyright 2013 Bob Colbert + * Copyright 2013-2018 Evan Miller + * + * This file is part of libxls -- A multiplatform, C/C++ library for parsing + * Excel(TM) files. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/*
 * Allocate `len` bytes for the OLE reader.
 *
 * Requests of 0 bytes or of more than 1<<24 bytes (16 MiB) are refused and
 * yield NULL, as does a failing malloc(). The caller owns the returned
 * buffer and releases it with free().
 */
static void *ole_malloc(size_t len) {
    if (len == 0 || len > (1<<24)) {
        return NULL;
    }
    return malloc(len);
}

/*
 * Resize `ptr` to `len` bytes, with the same size limits as ole_malloc().
 *
 * Contract: whenever NULL is returned, `ptr` has already been released.
 * Callers in this file assign the result straight back over the pointer they
 * passed in (`p = ole_realloc(p, n)`), so any path that returned NULL while
 * leaving the old block allocated would leak it.
 */
static void *ole_realloc(void *ptr, size_t len) {
    void *resized;

    if (len == 0 || len > (1<<24)) {
        free(ptr);
        return NULL;
    }

    resized = realloc(ptr, len);
    if (resized == NULL) {
        /* realloc() keeps the original block alive on failure; drop it here
         * so the rejection path and the failure path behave the same. */
        free(ptr);
    }
    return resized;
}
|| olest->ole == NULL) + return -1; + + if ((DWORD)olest->fatpos!=ENDOFCHAIN) + { + if(olest->sfat) { + if (olest->ole->SSAT == NULL || olest->buf == NULL || olest->ole->SSecID == NULL) + return -1; + + if (olest->fatpos*olest->ole->lssector + olest->bufsize > olest->ole->SSATCount) { + if (xls_debug) fprintf(stderr, "Error: fatpos %d out-of-bounds for SSAT\n", (int)olest->fatpos); + return -1; + } + + ptr = olest->ole->SSAT + olest->fatpos*olest->ole->lssector; + memcpy(olest->buf, ptr, olest->bufsize); + + if (olest->fatpos >= olest->ole->SSecIDCount) { + if (xls_debug) fprintf(stderr, "Error: fatpos %d out-of-bounds for SSecID[%d]\n", + (int)olest->fatpos, olest->ole->SSecIDCount); + return -1; + } + + olest->fatpos=xlsIntVal(olest->ole->SSecID[olest->fatpos]); + olest->pos=0; + olest->cfat++; + } else { + if ((int)olest->fatpos < 0 || + sector_read(olest->ole, olest->buf, olest->bufsize, olest->fatpos) == -1) { + if (xls_debug) fprintf(stderr, "Error: Unable to read sector #%d\n", (int)olest->fatpos); + return -1; + } + + if (!ole2_validate_sector(olest->fatpos, olest->ole)) { + return -1; + } + + olest->fatpos = xlsIntVal(olest->ole->SecID[olest->fatpos]); + olest->pos=0; + olest->cfat++; + } + } +#ifdef OLE_DEBUG + fprintf(stderr, "----------------------------------------------\n"); + fprintf(stderr, "ole2_bufread (end)\n"); +#endif + // else printf("ENDOFCHAIN!!!\n"); + return 0; +} + +// Read part of stream +ssize_t ole2_read(void* buf, size_t size, size_t count, OLE2Stream* olest) +{ + size_t didReadCount=0; + size_t totalReadCount; + + totalReadCount=size*count; + + // olest->size inited to -1 + // printf("===== ole2_read(%ld bytes)\n", totalReadCount); + + if ((long)olest->size>=0 && !olest->sfat) // directory is -1 + { + size_t rem; + rem = olest->size - (olest->cfat*olest->ole->lsector+olest->pos); + totalReadCount = remeof=1; + + // printf(" rem=%ld olest->size=%d - subfunc=%d\n", rem, olest->size, (olest->cfat*olest->ole->lsector+olest->pos) ); + 
//printf(" totalReadCount=%d (rem=%d size*count=%ld)\n", totalReadCount, rem, size*count); + } + + while ((!olest->eof) && (didReadCount < totalReadCount)) + { + unsigned long remainingBytes; + size_t needToReadCount; + + needToReadCount = totalReadCount - didReadCount; + remainingBytes = olest->bufsize - olest->pos; + + if (needToReadCount < remainingBytes) { // does the current sector contain all the data I need? + memcpy((BYTE*)buf + didReadCount, olest->buf + olest->pos, needToReadCount); + olest->pos += needToReadCount; + didReadCount += needToReadCount; + } else { + memcpy((BYTE*)buf + didReadCount, olest->buf + olest->pos, remainingBytes); + olest->pos += remainingBytes; + didReadCount += remainingBytes; + if (ole2_bufread(olest) == -1) + return -1; + } + if (((DWORD)olest->fatpos == ENDOFCHAIN) && (olest->pos >= olest->bufsize)) { + olest->eof=1; + } + } + if (didReadCount > totalReadCount) + return -1; + + // printf(" didReadCount=%ld EOF=%d\n", didReadCount, olest->eof); + // printf("=====\n"); + +#ifdef OLE_DEBUG + fprintf(stderr, "----------------------------------------------\n"); + fprintf(stderr, "ole2_read (end)\n"); + fprintf(stderr, "start: %d \n",olest->start); + fprintf(stderr, "pos: %d \n",(int)olest->pos); + fprintf(stderr, "cfat: %d \n",(int)olest->cfat); + fprintf(stderr, "size: %d \n",(int)olest->size); + fprintf(stderr, "fatpos: %d \n",(int)olest->fatpos); + fprintf(stderr, "bufsize: %d \n",(int)olest->bufsize); + fprintf(stderr, "eof: %d \n",olest->eof); +#endif + + return didReadCount; +} + +// Open stream in logical ole file +OLE2Stream* ole2_sopen(OLE2* ole,DWORD start, size_t size) +{ + OLE2Stream* olest=NULL; + int success = 1; + +#ifdef OLE_DEBUG + fprintf(stderr, "----------------------------------------------\n"); + fprintf(stderr, "ole2_sopen start=%Xh\n", start); +#endif + + olest = calloc(1, sizeof(OLE2Stream)); + olest->ole=ole; + olest->size=size; + olest->fatpos=start; + olest->start=start; + olest->cfat=-1; + if((long)size 
> 0 && size < (size_t)ole->sectorcutoff) { + olest->bufsize=ole->lssector; + olest->sfat = 1; + } else { + olest->bufsize=ole->lsector; + } + if ((olest->buf = ole_malloc(olest->bufsize)) == NULL) { + success = 0; + goto cleanup; + } + + if (olest->sfat) { + if (!ole2_validate_sector_chain(ole->SSecID, ole->SSecIDCount, start)) { + success = 0; + goto cleanup; + } + } else { + if (!ole2_validate_sector_chain(ole->SecID, ole->SecIDCount, start)) { + success = 0; + goto cleanup; + } + } + + if (ole2_bufread(olest) == -1) { + success = 0; + goto cleanup; + } + +cleanup: + if (!success) { + ole2_fclose(olest); + olest = NULL; + } + + // if(xls_debug) printf("sopen: sector=%d next=%d\n", start, olest->fatpos); + return olest; +} + +// Move in stream +int ole2_seek(OLE2Stream* olest,DWORD ofs) +{ +#ifdef OLE_DEBUG + fprintf(stderr, "SEEK %x\n", ofs); +#endif + if(olest->sfat) { + ldiv_t div_rez=ldiv(ofs,olest->ole->lssector); + int i; + olest->fatpos=olest->start; + + if (div_rez.quot!=0) + { + for (i=0;ifatpos >= olest->ole->SSecIDCount) + return -1; + olest->fatpos=xlsIntVal(olest->ole->SSecID[olest->fatpos]); + } + } + + if (ole2_bufread(olest) == -1) + return -1; + + olest->pos=div_rez.rem; + olest->eof=0; + olest->cfat=div_rez.quot; + //printf("%i=%i %i\n",ofs,div_rez.quot,div_rez.rem); + } else { + ldiv_t div_rez=ldiv(ofs,olest->ole->lsector); + int i; +#ifdef OLE_DEBUG + fprintf(stderr, "seeking fatpos%lu start %u\n", olest->fatpos, olest->start); +#endif + olest->fatpos=olest->start; + + if (div_rez.quot!=0) + { + for (i=0;ifatpos, olest->ole)) + return -1; + olest->fatpos=xlsIntVal(olest->ole->SecID[olest->fatpos]); + } + } + + if (ole2_bufread(olest) == -1) + return -1; + + olest->pos=div_rez.rem; + olest->eof=0; + olest->cfat=div_rez.quot; + //printf("%i=%i %i\n",ofs,div_rez.quot,div_rez.rem); + } + return 0; +} + +// Open logical file contained in physical OLE file +OLE2Stream* ole2_fopen(OLE2* ole, const char *file) +{ + int i; + +#ifdef OLE_DEBUG + 
fprintf(stderr, "----------------------------------------------\n"); + fprintf(stderr, "ole2_fopen %s\n", file); +#endif + + for (i=0;ifiles.count;i++) { + char *str = ole->files.file[i].name; +#ifdef OLE_DEBUG + fprintf(stderr, "----------------------------------------------\n"); + fprintf(stderr, "ole2_fopen found %s\n", str); +#endif + if (str && strcmp(str,file)==0) // newer versions of Excel don't write the "Root Entry" string for the first set of data + { + return ole2_sopen(ole,ole->files.file[i].start,ole->files.file[i].size); + } + } + return NULL; +} + +static int ole2_fseek(OLE2 *ole2, size_t pos) { + if (ole2->file) + return fseek(ole2->file, pos, SEEK_SET); + + if (pos > ole2->buffer_len) + return -1; + + ole2->buffer_pos = pos; + return 0; +} + +// Will read up to `size' bytes from the input, and pad the rest of `size' with +// zeros if the input file or buffer is short. +static size_t ole2_fread(OLE2 *ole2, void *buffer, size_t buffer_len, size_t size) { + if (size > buffer_len) + return 0; + + memset(buffer, 0, size); + + if (ole2->file) + return fread(buffer, 1, size, ole2->file) > 0; + + if (ole2->buffer_pos >= ole2->buffer_len) + return 0; + + if (ole2->buffer_pos + size > ole2->buffer_len) + size = ole2->buffer_len - ole2->buffer_pos; + + memcpy(buffer, (const char *)ole2->buffer + ole2->buffer_pos, size); + ole2->buffer_pos += size; + + return 1; +} + +// read header and check magic numbers +static ssize_t ole2_read_header(OLE2 *ole) { + ssize_t bytes_read = 0, total_bytes_read = 0; + OLE2Header *oleh = malloc(sizeof(OLE2Header)); + if (ole2_fread(ole, oleh, sizeof(OLE2Header), sizeof(OLE2Header)) != 1) { + total_bytes_read = -1; + goto cleanup; + } + total_bytes_read += sizeof(OLE2Header); + xlsConvertHeader(oleh); + + // make sure the file looks good. 
Note: this code only works on Little Endian machines + if(oleh->id[0] != 0xE011CFD0 || oleh->id[1] != 0xE11AB1A1 || oleh->byteorder != 0xFFFE) { + if (xls_debug) fprintf(stderr, "Not an excel file\n"); + total_bytes_read = -1; + goto cleanup; + } + + //ole->lsector=(WORD)pow(2,oleh->lsector); + //ole->lssector=(WORD)pow(2,oleh->lssector); + ole->lsector=512; + ole->lssector=64; + + if (oleh->lsectorB != 9 || oleh->lssectorB != 6) { // 2**9 == 512, 2**6 == 64 + if (xls_debug) fprintf(stderr, "Unexpected sector size\n"); + total_bytes_read = -1; + goto cleanup; + } + + ole->cfat=oleh->cfat; + ole->dirstart=oleh->dirstart; + ole->sectorcutoff=oleh->sectorcutoff; + ole->sfatstart=oleh->sfatstart; + ole->csfat=oleh->csfat; + ole->difstart=oleh->difstart; + ole->cdif=oleh->cdif; + ole->files.count=0; + +#ifdef OLE_DEBUG + fprintf(stderr, "==== OLE HEADER ====\n"); + //printf ("Header Size: %i \n", sizeof(OLE2Header)); + //printf ("id[0]-id[1]: %X-%X \n", oleh->id[0], oleh->id[1]); + fprintf(stderr, "verminor: %X \n",oleh->verminor); + fprintf(stderr, "verdll: %X \n",oleh->verdll); + //printf ("Byte order: %X \n",oleh->byteorder); + fprintf(stderr, "sect len: %X (%i)\n",ole->lsector,ole->lsector); // ole + fprintf(stderr, "mini len: %X (%i)\n",ole->lssector,ole->lssector); // ole + fprintf(stderr, "Fat sect.: %i \n",oleh->cfat); + fprintf(stderr, "Dir Start: %i \n",oleh->dirstart); + + fprintf(stderr, "Mini Cutoff: %i \n",oleh->sectorcutoff); + fprintf(stderr, "MiniFat Start: %X \n",oleh->sfatstart); + fprintf(stderr, "Count MFat: %i \n",oleh->csfat); + fprintf(stderr, "Dif start: %X \n",oleh->difstart); + fprintf(stderr, "Count Dif: %i \n",oleh->cdif); + fprintf(stderr, "Fat Size: %u (0x%X) \n",oleh->cfat*ole->lsector,oleh->cfat*ole->lsector); +#endif + // read directory entries + if ((bytes_read = read_MSAT(ole, oleh)) == -1) { + total_bytes_read = -1; + goto cleanup; + } + total_bytes_read += bytes_read; + +cleanup: + free(oleh); + + return total_bytes_read; +} + 
+static ssize_t ole2_read_body(OLE2 *ole) { + // reuse this buffer + PSS *pss = NULL; + OLE2Stream *olest = NULL; + char* name = NULL; + ssize_t bytes_read = 0, total_bytes_read = 0; + + if ((olest = ole2_sopen(ole,ole->dirstart, -1)) == NULL) { + total_bytes_read = -1; + goto cleanup; + } + pss = malloc(sizeof(PSS)); + do { + if ((bytes_read = ole2_read(pss,1,sizeof(PSS),olest)) == -1) { + total_bytes_read = -1; + goto cleanup; + } + total_bytes_read += bytes_read; + xlsConvertPss(pss); + if (pss->bsize > sizeof(pss->name)) { + total_bytes_read = -1; + goto cleanup; + } + name=transcode_utf16_to_utf8(pss->name, pss->bsize); +#ifdef OLE_DEBUG + fprintf(stderr, "OLE NAME: %s count=%d\n", name, (int)ole->files.count); +#endif + if (pss->type == PS_USER_ROOT || pss->type == PS_USER_STREAM) // (name!=NULL) // + { + +#ifdef OLE_DEBUG + fprintf(stderr, "OLE TYPE: %s file=%d size=%d\n", + pss->type == PS_USER_ROOT ? "root" : "user", + (int)ole->files.count, (int)pss->size); +#endif + ole->files.file = realloc(ole->files.file,(ole->files.count+1)*sizeof(struct st_olefiles_data)); + ole->files.file[ole->files.count].name=name; + ole->files.file[ole->files.count].start=pss->sstart; + ole->files.file[ole->files.count].size=pss->size; + ole->files.count++; + +#ifdef OLE_DEBUG + fprintf(stderr, "----------------------------------------------\n"); + fprintf(stderr, "name: %s (size=%d [c=%c])\n", name, pss->bsize, name ? 
name[0]:' '); + fprintf(stderr, "bsize %i\n",pss->bsize); + fprintf(stderr, "type %i\n",pss->type); + fprintf(stderr, "flag %i\n",pss->flag); + fprintf(stderr, "left %X\n",pss->left); + fprintf(stderr, "right %X\n",pss->right); + fprintf(stderr, "child %X\n",pss->child); + fprintf(stderr, "guid %.4X-%.4X-%.4X-%.4X %.4X-%.4X-%.4X-%.4X\n", + pss->guid[0],pss->guid[1],pss->guid[2],pss->guid[3], + pss->guid[4],pss->guid[5],pss->guid[6],pss->guid[7]); + fprintf(stderr, "user flag %.4X\n",pss->userflags); + fprintf(stderr, "sstart %.4d\n",pss->sstart); + fprintf(stderr, "size %.4d\n",pss->size); +#endif + if(pss->sstart == ENDOFCHAIN) { + if (xls_debug) verbose("END OF CHAIN\n"); + } else if(pss->type == PS_USER_STREAM) { + } else if(pss->type == PS_USER_ROOT) { + DWORD sector, k, blocks; + BYTE *wptr; + size_t bytes_left; + + blocks = (pss->size + (ole->lsector - 1)) / ole->lsector; // count partial +#ifdef OLE_DEBUG + fprintf(stderr, "OLE BLOCKS: %d = (%d + (%d - 1))/%d\n", + (int)blocks, (int)pss->size, (int)ole->lsector, (int)ole->lsector); +#endif + if ((ole->SSAT = ole_realloc(ole->SSAT, blocks*ole->lsector)) == NULL) { + total_bytes_read = -1; + goto cleanup; + } + ole->SSATCount = blocks*ole->lsector; + // printf("blocks %d\n", blocks); + + sector = pss->sstart; + wptr = (BYTE*)ole->SSAT; + bytes_left = blocks*ole->lsector; + for(k=0; klsector; + wptr += ole->lsector; + bytes_left -= ole->lsector; + sector = xlsIntVal(ole->SecID[sector]); + } + } + } else { + free(name); + } + } while (!olest->eof); + +cleanup: + if (olest) + ole2_fclose(olest); + if (pss) + free(pss); + +#ifdef OLE_DEBUG + fprintf(stderr, "----------------------------------------------\n"); + fprintf(stderr, "ole2_read_body: %d bytes\n", (int)total_bytes_read); +#endif + + return total_bytes_read; +} + +OLE2 *ole2_read_header_and_body(OLE2 *ole) { + if (ole2_read_header(ole) == -1) { + ole2_close(ole); + return NULL; + } + + if (ole2_read_body(ole) == -1) { + ole2_close(ole); + return NULL; + } 
+ + return ole; +} + +// Open in-memory buffer +OLE2 *ole2_open_buffer(const void *buffer, size_t len) { + OLE2 *ole = calloc(1, sizeof(OLE2)); + + ole->buffer = buffer; + ole->buffer_len = len; + + return ole2_read_header_and_body(ole); +} + +// Open physical file +OLE2* ole2_open_file(const char *file) +{ + OLE2* ole = NULL; + +#ifdef OLE_DEBUG + fprintf(stderr, "----------------------------------------------\n"); + fprintf(stderr, "ole2_open_file %s\n", file); +#endif + + if(xls_debug) printf("ole2_open: %s\n", file); + ole = calloc(1, sizeof(OLE2)); + + if (!(ole->file=fopen(file, "rb"))) { + if(xls_debug) fprintf(stderr, "File not found\n"); + free(ole); + return NULL; + } + + return ole2_read_header_and_body(ole); +} + +void ole2_close(OLE2* ole2) +{ + int i; + if (ole2->file) + fclose(ole2->file); + + for(i=0; ifiles.count; ++i) { + free(ole2->files.file[i].name); + } + free(ole2->files.file); + free(ole2->SecID); + free(ole2->SSecID); + free(ole2->SSAT); + free(ole2); +} + +void ole2_fclose(OLE2Stream* ole2st) +{ + free(ole2st->buf); + free(ole2st); +} + +// Return offset in bytes of a sector from its sid +static size_t sector_pos(OLE2* ole2, DWORD sid) +{ + return 512 + sid * ole2->lsector; +} +// Read one sector from its sid +static ssize_t sector_read(OLE2* ole2, void *buffer, size_t buffer_len, DWORD sid) +{ + size_t num; + size_t seeked; + + if ((seeked = ole2_fseek(ole2, sector_pos(ole2, sid))) != 0) { + if (xls_debug) fprintf(stderr, "Error: wanted to seek to sector %u (0x%x) loc=%u\n", sid, sid, + (unsigned int)sector_pos(ole2, sid)); + return -1; + } + + if ((num = ole2_fread(ole2, buffer, buffer_len, ole2->lsector)) != 1) { + if (xls_debug) fprintf(stderr, "Error: fread wanted 1 got %lu loc=%u\n", (unsigned long)num, + (unsigned int)sector_pos(ole2, sid)); + return -1; + } + + return ole2->lsector; +} + +// read first 109 sectors of MSAT from header +static ssize_t read_MSAT_header(OLE2* ole2, OLE2Header* oleh, DWORD sectorCount) { + BYTE *sector 
= (BYTE*)ole2->SecID; + ssize_t bytes_read = 0, total_bytes_read = 0; + size_t bytes_left = ole2->SecIDCount * sizeof(DWORD); + DWORD sectorNum; + + for (sectorNum = 0; sectorNum < sectorCount && sectorNum < 109; sectorNum++) + { + if ((bytes_read = sector_read(ole2, sector, bytes_left, oleh->MSAT[sectorNum])) == -1) { + if (xls_debug) fprintf(stderr, "Error: Unable to read sector #%d\n", oleh->MSAT[sectorNum]); + return -1; + } + sector += ole2->lsector; + bytes_left -= ole2->lsector; + total_bytes_read += bytes_read; + } + return total_bytes_read; +} + +// Add additional sectors of the MSAT +static ssize_t read_MSAT_body(OLE2 *ole2, DWORD sectorOffset, DWORD sectorCount) { + DWORD sid = ole2->difstart; + ssize_t bytes_read = 0, total_bytes_read = 0; + DWORD sectorNum = sectorOffset; + + DWORD *sector = ole_malloc(ole2->lsector); + //printf("sid=%u (0x%x) sector=%u\n", sid, sid, ole2->lsector); + while (sid != ENDOFCHAIN && sid != FREESECT) // FREESECT only here due to an actual file that requires it (old Apple Numbers bug) + { + int posInSector; + // read MSAT sector + if ((bytes_read = sector_read(ole2, sector, ole2->lsector, sid)) == -1) { + total_bytes_read = -1; + if (xls_debug) fprintf(stderr, "Error: Unable to read sector #%d\n", sid); + goto cleanup; + } + total_bytes_read += bytes_read; + + // read content + for (posInSector = 0; posInSector < (ole2->lsector-4)/4; posInSector++) + { + DWORD s = sector[posInSector]; + //printf(" s[%d]=%d (0x%x)\n", posInSector, s, s); + + if (s != ENDOFCHAIN && s != FREESECT) // see patch in Bug 31. 
For very large files + { + if (sectorNum == sectorCount) { + if (xls_debug) fprintf(stderr, "Error: Unable to seek to sector #%d\n", s); + total_bytes_read = -1; + goto cleanup; + } + if ((bytes_read = sector_read(ole2, (BYTE*)(ole2->SecID)+sectorNum*ole2->lsector, + (ole2->SecIDCount * sizeof(DWORD) - sectorNum*ole2->lsector), s)) == -1) { + if (xls_debug) fprintf(stderr, "Error: Unable to read sector #%d\n", s); + total_bytes_read = -1; + goto cleanup; + } + total_bytes_read += bytes_read; + sectorNum++; + } + } + if (sid == sector[posInSector]) { + if (xls_debug) fprintf(stderr, "Error: Loop detected in sector #%d\n", sid); + total_bytes_read = -1; + goto cleanup; + } + sid = sector[posInSector]; + //printf(" s[%d]=%d (0x%x)\n", posInSector, sid, sid); + } +#ifdef OLE_DEBUG + if(xls_debug) { + //printf("==== READ IN SECTORS FOR MSAT TABLE====\n"); + int i; + for(i=0; i<512/4; ++i) { // just the first block + if(ole2->SecID[i] != FREESECT) printf("SecID[%d]=%d\n", i, ole2->SecID[i]); + } + } + //exit(0); +#endif + +cleanup: + free(sector); + return total_bytes_read; +} + +// read in short table +static ssize_t read_MSAT_trailer(OLE2 *ole2) { + ssize_t total_bytes_read = 0; + DWORD sector, k; + BYTE *wptr; + size_t bytes_left; + + if(ole2->sfatstart == ENDOFCHAIN) + return 0; + + if ((ole2->SSecID = ole_malloc(ole2->csfat*(size_t)ole2->lsector)) == NULL) { + return -1; + } + ole2->SSecIDCount = ole2->csfat*(size_t)ole2->lsector/4; + sector = ole2->sfatstart; + wptr=(BYTE*)ole2->SSecID; + bytes_left = ole2->SSecIDCount * sizeof(DWORD); + for(k=0; kcsfat; ++k) { + if (sector == ENDOFCHAIN || sector_read(ole2, wptr, bytes_left, sector) == -1) { + total_bytes_read = -1; + goto cleanup; + } + if (!ole2_validate_sector(sector, ole2)) { + total_bytes_read = -1; + goto cleanup; + } + wptr += ole2->lsector; + bytes_left -= ole2->lsector; + total_bytes_read += ole2->lsector; + sector = xlsIntVal(ole2->SecID[sector]); + } +#ifdef OLE_DEBUG + if(xls_debug) { + int i; + 
for(i=0; icsfat; ++i) { + if(ole2->SSecID[i] != FREESECT) fprintf(stderr, "SSecID[%d]=%d\n", i, ole2->SSecID[i]); + } + } +#endif + +cleanup: + return total_bytes_read; +} + + +// Read MSAT +static ssize_t read_MSAT(OLE2* ole2, OLE2Header* oleh) +{ + // reconstitution of the MSAT + DWORD count = ole2->cfat; + if(count == 0 || count > (1 << 24)) { + if (xls_debug) fprintf(stderr, "Error: MSAT count %u out-of-bounds\n", count); + return -1; + } + + ssize_t total_bytes_read = 0; + ssize_t bytes_read = 0; + + ole2->SecIDCount = count*ole2->lsector/4; + if ((ole2->SecID = ole_malloc(ole2->SecIDCount * sizeof(DWORD))) == NULL) { + total_bytes_read = -1; + goto cleanup; + } + + if ((bytes_read = read_MSAT_header(ole2, oleh, count)) == -1) { + total_bytes_read = -1; + goto cleanup; + } + total_bytes_read += bytes_read; + + if ((bytes_read = read_MSAT_body(ole2, total_bytes_read / ole2->lsector, count)) == -1) { + total_bytes_read = -1; + goto cleanup; + } + total_bytes_read += bytes_read; + + if ((bytes_read = read_MSAT_trailer(ole2)) == -1) { + total_bytes_read = -1; + goto cleanup; + } + total_bytes_read += bytes_read; + +cleanup: + if (total_bytes_read == -1) { + if (ole2->SecID) { + free(ole2->SecID); + ole2->SecID = NULL; + } + if (ole2->SSecID) { + free(ole2->SSecID); + ole2->SSecID = NULL; + } + } + + return total_bytes_read; +} diff --git a/3rdparty/libs/fileext/excel/libxls/xls.c b/3rdparty/libs/fileext/excel/libxls/xls.c new file mode 100644 index 0000000..7bc7a30 --- /dev/null +++ b/3rdparty/libs/fileext/excel/libxls/xls.c @@ -0,0 +1,1819 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * + * Copyright 2004 Komarov Valery + * Copyright 2006 Christophe Leitienne + * Copyright 2008-2017 David Hoerl + * Copyright 2013 Bob Colbert + * Copyright 2013-2018 Evan Miller + * + * This file is part of libxls -- A multiplatform, C/C++ library for parsing + * Excel(TM) files. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "config.h" + +#include +#include +#include +#include + +#ifdef HAVE_ICONV +#include +#endif + +#include +#include +#include +#include + +#include "../include/libxls/endian.h" +#include "../include/libxls/locale.h" +#include "../include/xls.h" + +#ifndef min +#define min(a,b) ((a) < (b) ? 
(a) : (b)) +#endif + +//#define DEBUG_DRAWINGS +int xls_debug = 0; + +static double NumFromRk(DWORD drk); +static xls_formula_handler formula_handler; + +static xls_error_t xls_addSST(xlsWorkBook* pWB, SST* sst, DWORD size); +static xls_error_t xls_appendSST(xlsWorkBook* pWB, BYTE* buf, DWORD size); +static xls_error_t xls_addFormat(xlsWorkBook* pWB, FORMAT* format, DWORD size); +static xls_error_t xls_addSheet(xlsWorkBook* pWB, BOUNDSHEET* bs, DWORD size); +static xls_error_t xls_addRow(xlsWorkSheet* pWS,ROW* row); +static xls_error_t xls_makeTable(xlsWorkSheet* pWS); +static struct st_cell_data *xls_addCell(xlsWorkSheet* pWS, BOF* bof, BYTE* buf); +static char *xls_addFont(xlsWorkBook* pWB, FONT* font, DWORD size); +static xls_error_t xls_addXF8(xlsWorkBook* pWB, XF8* xf); +static xls_error_t xls_addXF5(xlsWorkBook* pWB, XF5* xf); +static xls_error_t xls_addColinfo(xlsWorkSheet* pWS, COLINFO* colinfo); +static xls_error_t xls_mergedCells(xlsWorkSheet* pWS, BOF* bof, BYTE* buf); +static xls_error_t xls_preparseWorkSheet(xlsWorkSheet* pWS); +static xls_error_t xls_formatColumn(xlsWorkSheet* pWS); +static void xls_dumpSummary(char *buf, int isSummary, xlsSummaryInfo *pSI); + +#if defined(_AIX) || defined(__sun) +#pragma pack(1) +#else +#pragma pack(push, 1) +#endif + +typedef struct { + uint32_t format[4]; + uint32_t offset; +} sectionList; + +typedef struct { + uint16_t sig; + uint16_t _empty; + uint32_t os; + uint32_t format[4]; + uint32_t count; + sectionList secList[1]; +} header; + +typedef struct { + uint32_t propertyID; + uint32_t sectionOffset; +} propertyList; + +typedef struct { + uint32_t length; + uint32_t numProperties; + propertyList properties[1]; +} sectionHeader; + +typedef struct { + uint32_t propertyID; + uint32_t data[1]; +} property; + +#pragma pack(pop) + +int xls(int debug) +{ + xls_debug = debug; + return 1; +} + +static xls_error_t xls_addSST(xlsWorkBook* pWB,SST* sst,DWORD size) +{ + verbose("xls_addSST"); + + pWB->sst.continued=0; + 
pWB->sst.lastln=0; + pWB->sst.lastid=0; + pWB->sst.lastrt=0; + pWB->sst.lastsz=0; + + if (sst->num > (1<<26)) // 64 MB + return LIBXLS_ERROR_MALLOC; + + if (pWB->sst.string) + return LIBXLS_ERROR_PARSE; + + if ((pWB->sst.string = calloc(pWB->sst.count = sst->num, + sizeof(struct str_sst_string))) == NULL) + return LIBXLS_ERROR_MALLOC; + + return xls_appendSST(pWB, sst->strings, size - offsetof(SST, strings)); +} + +static xls_error_t xls_appendSST(xlsWorkBook* pWB, BYTE* buf, DWORD size) +{ + DWORD ln; // String character count + DWORD ofs; // Current offset in SST buffer + DWORD rt; // Count of rich text formatting runs + DWORD sz; // Size of asian phonetic settings block + BYTE flag; // String flags + char* ret = NULL; + + if (xls_debug) { + printf("xls_appendSST %u\n", size); + } + + sz = rt = ln = 0; // kch + ofs=0; + + while(ofssst.continued) { + ln=pWB->sst.lastln; + rt=pWB->sst.lastrt; + sz=pWB->sst.lastsz; + } else { + if (ofs + 2 > size) { + return LIBXLS_ERROR_PARSE; + } + ln = buf[ofs+0] + (buf[ofs+1] << 8); + rt = 0; + sz = 0; + + ofs+=2; + } + + if (xls_debug) { + printf("ln=%u\n", ln); + } + + // Read flags + if ( !pWB->sst.continued || (pWB->sst.continued && ln != 0) ) { + if (ofs + sizeof(BYTE) > size) { + return LIBXLS_ERROR_PARSE; + } + flag=*(BYTE *)(buf+ofs); + ofs++; + + // Count of rich text formatting runs + if (flag & 0x8) { + if (ofs + sizeof(WORD) > size) { + return LIBXLS_ERROR_PARSE; + } + rt = buf[ofs+0] + (buf[ofs+1] << 8); + ofs+=2; + } + + // Size of asian phonetic settings block + if (flag & 0x4) { + if (ofs + sizeof(DWORD) > size) { + return LIBXLS_ERROR_PARSE; + } + sz = buf[ofs+0] + (buf[ofs+1] << 8) + (buf[ofs+2] << 16) + ((DWORD)buf[ofs+3] << 24); + ofs+=4; + + if (xls_debug) { + printf("sz=%u\n", sz); + } + } + } else { + flag = 0; + } + + // Read characters (compressed or not) + ln_toread = 0; + if (ln > 0) { + if (flag & 0x1) { + size_t new_len = 0; + ln_toread = min((size-ofs)/2, ln); + ret=unicode_decode((char *)buf+ofs, 
ln_toread*2, pWB); + + if (ret == NULL) { + ret = strdup("*failed to decode utf16*"); + } + + ln -= ln_toread; + ofs+=ln_toread*2; + + if (xls_debug) { + new_len = strlen(ret); + printf("String16SST: %s(%lu)\n", ret, (unsigned long)new_len); + } + } else { + ln_toread = min((size-ofs), ln); + + ret = codepage_decode((char *)buf+ofs, ln_toread, pWB); + if (ret == NULL) { + ret = strdup("*failed to decode BIFF5 string*"); + } + + ln -= ln_toread; + ofs += ln_toread; + + if (xls_debug) { + printf("String8SST: %s(%u) \n",ret,ln); + } + } + } else { + ret = strdup(""); + } + + if (ln_toread > 0 || !pWB->sst.continued) { + // Concat string if it's a continue, or add string in table + if (!pWB->sst.continued) { + if (pWB->sst.lastid >= pWB->sst.count) { + free(ret); + return LIBXLS_ERROR_PARSE; + } + pWB->sst.lastid++; + pWB->sst.string[pWB->sst.lastid-1].str=ret; + } else { + char *tmp = pWB->sst.string[pWB->sst.lastid-1].str; + if (tmp == NULL) { + free(ret); + return LIBXLS_ERROR_PARSE; + } + tmp = realloc(tmp, strlen(tmp)+strlen(ret)+1); + if (tmp == NULL) { + free(ret); + return LIBXLS_ERROR_MALLOC; + } + pWB->sst.string[pWB->sst.lastid-1].str=tmp; + memcpy(tmp+strlen(tmp), ret, strlen(ret)+1); + free(ret); + } + + if (xls_debug) { + printf("String %4u: %s\n", pWB->sst.lastid-1, pWB->sst.string[pWB->sst.lastid-1].str); + } + } else { + free(ret); + } + + // Jump list of rich text formatting runs + if (ofs < size && rt > 0) { + int rt_toread = min((size-ofs)/4, rt); + rt -= rt_toread; + ofs += rt_toread*4; + } + + // Jump asian phonetic settings block + if (ofs < size && sz > 0) { + int sz_toread = min((size-ofs), sz); + sz -= sz_toread; + ofs += sz_toread; + } + + pWB->sst.continued=0; + } + + // Save current character count and count of rich text formatting runs and size of asian phonetic settings block + if (ln > 0 || rt > 0 || sz > 0) { + pWB->sst.continued = 1; + pWB->sst.lastln = ln; + pWB->sst.lastrt = rt; + pWB->sst.lastsz = sz; + + if (xls_debug) { + 
printf("continued: ln=%u, rt=%u, sz=%u\n", ln, rt, sz); + } + } + + return LIBXLS_OK; +} + +static double NumFromRk(DWORD drk) +{ + double ret; + + // What kind of value is this ? + if (drk & 0x02) { + // Integer value + int tmp = (int)drk >> 2; // cast to keep it negative in < 0 + ret = (double)tmp; + } else { + // Floating point value; + unsigned64_t tmp = drk & 0xfffffffc; + tmp <<= 32; + memcpy(&ret, &tmp, sizeof(unsigned64_t)); + } + // Is value multiplied by 100 ? + if (drk & 0x01) { + ret /= 100.0; + } + return ret; +} + +static xls_error_t xls_addSheet(xlsWorkBook* pWB, BOUNDSHEET *bs, DWORD size) +{ + char * name; + DWORD filepos; + BYTE visible, type; + + filepos = bs->filepos; + visible = bs->visible; + type = bs->type; + + // printf("charset=%s uni=%d\n", pWB->charset, unicode); + // printf("bs name %.*s\n", bs->name[0], bs->name+1); + name = get_string(bs->name, size - offsetof(BOUNDSHEET, name), 0, pWB); + // printf("name=%s\n", name); + + if(xls_debug) { + printf ("xls_addSheet[0x%x]\n", type); + switch (type & 0x0f) + { + case 0x00: + /* worksheet or dialog sheet */ + printf ("85: Worksheet or dialog sheet\n"); + break; + case 0x01: + /* Microsoft Excel 4.0 macro sheet */ + printf ("85: Microsoft Excel 4.0 macro sheet\n"); + break; + case 0x02: + /* Chart */ + printf ("85: Chart sheet\n"); + break; + case 0x06: + /* Visual Basic module */ + printf ("85: Visual Basic sheet\n"); + break; + default: + printf ("???\n"); + break; + } + printf("visible: %x\n", visible); + printf(" Pos: %Xh\n",filepos); + printf(" type: %.4Xh\n",type); + printf(" name: %s\n", name); + } + + pWB->sheets.sheet = realloc(pWB->sheets.sheet,(pWB->sheets.count+1)*sizeof (struct st_sheet_data)); + if (pWB->sheets.sheet == NULL) { + free(name); + return LIBXLS_ERROR_MALLOC; + } + + pWB->sheets.sheet[pWB->sheets.count].name=name; + pWB->sheets.sheet[pWB->sheets.count].filepos=filepos; + pWB->sheets.sheet[pWB->sheets.count].visibility=visible; + 
pWB->sheets.sheet[pWB->sheets.count].type=type; + pWB->sheets.count++; + + return LIBXLS_OK; +} + + +static xls_error_t xls_addRow(xlsWorkSheet* pWS,ROW* row) +{ + struct st_row_data* tmp; + + //verbose ("xls_addRow"); + + if (row->index > pWS->rows.lastrow) + return LIBXLS_ERROR_PARSE; + + tmp=&pWS->rows.row[row->index]; + tmp->height=row->height; + tmp->fcell=row->fcell; + tmp->lcell=row->lcell; + tmp->flags=row->flags; + tmp->xf=row->xf&0xfff; + tmp->xfflags=(row->xf >> 8)&0xf0; + if(xls_debug) xls_showROW(tmp); + + return LIBXLS_OK; +} + +static xls_error_t xls_makeTable(xlsWorkSheet* pWS) +{ + DWORD i,t; + struct st_row_data* tmp; + verbose ("xls_makeTable"); + + if ((pWS->rows.row = calloc((pWS->rows.lastrow+1),sizeof(struct st_row_data))) == NULL) + return LIBXLS_ERROR_MALLOC; + + // printf("ALLOC: rows=%d cols=%d\n", pWS->rows.lastrow, pWS->rows.lastcol); + for (t=0;t<=pWS->rows.lastrow;t++) + { + tmp=&pWS->rows.row[t]; + tmp->index=t; + tmp->fcell=0; + tmp->lcell=pWS->rows.lastcol; + + tmp->cells.count = pWS->rows.lastcol+1; + if ((tmp->cells.cell = calloc(tmp->cells.count, sizeof(struct st_cell_data))) == NULL) + return LIBXLS_ERROR_MALLOC; + + for (i=0;i<=pWS->rows.lastcol;i++) + { + tmp->cells.cell[i].col = i; + tmp->cells.cell[i].row = t; + tmp->cells.cell[i].width = pWS->defcolwidth; + tmp->cells.cell[i].id = XLS_RECORD_BLANK; + } + } + return LIBXLS_OK; +} + +int xls_isCellTooSmall(xlsWorkBook* pWB, BOF* bof, BYTE* buf) { + if (bof->size < sizeof(COL)) + return 1; + + if (bof->id == XLS_RECORD_FORMULA || bof->id == XLS_RECORD_FORMULA_ALT) + return (bof->size < sizeof(FORMULA)); + + if (bof->id == XLS_RECORD_MULRK) + return (bof->size < offsetof(MULRK, rk)); + + if (bof->id == XLS_RECORD_MULBLANK) + return (bof->size < offsetof(MULBLANK, xf)); + + if (bof->id == XLS_RECORD_LABELSST) + return (bof->size < offsetof(LABEL, value) + (pWB->is5ver ? 
2 : 4)); + + if (bof->id == XLS_RECORD_LABEL || bof->id == XLS_RECORD_RSTRING) { + if (bof->size < offsetof(LABEL, value) + 2) + return 1; + + size_t label_len = ((LABEL*)buf)->value[0] + (((LABEL*)buf)->value[1] << 8); + if (pWB->is5ver) { + return (bof->size < offsetof(LABEL, value) + 2 + label_len); + } + + if (bof->size < offsetof(LABEL, value) + 3) + return 1; + + if ((((LABEL*)buf)->value[2] & 0x01) == 0) { + return (bof->size < offsetof(LABEL, value) + 3 + label_len); + } + return (bof->size < offsetof(LABEL, value) + 3 + 2 * label_len); + } + + if (bof->id == XLS_RECORD_RK) + return (bof->size < sizeof(RK)); + + if (bof->id == XLS_RECORD_NUMBER) + return (bof->size < sizeof(BR_NUMBER)); + + if (bof->id == XLS_RECORD_BOOLERR) + return (bof->size < sizeof(BOOLERR)); + + return 0; +} + +void xls_cell_set_str(struct st_cell_data *cell, char *str) { + if (cell->str) { + free(cell->str); + } + cell->str = str; +} + +static struct st_cell_data *xls_addCell(xlsWorkSheet* pWS,BOF* bof,BYTE* buf) +{ + struct st_cell_data* cell; + struct st_row_data* row; + WORD col; + int i; + + verbose ("xls_addCell"); + + if (xls_isCellTooSmall(pWS->workbook, bof, buf)) + return NULL; + + // printf("ROW: %u COL: %u\n", xlsShortVal(((COL*)buf)->row), xlsShortVal(((COL*)buf)->col)); + row=&pWS->rows.row[xlsShortVal(((COL*)buf)->row)]; + + col = xlsShortVal(((COL*)buf)->col); + if (col >= row->cells.count) { + if (xls_debug) fprintf(stderr, "Error: Column index out of bounds\n"); + return NULL; + } + cell = &row->cells.cell[col]; + + cell->id=bof->id; + cell->xf=xlsShortVal(((COL*)buf)->xf); + + switch (bof->id) + { + case XLS_RECORD_FORMULA: + case XLS_RECORD_FORMULA_ALT: + xlsConvertFormula((FORMULA *)buf); + cell->id=XLS_RECORD_FORMULA; + if (((FORMULA*)buf)->res!=0xffff) { + // if a double, then set double and clear l + cell->l=0; + memcpy(&cell->d, &((FORMULA*)buf)->resid, sizeof(double)); // Required for ARM + cell->id = XLS_RECORD_NUMBER; // hack + xls_cell_set_str(cell, 
xls_getfcell(pWS->workbook,cell, NULL)); + cell->id = bof->id; + } else { + double d = ((FORMULA*)buf)->resdata[1]; + cell->l = 0xFFFF; + switch(((FORMULA*)buf)->resid) { + case 0: // String + break; // cell is half complete, get the STRING next record + case 1: // Boolean + memcpy(&cell->d, &d, sizeof(double)); // Required for ARM + xls_cell_set_str(cell, strdup("bool")); + break; + case 2: // error + memcpy(&cell->d, &d, sizeof(double)); // Required for ARM + xls_cell_set_str(cell, strdup("error")); + break; + case 3: // empty string + xls_cell_set_str(cell, strdup("")); + break; + } + } + if(formula_handler) formula_handler(bof->id, bof->size, buf); + break; + case XLS_RECORD_MULRK: + for (i = 0; i < (bof->size - 6)/6; i++) // 6 == 2 row + 2 col + 2 trailing index + { + WORD index = col + i; + if(index >= row->cells.count) { + if (xls_debug) fprintf(stderr, "Error: MULTI-RK index out of bounds\n"); + return NULL; + } + cell=&row->cells.cell[index]; + cell->id=XLS_RECORD_RK; + cell->xf=xlsShortVal(((MULRK*)buf)->rk[i].xf); + cell->d=NumFromRk(xlsIntVal(((MULRK*)buf)->rk[i].value)); + xls_cell_set_str(cell, xls_getfcell(pWS->workbook,cell, NULL)); + } + break; + case XLS_RECORD_MULBLANK: + for (i = 0; i < (bof->size - 6)/2; i++) // 6 == 2 row + 2 col + 2 trailing index + { + WORD index = col + i; + if(index >= row->cells.count) { + if (xls_debug) fprintf(stderr, "Error: MULTI-BLANK index out of bounds\n"); + return NULL; + } + cell=&row->cells.cell[index]; + cell->id=XLS_RECORD_BLANK; + cell->xf=xlsShortVal(((MULBLANK*)buf)->xf[i]); + xls_cell_set_str(cell, xls_getfcell(pWS->workbook,cell, NULL)); + } + break; + case XLS_RECORD_LABELSST: + case XLS_RECORD_LABEL: + case XLS_RECORD_RSTRING: + xls_cell_set_str(cell, xls_getfcell(pWS->workbook, cell, ((LABEL*)buf)->value)); + if (cell->str) { + sscanf((char *)cell->str, "%d", &cell->l); + sscanf((char *)cell->str, "%lf", &cell->d); + } + break; + case XLS_RECORD_RK: + cell->d=NumFromRk(xlsIntVal(((RK*)buf)->value)); + 
xls_cell_set_str(cell, xls_getfcell(pWS->workbook,cell, NULL)); + break; + case XLS_RECORD_BLANK: + break; + case XLS_RECORD_NUMBER: + xlsConvertDouble((BYTE *)&((BR_NUMBER*)buf)->value); + memcpy(&cell->d, &((BR_NUMBER*)buf)->value, sizeof(double)); // Required for ARM + xls_cell_set_str(cell, xls_getfcell(pWS->workbook,cell, NULL)); + break; + case XLS_RECORD_BOOLERR: + cell->d = ((BOOLERR *)buf)->value; + if (((BOOLERR *)buf)->iserror) { + xls_cell_set_str(cell, strdup("error")); + } else { + xls_cell_set_str(cell, strdup("bool")); + } + break; + default: + xls_cell_set_str(cell, xls_getfcell(pWS->workbook,cell, NULL)); + break; + } + if (xls_debug) xls_showCell(cell); + + return cell; +} + +static char *xls_addFont(xlsWorkBook* pWB, FONT* font, DWORD size) +{ + struct st_font_data* tmp; + + verbose("xls_addFont"); + + pWB->fonts.font = realloc(pWB->fonts.font,(pWB->fonts.count+1)*sizeof(struct st_font_data)); + if (pWB->fonts.font == NULL) + return NULL; + + tmp=&pWB->fonts.font[pWB->fonts.count]; + + tmp->name = get_string(font->name, size - offsetof(FONT, name), 0, pWB); + + tmp->height=font->height; + tmp->flag=font->flag; + tmp->color=font->color; + tmp->bold=font->bold; + tmp->escapement=font->escapement; + tmp->underline=font->underline; + tmp->family=font->family; + tmp->charset=font->charset; + + // xls_showFont(tmp); + pWB->fonts.count++; + + return tmp->name; +} + +static xls_error_t xls_addFormat(xlsWorkBook* pWB, FORMAT* format, DWORD size) +{ + struct st_format_data* tmp; + + verbose("xls_addFormat"); + pWB->formats.format = realloc(pWB->formats.format, (pWB->formats.count+1)*sizeof(struct st_format_data)); + if (pWB->formats.format == NULL) + return LIBXLS_ERROR_MALLOC; + + tmp = &pWB->formats.format[pWB->formats.count]; + tmp->index = format->index; + tmp->value = get_string(format->value, size - offsetof(FORMAT, value), (BYTE)!pWB->is5ver, pWB); + if(xls_debug) xls_showFormat(tmp); + pWB->formats.count++; + + return LIBXLS_OK; +} + +static 
xls_error_t xls_addXF8(xlsWorkBook* pWB,XF8* xf) +{ + struct st_xf_data* tmp; + + verbose("xls_addXF"); + pWB->xfs.xf= realloc(pWB->xfs.xf, (pWB->xfs.count+1)*sizeof(struct st_xf_data)); + if (pWB->xfs.xf == NULL) + return LIBXLS_ERROR_MALLOC; + + tmp=&pWB->xfs.xf[pWB->xfs.count]; + + tmp->font=xf->font; + tmp->format=xf->format; + tmp->type=xf->type; + tmp->align=xf->align; + tmp->rotation=xf->rotation; + tmp->ident=xf->ident; + tmp->usedattr=xf->usedattr; + tmp->linestyle=xf->linestyle; + tmp->linecolor=xf->linecolor; + tmp->groundcolor=xf->groundcolor; + + // xls_showXF(tmp); + pWB->xfs.count++; + + return LIBXLS_OK; +} + +static xls_error_t xls_addXF5(xlsWorkBook* pWB,XF5* xf) +{ + struct st_xf_data* tmp; + + verbose("xls_addXF"); + pWB->xfs.xf = realloc(pWB->xfs.xf, (pWB->xfs.count+1)*sizeof(struct st_xf_data)); + if (pWB->xfs.xf == NULL) + return LIBXLS_ERROR_MALLOC; + + tmp=&pWB->xfs.xf[pWB->xfs.count]; + + tmp->font=xf->font; + tmp->format=xf->format; + tmp->type=xf->type; + tmp->align=(BYTE)xf->align; +/* + tmp->rotation=xf->rotation; + tmp->ident=xf->ident; + tmp->usedattr=xf->usedattr; + tmp->linestyle=xf->linestyle; + tmp->linecolor=xf->linecolor; + tmp->groundcolor=xf->groundcolor; +*/ + + // xls_showXF(tmp); + pWB->xfs.count++; + return LIBXLS_OK; +} + +static xls_error_t xls_addColinfo(xlsWorkSheet* pWS,COLINFO* colinfo) +{ + struct st_colinfo_data* tmp; + + verbose("xls_addColinfo"); + pWS->colinfo.col = realloc(pWS->colinfo.col,(pWS->colinfo.count+1)*sizeof(struct st_colinfo_data)); + if (pWS->colinfo.col == NULL) + return LIBXLS_ERROR_MALLOC; + + tmp=&pWS->colinfo.col[pWS->colinfo.count]; + tmp->first=colinfo->first; + tmp->last=colinfo->last; + tmp->width=colinfo->width; + tmp->xf=colinfo->xf; + tmp->flags=colinfo->flags; + + if(xls_debug) xls_showColinfo(tmp); + pWS->colinfo.count++; + + return LIBXLS_OK; +} + +static xls_error_t xls_mergedCells(xlsWorkSheet* pWS,BOF* bof,BYTE* buf) +{ + if (bof->size < sizeof(WORD)) + return LIBXLS_ERROR_PARSE; 
+ + int count = buf[0] + (buf[1] << 8); + DWORD limit = sizeof(WORD)+count*sizeof(struct MERGEDCELLS); + if(limit > (DWORD)bof->size) { + verbose("Merged Cells Count out of range"); + return LIBXLS_ERROR_PARSE; + } + int i,c,r; + struct MERGEDCELLS *span; + verbose("Merged Cells"); + for (i=0;icolf,span->rowf,span->coll,span->rowl); + // Sanity check: + if(!( span->rowf <= span->rowl && + span->rowl <= pWS->rows.lastrow && + span->colf <= span->coll && + span->coll <= pWS->rows.lastcol + )) { + return LIBXLS_ERROR_PARSE; + } + + for (r=span->rowf;r<=span->rowl;r++) + for (c=span->colf;c<=span->coll;c++) + pWS->rows.row[r].cells.cell[c].isHidden=1; + pWS->rows.row[span->rowf].cells.cell[span->colf].colspan=(span->coll-span->colf+1); + pWS->rows.row[span->rowf].cells.cell[span->colf].rowspan=(span->rowl-span->rowf+1); + pWS->rows.row[span->rowf].cells.cell[span->colf].isHidden=0; + } + return LIBXLS_OK; +} + +int xls_isRecordTooSmall(xlsWorkBook *pWB, BOF *bof1, const BYTE* buf) { + switch (bof1->id) { + case XLS_RECORD_BOF: // BIFF5-8 + return (bof1->size < 2 * sizeof(WORD)); + case XLS_RECORD_CODEPAGE: + return (bof1->size < sizeof(WORD)); + case XLS_RECORD_WINDOW1: + return (bof1->size < sizeof(WIND1)); + case XLS_RECORD_SST: + return (bof1->size < offsetof(SST, strings)); + case XLS_RECORD_BOUNDSHEET: + return (bof1->size < offsetof(BOUNDSHEET, name)); + case XLS_RECORD_XF: + if(pWB->is5ver) { + return (bof1->size < sizeof(XF5)); + } + return (bof1->size < sizeof(XF8)); + case XLS_RECORD_FONT: + case XLS_RECORD_FONT_ALT: + return (bof1->size < offsetof(FONT, name)); + case XLS_RECORD_FORMAT: + return (bof1->size < offsetof(FORMAT, value)); + case XLS_RECORD_STYLE: + { + struct { + unsigned short idx; + unsigned char ident; + unsigned char lvl; + } *styl; + if(bof1->size < 2) { + return 1; + } + styl = (void *)buf; + if(xlsShortVal(styl->idx) & 0x8000) { + return bof1->size < 4; + } else { + if(bof1->size < 3) return 1; + return bof1->size < 3 + styl->ident; + } + 
} + case XLS_RECORD_1904: + return (bof1->size < sizeof(BYTE)); + default: + break; + } + return 0; +} + +xls_error_t xls_parseWorkBook(xlsWorkBook* pWB) +{ + if(!pWB) return LIBXLS_ERROR_NULL_ARGUMENT; + + BOF bof1 = { .id = 0, .size = 0 }; + BOF bof2 = { .id = 0, .size = 0 }; + BYTE* buf = NULL; + BYTE once = 0; + xls_error_t retval = LIBXLS_OK; + + verbose ("xls_parseWorkBook"); + do { + if(xls_debug > 10) { + printf("READ WORKBOOK filePos=%ld\n", (long)pWB->filepos); + printf(" OLE: start=%d pos=%u size=%u fatPos=%u\n", + pWB->olestr->start, (unsigned int)pWB->olestr->pos, + (unsigned int)pWB->olestr->size, (unsigned int)pWB->olestr->fatpos); + } + + if (ole2_read(&bof1, 1, 4, pWB->olestr) != 4) { + retval = LIBXLS_ERROR_READ; + goto cleanup; + } + xlsConvertBof(&bof1); + if(xls_debug) xls_showBOF(&bof1); + + if (bof1.size) { + if ((buf = realloc(buf, bof1.size)) == NULL) { + if (xls_debug) fprintf(stderr, "Error: failed to allocate buffer of size %d\n", (int)bof1.size); + retval = LIBXLS_ERROR_MALLOC; + goto cleanup; + } + if (ole2_read(buf, 1, bof1.size, pWB->olestr) != bof1.size) { + if (xls_debug) fprintf(stderr, "Error: failed to read OLE block\n"); + retval = LIBXLS_ERROR_READ; + goto cleanup; + } + } + + if (xls_isRecordTooSmall(pWB, &bof1, buf)) { + retval = LIBXLS_ERROR_PARSE; + goto cleanup; + } + + switch (bof1.id) { + case XLS_RECORD_EOF: + //verbose("EOF"); + break; + case XLS_RECORD_BOF: // BIFF5-8 + pWB->is5ver = (buf[0] + (buf[1] << 8) != 0x600); + pWB->type = buf[2] + (buf[3] << 8); + if(xls_debug) { + printf("version: %s\n", pWB->is5ver ? 
"BIFF5" : "BIFF8" ); + printf(" type: %.2X\n", pWB->type); + } + break; + + case XLS_RECORD_CODEPAGE: + pWB->codepage = buf[0] + (buf[1] << 8); + if(xls_debug) printf("codepage: %d\n", pWB->codepage); + break; + + case XLS_RECORD_CONTINUE: + if(once) { + if (bof2.id==XLS_RECORD_SST) { + if ((retval = xls_appendSST(pWB,buf,bof1.size)) != LIBXLS_OK) + goto cleanup; + } + bof1=bof2; + } + break; + + case XLS_RECORD_WINDOW1: + { + WIND1 *w = (WIND1*)buf; + xlsConvertWindow(w); + pWB->activeSheetIdx = w->itabCur; + if(xls_debug) { + printf("WINDOW1: "); + printf("xWn : %d\n", w->xWn/20); + printf("yWn : %d\n", w->yWn/20); + printf("dxWn : %d\n", w->dxWn/20); + printf("dyWn : %d\n", w->dyWn/20); + printf("grbit : %d\n", w->grbit); + printf("itabCur: %d\n", w->itabCur); + printf("itabFi : %d\n", w->itabFirst); + printf("ctabSel: %d\n", w->ctabSel); + printf("wTabRat: %d\n", w->wTabRatio); + } + } + break; + + case XLS_RECORD_SST: + //printf("ADD SST\n"); + xlsConvertSst((SST *)buf); + if ((retval = xls_addSST(pWB,(SST*)buf,bof1.size)) != LIBXLS_OK) { + goto cleanup; + } + break; + + case XLS_RECORD_EXTSST: + break; + + case XLS_RECORD_BOUNDSHEET: + { + //printf("ADD SHEET\n"); + BOUNDSHEET *bs = (BOUNDSHEET *)buf; + xlsConvertBoundsheet(bs); + // different for BIFF5 and BIFF8 + if ((retval = xls_addSheet(pWB, bs, bof1.size)) != LIBXLS_OK) { + goto cleanup; + } + } + break; + + case XLS_RECORD_XF: + if(pWB->is5ver) { + XF5 *xf; + xf = (XF5 *)buf; + xlsConvertXf5(xf); + + if ((retval = xls_addXF5(pWB,xf)) != LIBXLS_OK) { + goto cleanup; + } + if(xls_debug) { + printf(" font: %d\n", xf->font); + printf(" format: %d\n", xf->format); + printf(" type: %.4x\n", xf->type); + printf(" align: %.4x\n", xf->align); + printf("rotatio: %.4x\n", xf->color); + printf(" ident: %.4x\n", xf->fill); + printf("usedatt: %.4x\n", xf->border); + printf("linesty: %.4x\n", xf->linestyle); + } + } else { + XF8 *xf; + xf = (XF8 *)buf; + xlsConvertXf8(xf); + + if ((retval = xls_addXF8(pWB,xf)) != 
LIBXLS_OK) { + goto cleanup; + } + + if(xls_debug) { + xls_showXF(xf); + } + } + break; + + case XLS_RECORD_FONT: + case XLS_RECORD_FONT_ALT: + { + char *s; + FONT *f = (FONT*)buf; + xlsConvertFont(f); + s = xls_addFont(pWB,f, bof1.size); + if(xls_debug) { + printf(" height: %d\n", f->height); + printf(" flag: 0x%x\n", f->flag); + printf(" color: 0x%x\n", f->color); + printf(" weight: %d\n", f->bold); + printf("escapem: 0x%x\n", f->escapement); + printf("underln: 0x%x\n", f->underline); + printf(" family: 0x%x\n", f->family); + printf("charset: 0x%x\n", f->charset); + if(s) printf(" name: %s\n", s); + } + } + break; + + case XLS_RECORD_FORMAT: + xlsConvertFormat((FORMAT *)buf); + if ((retval = xls_addFormat(pWB, (FORMAT*)buf, bof1.size)) != LIBXLS_OK) { + goto cleanup; + } + break; + + case XLS_RECORD_STYLE: + if(xls_debug) { + struct { unsigned short idx; unsigned char ident; unsigned char lvl; } *styl; + styl = (void *)buf; + + printf(" idx: 0x%x\n", styl->idx & 0x07FF); + if(styl->idx & 0x8000) { + printf(" ident: 0x%x\n", styl->ident); + printf(" level: 0x%x\n", styl->lvl); + } else { + char *s = get_string((char *)&buf[2], bof1.size - 2, 1, pWB); + printf(" name=%s\n", s); + free(s); + } + } + break; + + case XLS_RECORD_PALETTE: + if(xls_debug > 10) { + unsigned char *p = buf + 2; + int idx, len; + + len = buf[0] + (buf[1] << 8); + for(idx=0; idxis1904 = *(BYTE *)buf; // the field is a short, but with little endian the first byte is 0 or 1 + if(xls_debug) { + printf(" mode: 0x%x\n", pWB->is1904); + } + break; + + case XLS_RECORD_FILEPASS: + retval = LIBXLS_ERROR_UNSUPPORTED_ENCRYPTION; + goto cleanup; + + case XLS_RECORD_DEFINEDNAME: + if(xls_debug) { + int i; + printf(" DEFINEDNAME: "); + for(i=0; iolestr->eof)&&(bof1.id!=XLS_RECORD_EOF)); + +cleanup: + if (buf) + free(buf); + + return retval; +} + + +static xls_error_t xls_preparseWorkSheet(xlsWorkSheet* pWS) +{ + if(!pWS) return LIBXLS_ERROR_NULL_ARGUMENT; + + BOF tmp; + BYTE* buf = NULL; + xls_error_t 
retval = LIBXLS_OK; + + verbose ("xls_preparseWorkSheet"); + + if (ole2_seek(pWS->workbook->olestr,pWS->filepos) == -1) { + retval = LIBXLS_ERROR_SEEK; + goto cleanup; + } + do + { + size_t read; + if((read = ole2_read(&tmp, 1, 4, pWS->workbook->olestr)) != 4) { + if (xls_debug) fprintf(stderr, "Error: failed to read OLE size\n"); + retval = LIBXLS_ERROR_READ; + goto cleanup; + } + xlsConvertBof(&tmp); + if (tmp.size) { + if ((buf = realloc(buf, tmp.size)) == NULL) { + if (xls_debug) fprintf(stderr, "Error: failed to allocate buffer of size %d\n", (int)tmp.size); + retval = LIBXLS_ERROR_MALLOC; + goto cleanup; + } + if((read = ole2_read(buf, 1, tmp.size, pWS->workbook->olestr)) != tmp.size) { + if (xls_debug) fprintf(stderr, "Error: failed to read OLE block\n"); + retval = LIBXLS_ERROR_READ; + goto cleanup; + } + } + + switch (tmp.id) + { + case XLS_RECORD_DEFCOLWIDTH: + if (tmp.size < sizeof(WORD)) { + retval = LIBXLS_ERROR_PARSE; + goto cleanup; + } + pWS->defcolwidth = (buf[0] << 8) + (buf[1] << 16); + break; + case XLS_RECORD_COLINFO: + if (tmp.size < sizeof(COLINFO)) { + retval = LIBXLS_ERROR_PARSE; + goto cleanup; + } + xlsConvertColinfo((COLINFO*)buf); + if ((retval = xls_addColinfo(pWS,(COLINFO*)buf)) != LIBXLS_OK) + goto cleanup; + break; + case XLS_RECORD_ROW: + if (tmp.size < sizeof(ROW)) { + retval = LIBXLS_ERROR_PARSE; + goto cleanup; + } + xlsConvertRow((ROW*)buf); + /* The lcell field is 1-indexed whereas lastcol is 0-indexed */ + if (pWS->rows.lastcol+1<((ROW*)buf)->lcell) + pWS->rows.lastcol=((ROW*)buf)->lcell-1; + if (pWS->rows.lastrow<((ROW*)buf)->index) + pWS->rows.lastrow=((ROW*)buf)->index; + break; + /* If the ROW record is incorrect or missing, infer the information from + * cell data. 
*/ + case XLS_RECORD_MULRK: + if (xls_isCellTooSmall(pWS->workbook, &tmp, buf)) { + retval = LIBXLS_ERROR_PARSE; + goto cleanup; + } + if (pWS->rows.lastcolcol) + (tmp.size - 6)/6 - 1) + pWS->rows.lastcol=xlsShortVal(((MULRK*)buf)->col) + (tmp.size - 6)/6 - 1; + if (pWS->rows.lastrowrow)) + pWS->rows.lastrow=xlsShortVal(((MULRK*)buf)->row); + break; + case XLS_RECORD_MULBLANK: + if (xls_isCellTooSmall(pWS->workbook, &tmp, buf)) { + retval = LIBXLS_ERROR_PARSE; + goto cleanup; + } + if (pWS->rows.lastcolcol) + (tmp.size - 6)/2 - 1) + pWS->rows.lastcol=xlsShortVal(((MULBLANK*)buf)->col) + (tmp.size - 6)/2 - 1; + if (pWS->rows.lastrowrow)) + pWS->rows.lastrow=xlsShortVal(((MULBLANK*)buf)->row); + break; + case XLS_RECORD_NUMBER: + case XLS_RECORD_RK: + case XLS_RECORD_LABELSST: + case XLS_RECORD_BLANK: + case XLS_RECORD_LABEL: + case XLS_RECORD_RSTRING: + case XLS_RECORD_FORMULA: + case XLS_RECORD_FORMULA_ALT: + case XLS_RECORD_BOOLERR: + if (xls_isCellTooSmall(pWS->workbook, &tmp, buf)) { + retval = LIBXLS_ERROR_PARSE; + goto cleanup; + } + if (pWS->rows.lastcolcol)) + pWS->rows.lastcol=xlsShortVal(((COL*)buf)->col); + if (pWS->rows.lastrowrow)) + pWS->rows.lastrow=xlsShortVal(((COL*)buf)->row); + break; + } + if (pWS->rows.lastcol > 255) { + retval = LIBXLS_ERROR_PARSE; + goto cleanup; + } + } + while ((!pWS->workbook->olestr->eof)&&(tmp.id!=XLS_RECORD_EOF)); + +cleanup: + if (buf) + free(buf); + return retval; +} + +static xls_error_t xls_formatColumn(xlsWorkSheet* pWS) +{ + DWORD i,t,ii; + DWORD fcol,lcol; + WORD width; + BYTE isHidden; + + for (i=0;icolinfo.count;i++) + { + width = pWS->colinfo.col[i].width; + isHidden = (pWS->colinfo.col[i].flags&1); + if (pWS->colinfo.col[i].first<=pWS->rows.lastcol) + fcol=pWS->colinfo.col[i].first; + else + fcol=pWS->rows.lastcol; + + if (pWS->colinfo.col[i].last<=pWS->rows.lastcol) + lcol=pWS->colinfo.col[i].last; + else + lcol=pWS->rows.lastcol; + + for (ii=0;ii<=pWS->rows.lastrow;ii++) { + for (t=fcol;t<=lcol;t++) { + 
pWS->rows.row[ii].cells.cell[t].isHidden |= isHidden; + pWS->rows.row[ii].cells.cell[t].width = width; + } + } + } + return LIBXLS_OK; +} + +xls_error_t xls_parseWorkSheet(xlsWorkSheet* pWS) +{ + if(!pWS) return LIBXLS_ERROR_NULL_ARGUMENT; + + BOF tmp; + BYTE* buf = NULL; + long offset = pWS->filepos; + size_t read; + xls_error_t retval = 0; + + struct st_cell_data *cell = NULL; + xlsWorkBook *pWB = pWS->workbook; + + verbose ("xls_parseWorkSheet"); + + if ((retval = xls_preparseWorkSheet(pWS)) != LIBXLS_OK) { + goto cleanup; + } + // printf("size=%d fatpos=%d)\n", pWS->workbook->olestr->size, pWS->workbook->olestr->fatpos); + + if ((retval = xls_makeTable(pWS)) != LIBXLS_OK) { + goto cleanup; + } + + if ((retval = xls_formatColumn(pWS)) != LIBXLS_OK) { + goto cleanup; + } + + if (ole2_seek(pWS->workbook->olestr,pWS->filepos) == -1) { + retval = LIBXLS_ERROR_SEEK; + goto cleanup; + } + do + { + long lastPos = offset; + + if(xls_debug > 10) { + printf("LASTPOS=%ld pos=%d filePos=%d filePos=%d\n", lastPos, (int)pWB->olestr->pos, pWS->filepos, pWB->filepos); + } + if((read = ole2_read(&tmp, 1, 4, pWS->workbook->olestr)) != 4) { + if (xls_debug) fprintf(stderr, "Error: failed to read OLE size\n"); + retval = LIBXLS_ERROR_READ; + goto cleanup; + } + xlsConvertBof((BOF *)&tmp); + if (tmp.size) { + if ((buf = realloc(buf, tmp.size)) == NULL) { + if (xls_debug) fprintf(stderr, "Error: failed to allocate buffer of size %d\n", (int)tmp.size); + retval = LIBXLS_ERROR_MALLOC; + goto cleanup; + } + if((read = ole2_read(buf, 1, tmp.size, pWS->workbook->olestr)) != tmp.size) { + if (xls_debug) fprintf(stderr, "Error: failed to read OLE block\n"); + retval = LIBXLS_ERROR_READ; + goto cleanup; + } + } + offset += 4 + tmp.size; + + if(xls_debug) + xls_showBOF(&tmp); + + switch (tmp.id) + { + case XLS_RECORD_EOF: + break; + case XLS_RECORD_MERGEDCELLS: + if ((retval = xls_mergedCells(pWS,&tmp,buf)) != LIBXLS_OK) { + goto cleanup; + } + break; + case XLS_RECORD_ROW: + if (tmp.size < 
sizeof(ROW)) { + retval = LIBXLS_ERROR_PARSE; + goto cleanup; + } + if(xls_debug > 10) printf("ROW: %x at pos=%ld\n", tmp.id, lastPos); + xlsConvertRow((ROW *)buf); + if ((retval = xls_addRow(pWS,(ROW*)buf)) != LIBXLS_OK) { + goto cleanup; + } + break; + case XLS_RECORD_DEFCOLWIDTH: + if (tmp.size < sizeof(WORD)) { + retval = LIBXLS_ERROR_PARSE; + goto cleanup; + } + if(xls_debug > 10) printf("DEFAULT COL WIDTH: %d\n", ((WORD *)buf)[0]); + break; + case XLS_RECORD_DEFAULTROWHEIGHT: + if (tmp.size < 2 * sizeof(WORD)) { + retval = LIBXLS_ERROR_PARSE; + goto cleanup; + } + if(xls_debug > 10) printf("DEFAULT ROW Height: 0x%x %d\n", ((WORD *)buf)[0], ((WORD *)buf)[1]); + break; + case XLS_RECORD_DBCELL: + if(xls_debug > 10) { + DWORD *foo = (DWORD *)buf; + WORD *goo; + int i; + printf("DBCELL: size %d\n", tmp.size); + printf("DBCELL OFFSET=%4.4u -> ROW %ld\n", foo[0], lastPos-foo[0]); + ++foo; + goo = (WORD *)foo; + for(i=0; i<5; ++i) printf("goo[%d]=%4.4x %u\n", i, goo[i], goo[i]); + } + break; + case XLS_RECORD_INDEX: + if(xls_debug > 10) { + DWORD *foo = (DWORD *)buf; + int i; + printf("INDEX: size %d\n", tmp.size); + for(i=0; i<5; ++i) printf("FOO[%d]=%4.4x %u\n", i, foo[i], foo[i]); + } +#if 0 + 0 4 4 4 8 4 + 12 4 16 4∙nm + Not used Index to first used row (rf, 0-based) Index to first row of unused tail of sheet (rl, last used row + 1, 0-based) + Absolute stream position of the DEFCOLWIDTH record (➜5.32) of the current sheet. If this record does not exist, the offset points to the record at the position where the DEFCOLWIDTH record would occur. 
+ Array of nm absolute stream positions to the DBCELL record (➜5.29) of each Row Block +#endif + break; + case XLS_RECORD_MULRK: + case XLS_RECORD_MULBLANK: + case XLS_RECORD_NUMBER: + case XLS_RECORD_BOOLERR: + case XLS_RECORD_RK: + case XLS_RECORD_LABELSST: + case XLS_RECORD_BLANK: + case XLS_RECORD_LABEL: + case XLS_RECORD_RSTRING: + case XLS_RECORD_FORMULA: + case XLS_RECORD_FORMULA_ALT: + if ((cell = xls_addCell(pWS, &tmp, buf)) == NULL) { + retval = LIBXLS_ERROR_PARSE; + goto cleanup; + } + break; + case XLS_RECORD_ARRAY: + if(formula_handler) formula_handler(tmp.id, tmp.size, buf); + break; + + case XLS_RECORD_STRING: + if(cell && (cell->id == XLS_RECORD_FORMULA || cell->id == XLS_RECORD_FORMULA_ALT)) { + xls_cell_set_str(cell, get_string((char *)buf, tmp.size, + (BYTE)!pWB->is5ver, pWB)); + if (xls_debug) xls_showCell(cell); + } + break; + + default: + if(xls_debug) + { + //xls_showBOF(&tmp); + if (tmp.size >= sizeof(COL)) { + printf(" [%d:%d]: 0x%X at pos=%lu size=%u\n", xlsShortVal(((COL*)buf)->row), xlsShortVal(((COL*)buf)->col), + tmp.id, lastPos, tmp.size); + } else { + printf(" 0x%X at pos=%lu size=%u\n", tmp.id, lastPos, tmp.size); + } + } + break; + } + } + while ((!pWS->workbook->olestr->eof)&&(tmp.id!=XLS_RECORD_EOF)); + +cleanup: + if (buf) + free(buf); + + return retval; +} + +xlsWorkSheet * xls_getWorkSheet(xlsWorkBook* pWB,int num) +{ + xlsWorkSheet * pWS = NULL; + verbose ("xls_getWorkSheet"); + if (num >= 0 && num < (int)pWB->sheets.count) { + pWS = calloc(1, sizeof(xlsWorkSheet)); + pWS->filepos=pWB->sheets.sheet[num].filepos; + pWS->workbook=pWB; + pWS->rows.lastcol=0; + pWS->rows.lastrow=0; + pWS->colinfo.count=0; + } + return pWS; +} + +static xlsWorkBook *xls_open_ole(OLE2 *ole, const char *charset, xls_error_t *outError) { + xlsWorkBook* pWB; + xls_error_t retval = LIBXLS_OK; + + pWB = calloc(1, sizeof(xlsWorkBook)); + verbose ("xls_open_ole"); + + if ((pWB->olestr=ole2_fopen(ole, "\005SummaryInformation"))) + { + pWB->summary = 
calloc(1,4096); + if (ole2_read(pWB->summary, 4096, 1, pWB->olestr) == -1) { + if (xls_debug) fprintf(stderr, "SummaryInformation not found\n"); + retval = LIBXLS_ERROR_READ; + goto cleanup; + } + ole2_fclose(pWB->olestr); + } + + if ((pWB->olestr=ole2_fopen(ole, "\005DocumentSummaryInformation"))) + { + pWB->docSummary = calloc(1, 4096); + if (ole2_read(pWB->docSummary, 4096, 1, pWB->olestr) == -1) { + if (xls_debug) fprintf(stderr, "DocumentSummaryInformation not found\n"); + retval = LIBXLS_ERROR_READ; + goto cleanup; + } + ole2_fclose(pWB->olestr); + } + +#if 0 + if(xls_debug) { + printf("summary=%d docsummary=%d\n", pWB->summary ? 1 : 0, pWB->docSummary ? 1 : 0); + xlsSummaryInfo *si = xls_summaryInfo(pWB); + printf("title=%s\n", si->title); + printf("subject=%s\n", si->subject); + printf("author=%s\n", si->author); + printf("keywords=%s\n", si->keywords); + printf("comment=%s\n", si->comment); + printf("lastAuthor=%s\n", si->lastAuthor); + printf("appName=%s\n", si->appName); + printf("category=%s\n", si->category); + printf("manager=%s\n", si->manager); + printf("company=%s\n", si->company); + } +#endif + + // open Workbook + if (!(pWB->olestr=ole2_fopen(ole,"Workbook")) && !(pWB->olestr=ole2_fopen(ole,"Book"))) + { + if(xls_debug) fprintf(stderr, "Workbook not found\n"); + retval = LIBXLS_ERROR_PARSE; + goto cleanup; + } + + pWB->sheets.count=0; + pWB->xfs.count=0; + pWB->fonts.count=0; + pWB->charset = strdup(charset ? 
charset : "UTF-8"); + + retval = xls_parseWorkBook(pWB); + +cleanup: + if (retval != LIBXLS_OK) { + if (!pWB->olestr) + ole2_close(ole); + xls_close_WB(pWB); + pWB = NULL; + } + if (outError) + *outError = retval; + + return pWB; +} + +xlsWorkBook* xls_open(const char *file, const char* charset) +{ + return xls_open_file(file, charset, NULL); +} + +xlsWorkBook* xls_open_file(const char *file, const char* charset, xls_error_t *outError) { + OLE2* ole = NULL; + + if (!(ole=ole2_open_file(file))) + { + if (xls_debug) fprintf(stderr, "File \"%s\" not found\n",file); + if (outError) *outError = LIBXLS_ERROR_OPEN; + return NULL; + } + + return xls_open_ole(ole, charset, outError); +} + +xlsWorkBook *xls_open_buffer(const unsigned char *buffer, size_t len, + const char *charset, xls_error_t *outError) { + OLE2* ole = NULL; + + if (!(ole=ole2_open_buffer(buffer, len))) + { + if (outError) *outError = LIBXLS_ERROR_OPEN; + return NULL; + } + + return xls_open_ole(ole, charset, outError); +} + +xlsRow *xls_row(xlsWorkSheet* pWS, WORD cellRow) +{ + if(cellRow > pWS->rows.lastrow) + return NULL; + + if (pWS->rows.row == NULL) + return NULL; + + return &pWS->rows.row[cellRow]; +} + +xlsCell *xls_cell(xlsWorkSheet* pWS, WORD cellRow, WORD cellCol) +{ + struct st_row_data *row; + + if ((row = xls_row(pWS, cellRow)) == NULL) + return NULL; + + if(cellCol >= row->cells.count) + return NULL; + + return &row->cells.cell[cellCol]; +} + +void xls_close_WB(xlsWorkBook* pWB) +{ + OLE2* ole; + + verbose ("xls_close"); + + if(!pWB) return; + + // OLE first + if (pWB->olestr) { + ole=pWB->olestr->ole; + ole2_fclose(pWB->olestr); + ole2_close(ole); + } + + // WorkBook + free(pWB->charset); + + // Sheets + { + DWORD i; + for(i=0; isheets.count; ++i) { + free(pWB->sheets.sheet[i].name); + } + free(pWB->sheets.sheet); + } + + // SST + { + DWORD i; + for(i=0; isst.count; ++i) { + free(pWB->sst.string[i].str); + } + free(pWB->sst.string); + } + + // xfs + { + free(pWB->xfs.xf); + } + + // fonts + 
{ + DWORD i; + for(i=0; ifonts.count; ++i) { + free(pWB->fonts.font[i].name); + } + free(pWB->fonts.font); + } + + // formats + { + DWORD i; + for(i=0; iformats.count; ++i) { + free(pWB->formats.format[i].value); + } + free(pWB->formats.format); + } + + // buffers + if(pWB->summary) free(pWB->summary); + if(pWB->docSummary) free(pWB->docSummary); + +#ifdef HAVE_ICONV + if (pWB->converter) + iconv_close((iconv_t)pWB->converter); + if (pWB->utf16_converter) + iconv_close((iconv_t)pWB->utf16_converter); +#endif + + if (pWB->utf8_locale) + xls_freelocale((xls_locale_t)pWB->utf8_locale); + + // TODO - free other dynamically allocated objects like string table?? + free(pWB); +} + +void xls_close_WS(xlsWorkSheet* pWS) +{ + if(!pWS) return; + + if (pWS->rows.row) { + DWORD i, j; + for(j=0; j<=pWS->rows.lastrow; ++j) { + struct st_row_data *row = &pWS->rows.row[j]; + for(i=0; icells.count; ++i) { + free(row->cells.cell[i].str); + } + free(row->cells.cell); + } + free(pWS->rows.row); + } + + // COLINFO + { + free(pWS->colinfo.col); + } + free(pWS); +} + +const char* xls_getVersion(void) +{ + return PACKAGE_VERSION; +} + +const char* xls_getError(xls_error_t code) { + if (code == LIBXLS_OK) + return "No error"; + if (code == LIBXLS_ERROR_READ) + return "Unable to read from file"; + if (code == LIBXLS_ERROR_OPEN) + return "Unable to open file"; + if (code == LIBXLS_ERROR_SEEK) + return "Unable to seek within file"; + if (code == LIBXLS_ERROR_MALLOC) + return "Unable to allocate memory"; + if (code == LIBXLS_ERROR_PARSE) + return "Unable to parse file"; + if (code == LIBXLS_ERROR_UNSUPPORTED_ENCRYPTION) + return "Unsupported encryption scheme"; + + return "Unknown error"; +} + +// +// http://poi.apache.org/hpsf/internals.html +// or google "DocumentSummaryInformation and UserDefined Property Sets" and look for MSDN hits +// + +xlsSummaryInfo *xls_summaryInfo(xlsWorkBook* pWB) +{ + xlsSummaryInfo *pSI; + + pSI = (xlsSummaryInfo *)calloc(1, sizeof(xlsSummaryInfo)); + 
xls_dumpSummary(pWB->summary, 1, pSI); + xls_dumpSummary(pWB->docSummary, 0, pSI); + + return pSI; +} + +void xls_close_summaryInfo(xlsSummaryInfo *pSI) +{ + if(!pSI) return; + + if(pSI->title) free(pSI->title); + if(pSI->subject) free(pSI->subject); + if(pSI->author) free(pSI->author); + if(pSI->keywords) free(pSI->keywords); + if(pSI->comment) free(pSI->comment); + if(pSI->lastAuthor) free(pSI->lastAuthor); + if(pSI->appName) free(pSI->appName); + if(pSI->category) free(pSI->category); + if(pSI->manager) free(pSI->manager); + if(pSI->company) free(pSI->company); + + free(pSI); +} + +static void xls_dumpSummary(char *buf,int isSummary,xlsSummaryInfo *pSI) { + header *head; + sectionList *secList; + propertyList *plist; + sectionHeader *secHead; + property *prop; + uint32_t i, j; + + if(!buf) return; // perhaps the document was missing?? + + head = (header *)buf; + //printf("header: \n"); + //printf(" sig=%x\n", head->sig); + //printf(" os=%x\n", head->os >> 16); + //printf(" class=%8.8x%8.8x%8.8x%8.8x\n", head->format[0], head->format[1], head->format[2], head->format[3]); + //printf(" count=%x\n", head->count); + + for(i=0; icount; ++i) { + secList = &head->secList[i]; + //printf("Section %d:\n", i); + //printf(" class=%8.8x%8.8x%8.8x%8.8x\n", secList->format[0], secList->format[1], secList->format[2], secList->format[3]); + //printf(" offset=%d (now at %ld\n", secList->offset, (char *)secList - (char *)buf + sizeof(sectionList)); + + + secHead = (sectionHeader *)((char *)head + secList->offset); + //printf(" len=%d\n", secHead->length); + //printf(" properties=%d\n", secHead->numProperties); + for(j=0; jnumProperties; ++j) { + BYTE **s; + + plist = &secHead->properties[j]; + //printf(" ---------\n"); + //printf(" propID=%d offset=%d\n", plist->propertyID, plist->sectionOffset); + prop = (property *)((char *)secHead + plist->sectionOffset); + //printf(" propType=%d\n", prop->propertyID); + + switch(prop->propertyID) { + case 2: + //printf(" xlsShortVal=%x\n", 
*(uint16_t *)prop->data); + break; + case 3: + //printf(" wordVal=%x\n", *(uint32_t *)prop->data); + break; + case 30: + //printf(" longVal=%llx\n", *(uint64_t *)prop->data); + //printf(" s[%u]=%s\n", *(uint32_t *)prop->data, (char *)prop->data + 4); + if(isSummary) { + switch(plist->propertyID) { + case 2: s = &pSI->title; break; + case 3: s = &pSI->subject; break; + case 4: s = &pSI->author; break; + case 5: s = &pSI->keywords; break; + case 6: s = &pSI->comment; break; + case 8: s = &pSI->lastAuthor; break; + case 18: s = &pSI->appName; break; + default: s = NULL; break; + } + } else { + switch(plist->propertyID) { + case 2: s = &pSI->category; break; + case 14: s = &pSI->manager; break; + case 15: s = &pSI->company; break; + default: s = NULL; break; + } + } + if(s) *s = (BYTE *)strdup((char *)prop->data + 4); + break; + case 64: + //printf(" longVal=%llx\n", *(uint64_t *)prop->data); + break; + case 65: +#if 0 + { + uint32_t k; + for(k=0; k<*(uint32_t *)prop->data; ++k) { + unsigned char *t = (unsigned char *)prop->data + 4 + k; + printf(" %2.2x(%c)", *t, *t); + } + printf("\n"); + } +#endif + break; + default: + //printf(" UNKNOWN!\n"); + break; + } + } + } +} + +void xls_set_formula_hander(xls_formula_handler handler) +{ + formula_handler = handler; +} diff --git a/3rdparty/libs/fileext/excel/libxls/xlstool.c b/3rdparty/libs/fileext/excel/libxls/xlstool.c new file mode 100644 index 0000000..2277136 --- /dev/null +++ b/3rdparty/libs/fileext/excel/libxls/xlstool.c @@ -0,0 +1,861 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * + * Copyright 2004 Komarov Valery + * Copyright 2006 Christophe Leitienne + * Copyright 2008-2017 David Hoerl + * Copyright 2013 Bob Colbert + * Copyright 2013-2018 Evan Miller + * + * This file is part of libxls -- A multiplatform, C/C++ library for parsing + * Excel(TM) files. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include "config.h" + +#include +#include +#include + +#ifdef HAVE_ICONV +#include +#endif + +#include +#include +#include +#include +#include + +//#include "xls.h" +#include "../include/libxls/xlstypes.h" +#include "../include/libxls/xlsstruct.h" +#include "../include/libxls/xlstool.h" +#include "../include/libxls/brdb.h" +#include "../include/libxls/endian.h" +#include "../include/libxls/locale.h" + +extern int xls_debug; + +/* Not a complete list */ +enum xls_format_e { + XLS_FORMAT_GENERAL, // "" + XLS_FORMAT_NUMBER1, // "0" + XLS_FORMAT_NUMBER2, // "0.00", + XLS_FORMAT_NUMBER3, // "#,##0", + XLS_FORMAT_NUMBER4, // "#,##0.00", + XLS_FORMAT_CURRENCY1, // "\"$\"#,##0_);(\"$\"#,##0)", + XLS_FORMAT_CURRENCY2, // "\"$\"#,##0_);[Red](\"$\"#,##0)", + XLS_FORMAT_CURRENCY3, // "\"$\"#,##0.00_);(\"$\"#,##0.00)", + XLS_FORMAT_CURRENCY4, // "\"$\"#,##0.00_);[Red](\"$\"#,##0.00)", + XLS_FORMAT_PERCENT1, // "0%", + XLS_FORMAT_PERCENT2, // "0.00%", + XLS_FORMAT_SCIENTIFIC1, // "0.00E+00", + XLS_FORMAT_SCIENTIFIC2 = 34 // "##0.0E+0" +}; + +static const DWORD colors[] = + { + 0x000000, + 0xFFFFFF, + 0xFF0000, + 0x00FF00, + 0x0000FF, + 0xFFFF00, + 0xFF00FF, + 0x00FFFF, + 0x800000, + 0x008000, + 0x000080, + 0x808000, + 0x800080, + 0x008080, + 0xC0C0C0, + 0x808080, + 0x9999FF, + 0x993366, + 0xFFFFCC, + 0xCCFFFF, + 0x660066, + 0xFF8080, + 0x0066CC, + 0xCCCCFF, + 0x000080, + 0xFF00FF, + 0xFFFF00, + 0x00FFFF, + 0x800080, + 0x800000, + 0x008080, + 0x0000FF, + 0x00CCFF, + 0xCCFFFF, + 0xCCFFCC, + 0xFFFF99, + 0x99CCFF, + 0xFF99CC, + 0xCC99FF, + 0xFFCC99, + 0x3366FF, + 0x33CCCC, + 0x99CC00, + 0xFFCC00, + 0xFF9900, + 0xFF6600, + 0x666699, + 0x969696, + 0x003366, + 0x339966, + 0x003300, + 0x333300, + 0x993300, + 0x993366, + 0x333399, + 0x333333 + }; + + +// Display string if in debug mode +void verbose(char* str) +{ + if (xls_debug) + printf("libxls : %s\n",str); +} + +#ifdef HAVE_ICONV + +struct codepage_entry_t { + int code; + const char *name; +}; + +static struct 
codepage_entry_t _codepage_entries[] = { + { .code = 874, .name = "WINDOWS-874" }, + { .code = 932, .name = "SHIFT-JIS" }, + { .code = 936, .name = "WINDOWS-936" }, + { .code = 950, .name = "BIG-5" }, + { .code = 951, .name = "BIG5-HKSCS" }, + { .code = 1250, .name = "WINDOWS-1250" }, + { .code = 1251, .name = "WINDOWS-1251" }, + { .code = 1252, .name = "WINDOWS-1252" }, + { .code = 1253, .name = "WINDOWS-1253" }, + { .code = 1254, .name = "WINDOWS-1254" }, + { .code = 1255, .name = "WINDOWS-1255" }, + { .code = 1256, .name = "WINDOWS-1256" }, + { .code = 1257, .name = "WINDOWS-1257" }, + { .code = 1258, .name = "WINDOWS-1258" }, + { .code = 10000, .name = "MACROMAN" }, + { .code = 10004, .name = "MACARABIC" }, + { .code = 10005, .name = "MACHEBREW" }, + { .code = 10006, .name = "MACGREEK" }, + { .code = 10007, .name = "MACCYRILLIC" }, + { .code = 10010, .name = "MACROMANIA" }, + { .code = 10017, .name = "MACUKRAINE" }, + { .code = 10021, .name = "MACTHAI" }, + { .code = 10029, .name = "MACCENTRALEUROPE" }, + { .code = 10079, .name = "MACICELAND" }, + { .code = 10081, .name = "MACTURKISH" }, + { .code = 10082, .name = "MACCROATIAN" }, +}; + +static int codepage_compare(const void *key, const void *value) { + const struct codepage_entry_t *cp1 = key; + const struct codepage_entry_t *cp2 = value; + return cp1->code - cp2->code; +} + +static const char *encoding_for_codepage(WORD codepage) { + struct codepage_entry_t key = { .code = codepage }; + struct codepage_entry_t *result = bsearch(&key, _codepage_entries, + sizeof(_codepage_entries)/sizeof(_codepage_entries[0]), + sizeof(_codepage_entries[0]), &codepage_compare); + if (result) { + return result->name; + } + return "WINDOWS-1252"; +} + +static char* unicode_decode_iconv(const char *s, size_t len, iconv_t ic) { + char* outbuf = 0; + + if(s && len && ic) + { + size_t outlenleft = len; + int outlen = len; + size_t inlenleft = len; + const char* src_ptr = s; + char* out_ptr = 0; + + size_t st; + outbuf = 
malloc(outlen + 1); + + if(outbuf) + { + out_ptr = outbuf; + while(inlenleft) + { + st = iconv(ic, (ICONV_CONST char **)&src_ptr, &inlenleft, (char **)&out_ptr,(size_t *) &outlenleft); + if(st == (size_t)(-1)) + { + if(errno == E2BIG) + { + size_t diff = out_ptr - outbuf; + outlen += inlenleft; + outlenleft += inlenleft; + outbuf = realloc(outbuf, outlen + 1); + if(!outbuf) + { + break; + } + out_ptr = outbuf + diff; + } + else + { + free(outbuf), outbuf = NULL; + break; + } + } + } + } + outlen -= outlenleft; + + if(outbuf) + { + outbuf[outlen] = 0; + } + } + return outbuf; +} + +#endif + +// Convert UTF-16 to UTF-8 without iconv +static char *unicode_decode_wcstombs(const char *s, size_t len, xls_locale_t locale) { + // Do wcstombs conversion + char *converted = NULL; + int count, count2; + size_t i; + wchar_t *w = NULL; + + w = malloc((len/2+1)*sizeof(wchar_t)); + + for(i=0; i> 6); + *out++ = (BYTE)0x80 | (c & 0x3F); + } else { + *out++ = c; + } + } + *out = 0; + + return ret; +} + +// Convert BIFF5 string or compressed BIFF8 string to the encoding desired +// by the workbook. Returns a NUL-terminated string +char* codepage_decode(const char *s, size_t len, xlsWorkBook *pWB) { + if (!pWB->is5ver && strcmp(pWB->charset, "UTF-8") == 0) + return transcode_latin1_to_utf8(s, len); + +#ifdef HAVE_ICONV + if (!pWB->converter) { + const char *from_encoding = pWB->is5ver ? 
encoding_for_codepage(pWB->codepage) : "ISO-8859-1"; + iconv_t converter = iconv_open(pWB->charset, from_encoding); + if (converter == (iconv_t)-1) { + printf("conversion from '%s' to '%s' not available", from_encoding, pWB->charset); + return NULL; + } + pWB->converter = (void *)converter; + } + return unicode_decode_iconv(s, len, pWB->converter); +#else + char *ret = malloc(len+1); + memcpy(ret, s, len); + ret[len] = 0; + return ret; +#endif +} + +// Convert unicode string to UTF-8 +char* transcode_utf16_to_utf8(const char *s, size_t len) { + xls_locale_t locale = xls_createlocale(); + char *result = unicode_decode_wcstombs(s, len, locale); + xls_freelocale(locale); + return result; +} + +// Convert unicode string to the encoding desired by the workbook +char* unicode_decode(const char *s, size_t len, xlsWorkBook *pWB) +{ +#ifdef HAVE_ICONV +#if defined(_AIX) || defined(__sun) + const char *from_enc = "UTF-16le"; +#else + const char *from_enc = "UTF-16LE"; +#endif + if (!pWB->utf16_converter) { + iconv_t converter = iconv_open(pWB->charset, from_enc); + if (converter == (iconv_t)-1) { + printf("conversion from '%s' to '%s' not available\n", from_enc, pWB->charset); + return NULL; + } + pWB->utf16_converter = (void *)converter; + } + return unicode_decode_iconv(s, len, pWB->utf16_converter); +#else + if (!pWB->utf8_locale) { + xls_locale_t locale = xls_createlocale(); + if (locale == NULL) { + printf("creation of UTF-8 locale failed\n"); + return NULL; + } + pWB->utf8_locale = (void *)locale; + } + return unicode_decode_wcstombs(s, len, pWB->utf8_locale); +#endif +} + +// Read and decode string +char *get_string(const char *s, size_t len, BYTE is2, xlsWorkBook* pWB) +{ + WORD ln; + DWORD ofs = 0; + BYTE flag = 0; + const char *str = s; + char *ret = NULL; + + if (is2) { + // length is two bytes + if (ofs + 2 > len) { + return NULL; + } + ln= ((BYTE*)str)[0] + (((BYTE*)str)[1] << 8); + ofs+=2; + } else { + // single byte length + if (ofs + 1 > len) { + return NULL; 
+ } + ln=*(BYTE*)str; + ofs++; + } + + if(!pWB->is5ver) { + // unicode strings have a format byte before the string + if (ofs + 1 > len) { + return NULL; + } + flag=*(BYTE*)(str+ofs); + ofs++; + } + if (flag&0x8) { + // WORD rt; + // rt=*(WORD*)(str+ofs); // unused + ofs+=2; + } + if (flag&0x4) { + // DWORD sz; + // sz=*(DWORD*)(str+ofs); // unused + ofs+=4; + } + if(flag & 0x1) { + if (ofs + 2*ln > len) { + return NULL; + } + ret = unicode_decode(str+ofs, ln*2, pWB); + } else { + if (ofs + ln > len) { + return NULL; + } + ret = codepage_decode(str+ofs, ln, pWB); + } + +#if 0 // debugging + if(xls_debug == 100) { + ofs += (flag & 0x1) ? ln*2 : ln; + + printf("ofs=%d ret[0]=%d\n", ofs, *ret); + { + unsigned char *ptr; + + ptr = ret; + + printf("%x %x %x %x %x %x %x %x\n", ptr[0], ptr[1], ptr[2], ptr[3], ptr[4], ptr[5], ptr[6], ptr[7] ); + printf("%s\n", ret); + } + } +#endif + + return ret; +} + +DWORD xls_getColor(const WORD color,WORD def) +{ + int cor=8; + int size = 64 - cor; + int max = size; + WORD idx=color; + if( idx >= cor) + idx -= cor; + if( idx < max ) + { + return colors[idx]; + } + else + return colors[def]; +} + + +void xls_showBookInfo(xlsWorkBook* pWB) +{ + verbose("BookInfo"); + printf(" is5ver: %i\n",pWB->is5ver); + printf("codepage: %i\n",pWB->codepage); + printf(" type: %.4X ",pWB->type); + switch (pWB->type) + { + case 0x5: + printf("Workbook globals\n"); + break; + case 0x6: + printf("Visual Basic module\n"); + break; + case 0x10: + printf("Worksheet\n"); + break; + case 0x20: + printf("Chart\n"); + break; + case 0x40: + printf("BIFF4 Macro sheet\n"); + break; + case 0x100: + printf("BIFF4W Workbook globals\n"); + break; + } + printf("------------------- END BOOK INFO---------------------------\n"); +} + + +void xls_showBOF(BOF* bof) +{ + printf("----------------------------------------------\n"); + verbose("BOF"); + printf(" ID: %.4Xh %s (%s)\n",bof->id,brdb[get_brbdnum(bof->id)].name,brdb[get_brbdnum(bof->id)].desc); + printf(" Size: 
%i\n",bof->size); +} + +#if 0 +static void xls_showBOUNDSHEET(BOUNDSHEET* bsheet) +{ + switch (bsheet->type & 0x000f) + { + case 0x0000: + /* worksheet or dialog sheet */ + verbose ("85: Worksheet or dialog sheet"); + break; + case 0x0001: + /* Microsoft Excel 4.0 macro sheet */ + verbose ("85: Microsoft Excel 4.0 macro sheet"); + break; + case 0x0002: + /* Chart */ + verbose ("85: Chart sheet"); + break; + case 0x0006: + /* Visual Basic module */ + verbose ("85: Visual Basic sheet"); + break; + default: + break; + } + printf(" Pos: %Xh\n",bsheet->filepos); + printf(" flags: %.4Xh\n",bsheet->type); + // printf(" Name: [%i] %s\n",bsheet->len,bsheet->name); +} +#endif + +void xls_showROW(struct st_row_data* row) +{ + verbose("ROW"); + printf(" Index: %i \n",row->index); + printf("First col: %i \n",row->fcell); + printf(" Last col: %i \n",row->lcell); + printf(" Height: %i (1/20 px)\n",row->height); + printf(" Flags: %.4X \n",row->flags); + printf(" xf: %i \n",row->xf); + printf("----------------------------------------------\n"); +} + +void xls_showColinfo(struct st_colinfo_data* col) +{ + verbose("COLINFO"); + printf("First col: %i \n",col->first); + printf(" Last col: %i \n",col->last); + printf(" Width: %i (1/256 px)\n",col->width); + printf(" XF: %i \n",col->xf); + printf(" Flags: %i (",col->flags); + if (col->flags & 0x1) + printf("hidden "); + if (col->flags & 0x700) + printf("outline "); + if (col->flags & 0x1000) + printf("collapsed "); + printf(")\n"); + printf("----------------------------------------------\n"); +} + +void xls_showCell(struct st_cell_data* cell) +{ + printf(" -----------\n"); + printf(" ID: %.4Xh %s (%s)\n",cell->id, brdb[get_brbdnum(cell->id)].name, brdb[get_brbdnum(cell->id)].desc); + printf(" Cell: %c:%u [%u:%u]\n",cell->col+'A',cell->row+1,cell->col,cell->row); +// printf(" Cell: %u:%u\n",cell->col+1,cell->row+1); + printf(" xf: %i\n",cell->xf); + if(cell->id == XLS_RECORD_BLANK) { + //printf("BLANK_CELL!\n"); + return; + } + printf(" 
double: %f\n",cell->d); + printf(" int: %d\n",cell->l); + if (cell->str!=NULL) + printf(" str: %s\n",cell->str); +} + + +void xls_showFont(struct st_font_data* font) +{ + + printf(" name: %s\n",font->name); + printf(" height: %i\n",font->height); + printf(" flag: %.4X\n",font->flag); + printf(" color: %.6X\n",font->color); + printf(" bold: %i\n",font->bold); + printf("escapement: %i\n",font->escapement); + printf(" underline: %i\n",font->underline); + printf(" family: %i\n",font->family); + printf(" charset: %i\n",font->charset); + +} +#if 0 +typedef struct st_format + { + long count; //Count of FORMAT's + struct st_format_data + { + WORD index; + char *value; + } + * format; + } + st_format; +#endif + +void xls_showFormat(struct st_format_data* frmt) +{ + printf(" index : %u\n", frmt->index); + printf(" value: %s\n", frmt->value); +} + +void xls_showXF(XF8* xf) +{ + static int idx; + + printf(" Index: %u\n",idx++); + printf(" Font: %u\n",xf->font); + printf(" Format: %u\n",xf->format); + printf(" Type: 0x%x\n",xf->type); + printf(" Align: 0x%x\n",xf->align); + printf(" Rotation: 0x%x\n",xf->rotation); + printf(" Ident: 0x%x\n",xf->ident); + printf(" UsedAttr: 0x%x\n",xf->usedattr); + printf(" LineStyle: 0x%x\n",xf->linestyle); + printf(" Linecolor: 0x%x\n",xf->linecolor); + printf("GroundColor: 0x%x\n",xf->groundcolor); +} + +char *xls_getfcell(xlsWorkBook* pWB, struct st_cell_data* cell, BYTE *label) +{ + struct st_xf_data *xf = NULL; + WORD len = 0; + DWORD offset = 0; + char *ret = NULL; + size_t retlen = 100; + + if (cell->xf < pWB->xfs.count) + xf=&pWB->xfs.xf[cell->xf]; + + switch (cell->id) + { + case XLS_RECORD_LABELSST: + offset = label[0] + (label[1] << 8); + if(!pWB->is5ver) { + offset += ((DWORD)label[2] << 16); + offset += ((DWORD)label[3] << 24); + } + if(offset < pWB->sst.count && pWB->sst.string[offset].str) { + ret = strdup(pWB->sst.string[offset].str); + } + break; + case XLS_RECORD_BLANK: + case XLS_RECORD_MULBLANK: + ret = strdup(""); + break; 
+ case XLS_RECORD_LABEL: + case XLS_RECORD_RSTRING: + len = label[0] + (label[1] << 8); + label += 2; + if (pWB->is5ver || (*(label++) & 0x01) == 0) { + ret = codepage_decode((char *)label, len, pWB); + } else { + ret = unicode_decode((char *)label, len*2, pWB); + } + break; + case XLS_RECORD_RK: + case XLS_RECORD_NUMBER: + ret = malloc(retlen); + snprintf(ret, retlen, "%lf", cell->d); + break; + // if( RK || MULRK || NUMBER || FORMULA) + // if (cell->id==0x27e || cell->id==0x0BD || cell->id==0x203 || 6 (formula)) + default: + if (xf) { + ret = malloc(retlen); + switch (xf->format) + { + case XLS_FORMAT_GENERAL: + case XLS_FORMAT_NUMBER1: + case XLS_FORMAT_NUMBER3: + snprintf(ret, retlen, "%.0lf", cell->d); + break; + case XLS_FORMAT_NUMBER2: + case XLS_FORMAT_NUMBER4: + snprintf(ret, retlen, "%.2f", cell->d); + break; + case XLS_FORMAT_PERCENT1: + snprintf(ret, retlen, "%.0lf%%", 100 * cell->d); + break; + case XLS_FORMAT_PERCENT2: + snprintf(ret, retlen, "%.2lf%%", 100 * cell->d); + break; + case XLS_FORMAT_SCIENTIFIC1: + snprintf(ret, retlen, "%.2e", cell->d); + break; + case XLS_FORMAT_SCIENTIFIC2: + snprintf(ret, retlen, "%.1e", cell->d); + break; + default: + snprintf(ret, retlen, "%.2f", cell->d); + break; + } + break; + } + } + + return ret; +} + +char* xls_getCSS(xlsWorkBook* pWB) +{ + char color[255]; + char* align; + char* valign; + char borderleft[255]; + char borderright[255]; + char bordertop[255]; + char borderbottom[255]; + char italic[255]; + char underline[255]; + char bold[255]; + WORD size; + char fontname[255]; + struct st_xf_data* xf; + DWORD background; + DWORD i; + + char *ret = malloc(65535); + size_t buf_len = 4096; + char *buf = malloc(buf_len); + ret[0] = '\0'; + + for (i=0;ixfs.count;i++) + { + xf=&pWB->xfs.xf[i]; + switch ((xf->align & 0x70)>>4) + { + case 0: + valign=(char*)"top"; + break; + case 1: + valign=(char*)"middle"; + break; + case 2: + valign=(char*)"bottom"; + break; + // case 3: valign=(char*)"right"; break; + // case 4: 
valign=(char*)"right"; break; + default: + valign=(char*)"middle"; + break; + } + + switch (xf->align & 0x07) + { + case 1: + align=(char*)"left"; + break; + case 2: + align=(char*)"center"; + break; + case 3: + align=(char*)"right"; + break; + default: + align=(char*)"left"; + break; + } + + switch (xf->linestyle & 0x0f) + { + case 0: + snprintf(borderleft, sizeof(borderleft), "%s", ""); + break; + default: + snprintf(borderleft, sizeof(borderleft), "border-left: 1px solid black;"); + break; + } + + switch (xf->linestyle & 0x0f0) + { + case 0: + snprintf(borderright, sizeof(borderright), "%s", ""); + break; + default: + snprintf(borderright, sizeof(borderright), "border-right: 1px solid black;"); + break; + } + + switch (xf->linestyle & 0x0f00) + { + case 0: + snprintf(bordertop, sizeof(bordertop), "%s", ""); + break; + default: + snprintf(bordertop, sizeof(bordertop), "border-top: 1px solid black;"); + break; + } + + switch (xf->linestyle & 0x0f000) + { + case 0: + snprintf(borderbottom, sizeof(borderbottom), "%s", ""); + break; + default: + snprintf(borderbottom, sizeof(borderbottom), "border-bottom: 1px solid Black;"); + break; + } + + if (xf->font) + snprintf(color, sizeof(color), "color:#%.6X;",xls_getColor(pWB->fonts.font[xf->font-1].color,0)); + else + snprintf(color, sizeof(color), "%s", ""); + + if (xf->font && (pWB->fonts.font[xf->font-1].flag & 2)) + snprintf(italic, sizeof(italic), "font-style: italic;"); + else + snprintf(italic, sizeof(italic), "%s", ""); + + if (xf->font && (pWB->fonts.font[xf->font-1].bold>400)) + snprintf(bold, sizeof(bold), "font-weight: bold;"); + else + snprintf(bold, sizeof(bold), "%s", ""); + + if (xf->font && (pWB->fonts.font[xf->font-1].underline)) + snprintf(underline, sizeof(underline), "text-decoration: underline;"); + else + snprintf(underline, sizeof(underline), "%s", ""); + + if (xf->font) + size=pWB->fonts.font[xf->font-1].height/20; + else + size=10; + + if (xf->font) + snprintf(fontname, 
sizeof(fontname),"%s",pWB->fonts.font[xf->font-1].name); + else + snprintf(fontname, sizeof(fontname),"Arial"); + + background=xls_getColor((WORD)(xf->groundcolor & 0x7f),1); + snprintf(buf, buf_len, ".xf%i{ font-size:%ipt;font-family: \"%s\";background:#%.6X;text-align:%s;vertical-align:%s;%s%s%s%s%s%s%s%s}\n", + i,size,fontname,background,align,valign,borderleft,borderright,bordertop,borderbottom,color,italic,bold,underline); + + strcat(ret,buf); + } + ret = realloc(ret, strlen(ret)+1); + free(buf); + + return ret; +} diff --git a/3rdparty/libs/fileext/excel/sheet.cpp b/3rdparty/libs/fileext/excel/sheet.cpp deleted file mode 100644 index beb770f..0000000 --- a/3rdparty/libs/fileext/excel/sheet.cpp +++ /dev/null @@ -1,1689 +0,0 @@ -/** - * @brief Excel files (xls/xlsx) into HTML сonverter - * @package excel - * @file sheet.cpp - * @author dmryutov (dmryutov@gmail.com) - * @copyright python-excel (https://github.com/python-excel/xlrd) - * @date 02.12.2016 -- 10.02.2018 - */ -#include "tools.hpp" - -#include "biffh.hpp" - -#include "sheet.hpp" - -#include - - -namespace excel { - -/** XL_SHRFMLA types */ -const std::vector XL_SHRFMLA_ETC { - XL_SHRFMLA, XL_ARRAY, XL_TABLEOP, XL_TABLEOP2, XL_ARRAY2, XL_TABLEOP_B2 -}; -/** Cell horizontal aligment list */ -const std::vector CELL_HORZ_ALIGN { - "left", "left", "center", "right", "justify", "justify", "center", "center" -}; -/** Cell vertical aligment list */ -const std::vector CELL_VERT_ALIGN { - "top", "middle", "bottom", "middle", "middle" -}; -/** Cell border type list */ -const std::vector CELL_BORDER_TYPE { - "none", "solid", "solid", "dashed", "dotted", "solid", "double", "dotted", - "dashed", "dashed", "dashed", "dotted", "dotted", "dashed" -}; -/** Cell border size list */ -const std::vector CELL_BORDER_SIZE { - 1, 1, 2, 1, 1, 3, 1, 1, 2, 1, 2, 1, 2, 3 -}; - -/** Table parts background color map. 
`type`: {`firstRow`, `evenRow`, `oddRow`} */ -const std::unordered_map> TABLE_BACKGROUND { - // Light - {101, {"", "D9D9D9", ""}}, - {102, {"", "DDEBF7", ""}}, - {103, {"", "FCE4D6", ""}}, - {104, {"", "EDEDED", ""}}, - {105, {"", "2CC", ""}}, - {106, {"", "D9E1F2", ""}}, - {107, {"", "E2EFDA", ""}}, - {108, {"000000", "", ""}}, - {109, {"5B9BD5", "", ""}}, - {110, {"ED7D31", "", ""}}, - {111, {"A5A5A5", "", ""}}, - {112, {"FFC000", "", ""}}, - {113, {"4472C4", "", ""}}, - {114, {"70AD47", "", ""}}, - {115, {"", "D9D9D9", ""}}, - {116, {"", "DDEBF7", ""}}, - {117, {"", "FCE4D6", ""}}, - {118, {"", "EDEDED", ""}}, - {119, {"", "FFC000", ""}}, - {120, {"", "D9E1F2", ""}}, - {121, {"", "E2EFDA", ""}}, - // Medium - {201, {"000000", "D9D9D9", ""}}, - {202, {"5B9BD5", "DDEBF7", ""}}, - {203, {"ED7D31", "FCE4D6", ""}}, - {204, {"A5A5A5", "EDEDED", ""}}, - {205, {"FFC000", "2CC", ""}}, - {206, {"4472C4", "D9E1F2", ""}}, - {207, {"70AD47", "E2EFDA", ""}}, - {208, {"000000", "A6A6A6", "D9D9D9"}}, - {209, {"5B9BD5", "BDD7EE", "DDEBF7"}}, - {210, {"ED7D31", "F8CBAD", "FCE4D6"}}, - {211, {"A5A5A5", "DBDBDB", "EDEDED"}}, - {212, {"FFC000", "FFE699", "FFF2CC"}}, - {213, {"4472C4", "B4C6E7", "D9E1F2"}}, - {214, {"70AD47", "C6E0B4", "E2EFDA"}}, - {215, {"000000", "D9D9D9", ""}}, - {216, {"5B9BD5", "D9D9D9", ""}}, - {217, {"ED7D31", "D9D9D9", ""}}, - {218, {"A5A5A5", "D9D9D9", ""}}, - {219, {"FFC000", "D9D9D9", ""}}, - {220, {"4472C4", "D9D9D9", ""}}, - {221, {"70AD47", "D9D9D9", ""}}, - {222, {"D9D9D9", "A6A6A6", "D9D9D9"}}, - {223, {"DDEBF7", "BDD7EE", "DDEBF7"}}, - {224, {"FCE4D6", "F8CBAD", "FCE4D6"}}, - {225, {"EDEDED", "DBDBDB", "EDEDED"}}, - {226, {"FFF2CC", "FFE699", "FFF2CC"}}, - {227, {"D9E1F2", "B4C6E7", "D9E1F2"}}, - {228, {"E2EFDA", "C6E0B4", "E2EFDA"}}, - // Dark - {301, {"000", "404040", "737373"}}, - {302, {"000", "2F75B5", "5B9BD5"}}, - {303, {"000", "C65911", "ED7D31"}}, - {304, {"000", "7B7B7B", "A5A5A5"}}, - {305, {"000", "BF8F00", "FFC000"}}, - {306, {"000", 
"305496", "4472C4"}}, - {307, {"000", "548235", "70AD47"}}, - {308, {"000", "A6A6A6", "D9D9D9"}}, - {309, {"ED7D31", "BDD7EE", "DDEBF7"}}, - {310, {"FFC000", "DBDBDB", "EDEDED"}}, - {311, {"70AD47", "B4C6E7", "D9E1F2"}} -}; -/** Table parts font color map. `type`: {`firstRow`, `otherRow`} */ -const std::unordered_map> TABLE_COLOR { - // Light - {102, {"2F75B5", "2F75B5"}}, - {103, {"C65911", "C65911"}}, - {104, {"7B7B7B", "7B7B7B"}}, - {105, {"BF8F00", "BF8F00"}}, - {106, {"305496", "305496"}}, - {107, {"548235", "548235"}}, - {108, {"fff", ""}}, - // Medium - {201, {"fff", ""}}, - {202, {"fff", ""}}, - {203, {"fff", ""}}, - {204, {"fff", ""}}, - {205, {"fff", ""}}, - {206, {"fff", ""}}, - {207, {"fff", ""}}, - {208, {"fff", ""}}, - // Dark - {301, {"fff", "fff"}}, - {302, {"fff", "fff"}}, - {303, {"fff", "fff"}}, - {304, {"fff", "fff"}}, - {305, {"fff", "fff"}}, - {306, {"fff", "fff"}}, - {307, {"fff", "fff"}}, - {308, {"fff", ""}}, -}; - - -// public: -Sheet::Sheet(Book* book, int position, const std::string& name, size_t number, std::string &text) -: m_book(book), m_sheetContent(text), m_name(name), m_number(number), - m_maxRowCount((m_book->m_biffVersion >= 80) ? 
65536 : 16384), m_position(position) {} - -void Sheet::read() { - try { - bool isSstRichtext = m_book->m_addStyle && !m_book->m_richtextRunlistMap.empty(); - std::map, Rowinfo> rowinfoSharingDict; - std::unordered_map msTxos; - bool eofFound = false; - int savedObjectId; - int oldPosition = m_book->m_position; - m_book->m_position = m_position; - while (true) { - unsigned short code; - unsigned short size; - std::string data; - m_book->getRecordParts(code, size, data); - if (code == XL_NUMBER) { - // [:14] in following stmt ignores extraneous rubbish at end of record - unsigned short rowIndex = m_book->readByte(data, 0, 2); - unsigned short colIndex = m_book->readByte(data, 2, 2); - unsigned short xfIndex = m_book->readByte(data, 4, 2); - double d = m_book->readByte(data, 6, 8); - - append(std::to_string(d)); - // putCell(rowIndex, colIndex, std::to_string(d), xfIndex); - } else if (code == XL_LABELSST) { - unsigned short rowIndex = m_book->readByte(data, 0, 2); - unsigned short colIndex = m_book->readByte(data, 2, 2); - unsigned short xfIndex = m_book->readByte(data, 4, 2); - int sstIndex = m_book->readByte(data, 6, 4); - - append(m_book->m_sharedStrings[sstIndex]); - // putCell(rowIndex, colIndex, m_book->m_sharedStrings[sstIndex], xfIndex); - if (isSstRichtext) { - auto& runlist = m_book->m_richtextRunlistMap[sstIndex]; - if (!runlist.empty()) - m_richtextRunlistMap[{rowIndex, colIndex}] = runlist; - } - } else if (code == XL_LABEL) { - unsigned short rowIndex = m_book->readByte(data, 0, 2); - unsigned short colIndex = m_book->readByte(data, 2, 2); - unsigned short xfIndex = m_book->readByte(data, 4, 2); - std::string str; - - if (m_book->m_biffVersion < 80) - str = m_book->unpackString(data, 6, 2); - else - str = m_book->unpackUnicode(data, 6, 2); - append(str); - // putCell(rowIndex, colIndex, str, xfIndex); - } else if (code == XL_RSTRING) { - unsigned short rowIndex = m_book->readByte(data, 0, 2); - unsigned short colIndex = m_book->readByte(data, 2, 2); - 
unsigned short xfIndex = m_book->readByte(data, 4, 2); - int pos = 6; - std::string str; - std::vector> runlist; - - if (m_book->m_biffVersion < 80) { - str = m_book->unpackStringUpdatePos(data, pos, 2); - char nrt = data[pos]; - pos += 1; - for (int i = 0; i < nrt; ++i) { - runlist.emplace_back( - m_book->readByte(data, pos, 1), - m_book->readByte(data, pos+1, 1) - ); - pos += 2; - } - } else { - str = m_book->unpackUnicodeUpdatePos(data, pos, 2); - unsigned short nrt = m_book->readByte(data, pos, 2); - pos += 2; - for (int i = 0; i < nrt; ++i) { - runlist.emplace_back( - m_book->readByte(data, pos, 2), - m_book->readByte(data, pos+2, 2) - ); - pos += 4; - } - } - append(str); - // putCell(rowIndex, colIndex, str, xfIndex); - m_richtextRunlistMap[{rowIndex, colIndex}] = runlist; - } else if (code == XL_RK) { - unsigned short rowIndex = m_book->readByte(data, 0, 2); - unsigned short colIndex = m_book->readByte(data, 2, 2); - unsigned short xfIndex = m_book->readByte(data, 4, 2); - double d = unpackRK(data.substr(6, 4)); - - append(std::to_string(d)); - // putCell(rowIndex, colIndex, std::to_string(d), xfIndex); - } else if (code == XL_MULRK) { - unsigned short rowIndex = m_book->readByte(data, 0, 2); - unsigned short firstCol = m_book->readByte(data, 2, 2); - unsigned short lastCol = m_book->readByte(data, (int)data.size()-2, 2); - int pos = 4; - - for (int i = firstCol; i <= lastCol; ++i) { - unsigned short xfIndex = m_book->readByte(data, pos, 2); - double d = unpackRK(data.substr(pos+2, 4)); - pos += 6; - - append(std::to_string(d)); - // putCell(rowIndex, i, std::to_string(d), xfIndex); - } - } else if (code == XL_ROW) { - if (!m_book->m_addStyle) - continue; - - unsigned short rowIndex = m_book->readByte(data, 0, 2); - unsigned short flag1 = m_book->readByte(data, 6, 2); - int flag2 = m_book->readByte(data, 12, 4); - if (!(0 <= rowIndex && rowIndex < m_maxRowCount)) - continue; - - auto key = std::make_pair(flag1, flag2); - Rowinfo rowinfo; - if 
(rowinfoSharingDict.find(key) == rowinfoSharingDict.end()) { - // Using upkbits() is far too slow on a file with 30 sheets each with 10K rows. So: - rowinfo.m_height = flag1 & 0x7fff; - rowinfo.m_hasDefaultHeight = (flag1 >> 15) & 1; - rowinfo.m_outlineLevel = flag2 & 7; - rowinfo.m_isOutlineGroupStartsEnds = (flag2 >> 4) & 1; - rowinfo.m_isHidden = (flag2 >> 5) & 1; - rowinfo.m_isHeightMismatch = (flag2 >> 6) & 1; - rowinfo.m_hasDefaultXfIndex = (flag2 >> 7) & 1; - rowinfo.m_xfIndex = (flag2 >> 16) & 0xfff; - rowinfo.m_hasAdditionalSpaceAbove = (flag2 >> 28) & 1; - rowinfo.m_hasAdditionalSpaceBelow = (flag2 >> 29) & 1; - if (!rowinfo.m_hasDefaultXfIndex) - rowinfo.m_xfIndex = -1; - rowinfoSharingDict[key] = rowinfo; - } else - rowinfo = rowinfoSharingDict[key]; - m_rowinfoMap[rowIndex] = rowinfo; - } else if (code == 0x0006 || code == 0x0406 || code == 0x0206) { - unsigned short rowIndex; - unsigned short colIndex; - unsigned short xfIndex; - unsigned short flags; - std::string result; - if (m_book->m_biffVersion >= 50) { - rowIndex = m_book->readByte(data, 0, 2); - colIndex = m_book->readByte(data, 2, 2); - xfIndex = m_book->readByte(data, 4, 2); - result = m_book->readByte(data, 6, 8); - flags = m_book->readByte(data, 14, 2); - } else if (m_book->m_biffVersion >= 30) { - rowIndex = m_book->readByte(data, 0, 2); - colIndex = m_book->readByte(data, 2, 2); - xfIndex = m_book->readByte(data, 4, 2); - result = m_book->readByte(data, 6, 8); - flags = m_book->readByte(data, 14, 2); - } - // BIFF2 - else { - rowIndex = m_book->readByte(data, 0, 2); - colIndex = m_book->readByte(data, 2, 2); - std::string cellAttributes = m_book->readByte(data, 4, 3); - result = m_book->readByte(data, 7, 8); - flags = m_book->readByte(data, 15, 1); - - xfIndex = fixedXfIndexB2(cellAttributes); - } - - if (result.substr(6, 2) == "\xFF\xFF") { - char firstByte = result[0]; - if (firstByte == 0) { - // Need to read next record (STRING) - bool gotString = false; - // "if flags & 8" applies 
only to SHRFMLA - // Actually there's an optional SHRFMLA or ARRAY etc record to skip over - unsigned short code2; - unsigned short size2; - std::string data2; - - m_book->getRecordParts(code2, size2, data2); - if (code2 == XL_STRING || code2 == XL_STRING_B2) - gotString = true; - else if (find(XL_SHRFMLA_ETC.begin(), XL_SHRFMLA_ETC.end(), code2) == XL_SHRFMLA_ETC.end()) - throw std::logic_error( - "Expected SHRFMLA, ARRAY, TABLEOP* or STRING record; found " + - std::to_string(code2) - ); - - // Now for the STRING record - if (!gotString) { - m_book->getRecordParts(code2, size2, data2); - if (code2 != XL_STRING && code2 != XL_STRING_B2) - throw std::logic_error( - "Expected STRING record; found " + - std::to_string(code2) - ); - } - std::string str = stringRecordContent(data2); - append(str); - // putCell(rowIndex, colIndex, str, xfIndex); - } - // Boolean formula result - else if (firstByte == 1) { - append(std::string(1, result[2])); - // putCell(rowIndex, colIndex, std::string(1, result[2]), xfIndex); - } - // Error in cell - else if (firstByte == 2) { - append(std::string(1, result[2])); - // putCell(rowIndex, colIndex, std::string(1, result[2]), xfIndex); - } - // Empty ... i.e. empty (zero-length) string, NOT an empty cell - else if (firstByte == 3) { - // putCell(rowIndex, colIndex, "", xfIndex); - } - else { - throw std::logic_error( - "Unexpected special case (" + std::to_string(firstByte) + - ") in FORMULA" - ); - } - } - // It is a number - else { - double d = m_book->readByte(result, 0, 8); - append(std::to_string(d)); - // putCell(rowIndex, colIndex, std::to_string(d), xfIndex); - } - } else if (code == XL_BOOLERR) { - unsigned short rowIndex = m_book->readByte(data, 0, 2); - unsigned short colIndex = m_book->readByte(data, 2, 2); - unsigned short xfIndex = m_book->readByte(data, 4, 2); - unsigned char value = m_book->readByte(data, 6, 1); - //unsigned char hasError = m_book->readByte(data, 7, 1); - // Note: OOo Calc 2.0 writes 9-byte BOOLERR records. 
OOo docs say 8. Excel writes 8 - //int cellType = hasError ? XL_CELL_ERROR : XL_CELL_BOOLEAN; - append(std::string(1, value)); - // putCell(rowIndex, colIndex, std::string(1, value), xfIndex); - } else if (code == XL_COLINFO) { - if (!m_book->m_addStyle) - continue; - - Colinfo colinfo; - unsigned short firstColIndex = m_book->readByte(data, 0, 2); - unsigned short lastColIndex = m_book->readByte(data, 2, 2); - colinfo.m_width = m_book->readByte(data, 4, 2); - colinfo.m_xfIndex = m_book->readByte(data, 6, 1); - unsigned short flags = m_book->readByte(data, 8, 2); - // #Colinfo.m_width is denominated in 256ths of a character, not in characters - // Note: 256 instead of 255 is a common mistake. Silently ignore non-existing - // 257th column in that case - if (0 > firstColIndex || firstColIndex > lastColIndex || lastColIndex > 256) - continue; - - colinfo.m_isHidden = (flags & 0x0001) >> 0; - colinfo.m_bitFlag = (flags & 0x0002) >> 1; - colinfo.m_outlineLevel = (flags & 0x0700) >> 8; - colinfo.m_isCollapsed = (flags & 0x1000) >> 12; - - for (int i = firstColIndex; i <= lastColIndex; ++i) { - // Excel does 0 to 256 inclusive - if (i > 255) - break; - m_colinfoMap[i] = colinfo; - } - } else if (code == XL_DEFCOLWIDTH) { - m_defaultColWidth = m_book->readByte(data, 0, 2); - } else if (code == XL_STANDARDWIDTH) { - m_standardWidth = m_book->readByte(data, 0, 2); - } else if (code == XL_GCW) { - // Useless w/o COLINFO - if (!m_book->m_addStyle) - continue; - - std::vector iguff; - for (int i = 0; i < 8; ++i) - iguff.emplace_back(m_book->readByte(data, 2 + i*4, 4)); - - m_gcw.clear(); - for (auto& bits : iguff) { - for (int i = 0; i < 32; ++i) { - m_gcw.push_back(bits & 1); - bits >>= 1; - } - } - } else if (code == XL_BLANK) { - if (!m_book->m_addStyle) - continue; - - unsigned short rowIndex = m_book->readByte(data, 0, 2); - unsigned short colIndex = m_book->readByte(data, 2, 2); - unsigned short xfIndex = m_book->readByte(data, 4, 2); - - // putCell(rowIndex, colIndex, 
"", xfIndex); - } else if (code == XL_MULBLANK) { // 00BE - if (!m_book->m_addStyle) - continue; - - std::vector result; - for (int i = 0; i < (size >> 1); ++i) - result.emplace_back(m_book->readByte(data, 0 + i*2, 2)); - - auto mul_last = result.back(); - int pos = 2; - for (int colx = result[1]; colx < mul_last+1; ++colx) { - // putCell(result[0], colx, "", result[pos]); - pos += 1; - } - } else if (code == XL_DIMENSION || code == XL_DIMENSION2) { - // Four zero bytes after some other record - if (size == 0) - continue; - if (m_book->m_biffVersion < 80) { - m_dimensionRowCount = m_book->readByte(data, 2, 2); - m_dimensionColCount = m_book->readByte(data, 6, 2); - } - else { - m_dimensionRowCount = m_book->readByte(data, 4, 4); - m_dimensionColCount = m_book->readByte(data, 10, 2); - } - m_rowCount = 0; - m_colCount = 0; - - if ( - (m_book->m_biffVersion == 21 || m_book->m_biffVersion == 30 || m_book->m_biffVersion == 40) && - !m_book->m_xfList.empty() && !m_book->m_xfEpilogueDone - ) { - Formatting formatting(m_book); - formatting.xfEpilogue(); - } - } - else if (code == XL_HLINK) { - handleHyperlink(data); - } - else if (code == XL_QUICKTIP) { - handleQuicktip(data); - } - else if (code == XL_EOF) { - eofFound = true; - break; - } - else if (code == XL_OBJ) { - // Handle SHEET-level objects - MSObj savedObject; - handleMSObj(data, savedObject); - savedObjectId = savedObject.m_isNull ? 
-1 : savedObject.m_id; - } - //else if (code == XL_MSO_DRAWING) { - // handleMsodrawingetc(code, size, data); - //} - else if (code == XL_TXO) { - MSTxo msTxo; - handleMSTxo(data, msTxo); - if (!msTxo.m_isNull && (savedObjectId > 0)) { - msTxos[savedObjectId] = msTxo; - savedObjectId = -1; - } - } - else if (code == XL_NOTE) { - handleNote(data, msTxos); - } - //else if (code == XL_FEAT11) { - // handleFeat11(data); - //} - else if (find(BOF_CODES.begin(), BOF_CODES.end(), code) != BOF_CODES.end()) { - //unsigned short version = m_book->readByte(data, 0, 2); - //unsigned short bofType = m_book->readByte(data, 2, 2); - unsigned short code2; - - while (true) { - m_book->getRecordParts(code2, size, data); - if (code2 == XL_EOF) - break; - } - } - else if (code == XL_COUNTRY) { - // Handle country - m_book->m_countries = { - m_book->readByte(data, 0, 2), - m_book->readByte(data, 2, 2) - }; - } - else if (code == XL_LABELRANGES) { - int pos = 0; - unpackCellRangeAddressListUpdatePos(m_rowLabelRanges, data, pos, 8); - unpackCellRangeAddressListUpdatePos(m_colLabelRanges, data, pos, 8); - } - //else if (code == XL_ARRAY) { - // unsigned short firstRowIndex = m_book->readByte(data, 0, 2); - // unsigned short lastRowIndex = m_book->readByte(data, 2, 2); - // unsigned char firstColIndex = m_book->readByte(data, 4, 1); - // unsigned char lastColIndex = m_book->readByte(data, 5, 1); - // unsigned char flags = m_book->readByte(data, 6, 1); - // unsigned short tokenLength = m_book->readByte(data, 12, 2); - //} - //else if (code == XL_SHRFMLA) { - // unsigned short firstRowIndex = m_book->readByte(data, 0, 2); - // unsigned short lastRowIndex = m_book->readByte(data, 2, 2); - // unsigned char firstColIndex = m_book->readByte(data, 4, 1); - // unsigned char lastColIndex = m_book->readByte(data, 5, 1); - // unsigned char formulaCount = m_book->readByte(data, 7, 1); - // unsigned short tokenLength = m_book->readByte(data, 8, 2); - //} - else if (code == XL_CONDFMT) { - if 
(!m_book->m_addStyle) - continue; - - //unsigned short cfCount = m_book->readByte(data, 0, 2); - //unsigned short needRecalc = m_book->readByte(data, 2, 2); - //unsigned short rowIndex1 = m_book->readByte(data, 4, 2); - //unsigned short rowIndex2 = m_book->readByte(data, 6, 2); - //unsigned short colIndex1 = m_book->readByte(data, 8, 2); - //unsigned short colIndex2 = m_book->readByte(data, 10, 2); - - int pos = 12; - std::vector> oList; // Updated by function - unpackCellRangeAddressListUpdatePos(oList, data, pos, 8); - } - else if (code == XL_CF) { - if (!m_book->m_addStyle) - continue; - - //unsigned char cfType = m_book->readByte(data, 0, 1); - //unsigned char cmpOp = m_book->readByte(data, 1, 1); - unsigned short size1 = m_book->readByte(data, 2, 2); - unsigned short size2 = m_book->readByte(data, 4, 2); - int flags = m_book->readByte(data, 6, 4); - bool fontBlock = (flags >> 26) & 1; - bool borderBlock = (flags >> 28) & 1; - bool paletteBlock = (flags >> 29) & 1; - - int pos = 12; - if (fontBlock) { - //int fontHeight = m_book->readByte(data, 64, 4); - //int fontOptions = m_book->readByte(data, 68, 4); - //unsigned short weight = m_book->readByte(data, 72, 2); - //unsigned short escapement = m_book->readByte(data, 74, 2); - //unsigned char underline = m_book->readByte(data, 76, 1); - //int fontColorIndex = m_book->readByte(data, 80, 4); - //int twoBits = m_book->readByte(data, 88, 4); - //int fontEscapment = m_book->readByte(data, 92, 4); - //int fontUnderlying = m_book->readByte(data, 96, 4); - - //bool fontStyle = (twoBits > 1) & 1; - //bool posture = (fontOptions > 1) & 1; - //bool fontCancel = (twoBits > 7) & 1; - //bool cancellation = (fontOptions > 7) & 1; - pos += 118; - } - if (borderBlock) - pos += 8; - if (paletteBlock) - pos += 4; - std::string formula1 = data.substr(pos, size1); - pos += size1; - std::string formula2 = data.substr(pos, size2); - pos += size2; - } - else if (code == XL_DEFAULTROWHEIGHT) { - unsigned short bits; - if (size == 4) { - 
bits = m_book->readByte(data, 0, 2); - m_defaultRowHeight = m_book->readByte(data, 2, 2); - } - else if (size == 2) { - bits = 0; - m_defaultRowHeight = m_book->readByte(data, 0, 2); - } - else { - bits = 0; - } - m_isDefaultRowHeightMismatch = bits & 1; - m_isDefaultRowHidden = (bits >> 1) & 1; - m_hasDefaultAdditionalSpaceAbove = (bits >> 2) & 1; - m_hasDefaultAdditionalSpaceBelow = (bits >> 3) & 1; - } - else if (code == XL_MERGEDCELLS) { - if (!m_book->m_addStyle) - continue; - - int pos = 0; - unpackCellRangeAddressListUpdatePos(m_mergedCells, data, pos, 8); - } - else if (code == XL_WINDOW2) { - unsigned short options; - if (m_book->m_biffVersion >= 80 && size >= 14) { - options = m_book->readByte(data, 0, 2); - m_firstVisibleRowIndex = m_book->readByte(data, 2, 2); - m_firstVisibleColIndex = m_book->readByte(data, 4, 2); - m_gridlineColorIndex = m_book->readByte(data, 6, 2); - m_cachedPageBreakPreviewMagFactor = m_book->readByte(data, 8, 2); - m_cachedNormalViewMagFactor = m_book->readByte(data, 10, 2); - } - else { - options = m_book->readByte(data, 0, 2); - m_firstVisibleRowIndex = m_book->readByte(data, 2, 2); - m_firstVisibleColIndex = m_book->readByte(data, 4, 2); - m_gridlineColor = { - m_book->readByte(data, 6, 1), - m_book->readByte(data, 7, 1), - m_book->readByte(data, 8, 1), - }; - m_gridlineColorIndex = Formatting::getNearestColorIndex(m_book->m_colorMap, m_gridlineColor); - } - - m_showFormula = (options >> 0) & 1; - m_showGridLine = (options >> 1) & 1; - m_showSheetHeader = (options >> 2) & 1; - m_isFrozenPanes = (options >> 3) & 1; - m_showZeroValue = (options >> 4) & 1; - m_automaticGridLineColor = (options >> 5) & 1; - m_columnsRightToLeft = (options >> 6) & 1; - m_showOutlineSymbol = (options >> 7) & 1; - m_removeSplits = (options >> 8) & 1; - m_isSheetSelected = (options >> 9) & 1; - m_isSheetVisible = (options >> 10) & 1; - m_showPageBreakPreview = (options >> 11) & 1; - } - else if (code == XL_SCL) { - unsigned short num = 
m_book->readByte(data, 0, 2); - unsigned short den = m_book->readByte(data, 2, 2); - int result = 0; - if (den) - result = (num * 100); - if (!(10 <= result && result <= 400)) - result = 100; - m_sclMagFactor = result; - } - else if (code == XL_PANE) { - m_vertSplitPos = m_book->readByte(data, 0, 2); - m_horzSplitPos = m_book->readByte(data, 2, 2); - m_horzSplitFirstVisible = m_book->readByte(data, 4, 2); - m_vertSplitFirstVisible = m_book->readByte(data, 6, 2); - m_splitActivePane = m_book->readByte(data, 8, 1); - m_hasPaneRecord = true; - } - else if (code == XL_HORIZONTALBREAKS) { - if (!m_book->m_addStyle) - continue; - - //unsigned short breakCount = m_book->readByte(data, 0, 2); - int pos = 2; - if (m_book->m_biffVersion < 80) - while (pos < size) { - m_horizontalPageBreaks.push_back({ - m_book->readByte(data, pos, 2), - 0, - 255 - }); - pos += 2; - } - else - while (pos < size) { - m_horizontalPageBreaks.push_back({ - m_book->readByte(data, pos, 2), - m_book->readByte(data, pos+2, 2), - m_book->readByte(data, pos+4, 2) - }); - pos += 6; - } - } - else if (code == XL_VERTICALPAGEBREAKS) { - if (!m_book->m_addStyle) - continue; - - //unsigned short breakCount = m_book->readByte(data, 0, 2); - int pos = 2; - if (m_book->m_biffVersion < 80) - while (pos < size) { - m_verticalPageBreaks.push_back({ - m_book->readByte(data, pos, 2), - 0, - 65535 - }); - pos += 2; - } - else - while (pos < size) { - m_verticalPageBreaks.push_back({ - m_book->readByte(data, pos, 2), - m_book->readByte(data, pos+2, 2), - m_book->readByte(data, pos+4, 2) - }); - pos += 6; - } - } - // All of the following are for BIFF <= 4W - else if (m_book->m_biffVersion <= 45) { - Formatting formatting(m_book); - if (code == XL_FORMAT || code == XL_FORMAT2) - formatting.handleFormat(data, code); - else if (code == XL_FONT || code == XL_FONT_B3B4) - formatting.handleFont(data); - else if (code == XL_STYLE) { - if (!m_book->m_xfEpilogueDone) - formatting.xfEpilogue(); - formatting.handleStyle(data); 
- } - else if (code == XL_PALETTE) - formatting.handlePalette(data); - else if (code == XL_BUILTINFMTCOUNT) - m_book->m_builtinFormatCount = m_book->readByte(data, 0, 2); - else if (code == XL_XF4 || code == XL_XF3 || code == XL_XF2) // N.B. not XL_XF - formatting.handleXf(data); - else if (code == XL_DATEMODE) - m_book->m_dateMode = m_book->readByte(data, 0, 2); - else if (code == XL_CODEPAGE) { - m_book->m_codePage = m_book->readByte(data, 0, 2); - m_book->getEncoding(); - } - else if (code == XL_WRITEACCESS) - m_book->handleWriteAccess(data); - else if (code == XL_IXFE) - m_ixfe = m_book->readByte(data, 0, 2); - else if (code == XL_NUMBER_B2) { - unsigned short rowIndex = m_book->readByte(data, 0, 2); - unsigned short colIndex = m_book->readByte(data, 2, 2); - std::string cellAttributes = m_book->readByte(data, 4, 3); - double d = m_book->readByte(data, 7, 4); - - append(std::to_string(d)); - // putCell(rowIndex, colIndex, std::to_string(d), fixedXfIndexB2(cellAttributes)); - } - else if (code == XL_INTEGER) { - unsigned short rowIndex = m_book->readByte(data, 0, 2); - unsigned short colIndex = m_book->readByte(data, 2, 2); - std::string cellAttributes = m_book->readByte(data, 4, 3); - float d = m_book->readByte(data, 7, 2); - - append(std::to_string(d)); - // putCell(rowIndex, colIndex, std::to_string(d), fixedXfIndexB2(cellAttributes)); - } - else if (code == XL_LABEL_B2) { - unsigned short rowIndex = m_book->readByte(data, 0, 2); - unsigned short colIndex = m_book->readByte(data, 2, 2); - std::string cellAttributes = m_book->readByte(data, 4, 3); - std::string str = m_book->unpackString(data, 7, 1); - - append(str); - // putCell(rowIndex, colIndex, str, fixedXfIndexB2(cellAttributes)); - } - else if (code == XL_BOOLERR_B2) { - unsigned short rowIndex = m_book->readByte(data, 0, 2); - unsigned short colIndex = m_book->readByte(data, 2, 2); - std::string cellAttributes = m_book->readByte(data, 4, 3); - unsigned char value = m_book->readByte(data, 7, 1); - 
//unsigned char hasError = m_book->readByte(data, 8, 1); - - //int cellType = hasError ? XL_CELL_ERROR : XL_CELL_BOOLEAN; - append(std::to_string(value)); - // putCell(rowIndex, colIndex, std::to_string(value), fixedXfIndexB2(cellAttributes)); - } - else if (code == XL_BLANK_B2) { - if (!m_book->m_addStyle) - continue; - unsigned short rowIndex = m_book->readByte(data, 0, 2); - unsigned short colIndex = m_book->readByte(data, 2, 2); - std::string cellAttributes = m_book->readByte(data, 4, 3); - - // putCell(rowIndex, colIndex, "", fixedXfIndexB2(cellAttributes)); - } - else if (code == XL_EFONT) { - if (!m_book->m_addStyle) - continue; - m_book->m_fontList.back().m_color.m_index = m_book->readByte(data, 0, 2); - } - else if (code == XL_ROW_B2) { - if (!m_book->m_addStyle) - continue; - - unsigned short rowIndex = m_book->readByte(data, 0, 2); - unsigned short flag1 = m_book->readByte(data, 6, 2); - unsigned char flag2 = m_book->readByte(data, 10, 1); - - if (!(0 <= rowIndex && rowIndex < m_maxRowCount)) - continue; - int xfIndex; - // hasDefaultXfIndex is false - if (!(flag2 & 1)) { - xfIndex = -1; - } - // Seems XF index in the cellAttributes is dodgy - else if (size == 18) { - unsigned short xfx = m_book->readByte(data, 16, 2); - xfIndex = fixedXfIndexB2("", xfx); - } - else { - std::string cellAttributes = data.substr(13, 3); - xfIndex = fixedXfIndexB2(cellAttributes); - } - - auto key = std::make_pair(flag1, flag2); - Rowinfo rowinfo; - if (rowinfoSharingDict.find(key) == rowinfoSharingDict.end()) { - rowinfo.m_height = flag1 & 0x7fff; - rowinfo.m_hasDefaultHeight = (flag1 >> 15) & 1; - rowinfo.m_hasDefaultXfIndex = flag2 & 1; - rowinfo.m_xfIndex = xfIndex; - } - else { - rowinfo = rowinfoSharingDict[key]; - } - m_rowinfoMap[rowIndex] = rowinfo; - } - else if (code == XL_COLWIDTH) { // BIFF2 only - if (!m_book->m_addStyle) - continue; - - unsigned char firstColIndex = m_book->readByte(data, 0, 1); - unsigned char lastColIndex = m_book->readByte(data, 1, 1); - 
unsigned short width = m_book->readByte(data, 2, 2); - - if (firstColIndex > lastColIndex) - continue; - for (int i = firstColIndex; i <= lastColIndex; ++i) { - Colinfo colinfo; - if (m_colinfoMap.find(i) != m_colinfoMap.end()) { - m_colinfoMap[i].m_width = width; - //colinfo = m_colinfoMap[i]; - } - else { - colinfo.m_width = width; - m_colinfoMap[i] = colinfo; - } - } - } - else if (code == XL_COLUMNDEFAULT) { // BIFF2 only - if (!m_book->m_addStyle) - continue; - - unsigned short firstColIndex = m_book->readByte(data, 0, 2); - unsigned short lastColIndex = m_book->readByte(data, 2, 2); - // Warning: OOo docs wrong; firstColIndex <= colx < lastColIndex - if (0 > firstColIndex || firstColIndex >= lastColIndex || lastColIndex > 256) - lastColIndex = std::min((int)lastColIndex, 256); - for (int i = firstColIndex; i < lastColIndex; ++i) { - std::string cellAttributes = data.substr(4 + 3*(i - firstColIndex), 3); - int xfIndex = fixedXfIndexB2(cellAttributes); - - Colinfo colinfo; - if (m_colinfoMap.find(i) != m_colinfoMap.end()) { - m_colinfoMap[i].m_xfIndex = xfIndex; - //colinfo = m_colinfoMap[i]; - } - else { - colinfo.m_xfIndex = xfIndex; - m_colinfoMap[i] = colinfo; - } - } - } - else if (code == XL_WINDOW2_B2) { // BIFF 2 only - m_showFormula = (data[0] != '\0'); - m_showGridLine = (data[1] != '\0'); - m_showSheetHeader = (data[2] != '\0'); - m_isFrozenPanes = (data[3] != '\0'); - m_showZeroValue = (data[4] != '\0'); - - m_firstVisibleRowIndex = m_book->readByte(data, 5, 2); - m_firstVisibleColIndex = m_book->readByte(data, 7, 2); - m_automaticGridLineColor = m_book->readByte(data, 9, 1); - - m_gridlineColor = { - m_book->readByte(data, 10, 1), - m_book->readByte(data, 11, 1), - m_book->readByte(data, 12, 1) - }; - m_gridlineColorIndex = Formatting::getNearestColorIndex(m_book->m_colorMap, m_gridlineColor); - } - } - } - if (!eofFound) - throw std::logic_error("Sheet "+ std::to_string(m_number) + - " ("+ m_name + ") missing EOF record"); - // tidyDimensions(); - 
updateCookedFactors(); - m_book->m_position = oldPosition; - } catch (const std::logic_error &error) { - std::cout << error.what() << std::endl; - } -} - -void Sheet::append(const std::string &value) -{ - m_sheetContent += value + '\n'; -} - -#if 0 -void Sheet::putCell(int rowIndex, int colIndex, const std::string& value, int xfIndex) { - int rowCount = rowIndex + 1; - int colCount = colIndex + 1; - if (colCount > m_colCount) { - m_colCount = colCount; - // The row firstFullRowIndex and all subsequent rows are guaranteed to have length == m_colCount - // Cell data is not in non-descending row order AND m_colCount has been bumped up. - // This very rare case ruins this optmisation - if (rowCount < m_rowCount) - m_firstFullRowIndex = -2; - else if (rowIndex > m_firstFullRowIndex && m_firstFullRowIndex > -2) - m_firstFullRowIndex = rowIndex; - } - if (rowCount > m_rowCount) - m_rowCount = rowCount; - - // Add missing rows to table - for (int i = tools::xmlChildrenCount(m_table, "tr"); i <= rowIndex; ++i) { - auto tr = m_table.append_child("tr"); - addRowStyle(tr, i); - } - pugi::xml_node tr = *std::next(m_table.children("tr").begin(), rowIndex); - - // Add missing cells to row - for (int i = tools::xmlChildrenCount(tr, "td"); i < colIndex; ++i) { - auto td = tr.append_child("td"); - addColStyle(td, i); - } - auto td = tr.append_child("td"); - auto node = td; - - // Get cell style - if (m_book->m_addStyle) { - auto& xf = m_book->m_xfList[xfIndex]; - auto& cellFont = m_book->m_fontList[xf.m_fontIndex]; - addCellStyle(td, xf, rowIndex, colIndex); - - if (cellFont.m_isBold) - node = node.append_child("b"); - if (cellFont.m_isItalic) - node = node.append_child("i"); - if (cellFont.m_isUnderlined) - node = node.append_child("u"); - if (cellFont.m_isStruckOut) - node = node.append_child("s"); - if (cellFont.m_escapement == 1) - node = node.append_child("sup"); - if (cellFont.m_escapement == 2) - node = node.append_child("sub"); - } - - std::cout << value << std::endl; - 
node.append_child(pugi::node_pcdata).set_value(value.c_str()); -} - - -void Sheet::tidyDimensions() { - if (!m_mergedCells.empty()) { - int rowCount = 0; - int colCount = 0; - - for (const auto& cRange : m_mergedCells) { - if (cRange[1] > rowCount) - rowCount = cRange[1]; - if (cRange[3] > colCount) - colCount = cRange[3]; - } - if (colCount > m_colCount) { - m_colCount = colCount; - m_firstFullRowIndex = -2; - } - // Put one empty cell at (rowCount-1, 0) to make sure we have right number of rows - if (rowCount > m_rowCount) - putCell(rowCount-1, 0, "", -1); - } - - // Add missing cells to row - int rIndex = 0; - for (auto& tr : m_table.children("tr")) { - for (int i = tools::xmlChildrenCount(tr, "td"); i < m_colCount; ++i) { - auto td = tr.append_child("td"); - addColStyle(td, i); - } - rIndex++; - } - - // Add colspan/rowspan attributes - if (m_book->m_mergingMode == 0) { - int rowIndex = -1; - int colCount = -1; - for (const auto& cRange : m_mergedCells) { - auto tr = std::next(m_table.children("tr").begin(), cRange[0]); - - for (int i = cRange[0]; i < cRange[1]; ++i) { - if (rowIndex != i) { - rowIndex = i; - colCount = 0; - } - int offset = std::min(tools::xmlChildrenCount(*tr, "td"), cRange[3] - colCount) - 1; - auto td = std::next(tr->children("td").begin(), offset); - - int endRange = cRange[3] - (rowIndex == cRange[0]); - for (int j = cRange[2]; j < endRange; ++j) { - auto next = td--; - tr->remove_child(*next); - colCount++; - } - - if (rowIndex == cRange[0]) { - td->append_attribute("colspan") = std::to_string(cRange[3]-cRange[2]).c_str(); - td->append_attribute("rowspan") = std::to_string(cRange[1]-cRange[0]).c_str(); - } - tr++; - } - } - } - // Fill empty cells with duplicate values - else if (m_book->m_mergingMode == 1) { - for (const auto& cRange : m_mergedCells) { - auto tr = std::next(m_table.children("tr").begin(), cRange[0]); - auto tdMain = std::next(tr->children("td").begin(), cRange[2]); - - for (int i = cRange[0]; i < cRange[1]; ++i) { - 
auto td = std::next(tr->children("td").begin(), cRange[2]); - for (int j = cRange[2]; j < cRange[3]; ++j) { - // Each cell has only 1 element, so we need to copy only first child - if (td != tdMain) - td->append_copy(tdMain->first_child()); - td++; - } - tr++; - } - } - } -} -#endif - -// private: -std::string Sheet::stringRecordContent(const std::string& data) { - int length = (m_book->m_biffVersion >= 30) + 1; - unsigned short expectedCharCount = m_book->readByte(data, 0, length); - int offset = length; - int foundCharCount = 0; - std::string result = ""; - while (true) { - try { - if (m_book->m_biffVersion >= 80) - offset++; - std::string chunk = data.substr(offset); - result += chunk; - foundCharCount += static_cast(chunk.size()); - if (foundCharCount == expectedCharCount) - return result; - if (foundCharCount > expectedCharCount) - throw std::logic_error( - "STRING/CONTINUE: expected " + std::to_string(expectedCharCount) + - " chars, found " + std::to_string(foundCharCount) - ); - - unsigned short code; - unsigned short unusedLength; - std::string data; - m_book->getRecordParts(code, unusedLength, data); - if (code != XL_CONTINUE) - throw std::logic_error("Expected CONTINUE record; found record-type "+ std::to_string(code)); - offset = 0; - } catch (...) { - return result; - } - } -} - -int Sheet::fixedXfIndexB2(const std::string& cellAttributes, int trueXfIndex) { - int xfIndex; - if (m_book->m_biffVersion == 21) { - if (!m_book->m_xfList.empty()) { - if (trueXfIndex != -1) - xfIndex = trueXfIndex; - else - xfIndex = cellAttributes[0] & 0x3F; - - if (xfIndex == 0x3F) { - if (m_ixfe == 0) - throw std::logic_error("BIFF2 cell record has XF index 63 but no preceding IXFE record"); - xfIndex = m_ixfe; - // OOo docs are capable of interpretation that each - // cell record is preceded immediately by its own IXFE record. - // Empirical evidence is that (sensibly) an IXFE record applies to all - // following cell records until another IXFE comes along. 
- } - return xfIndex; - } - // Have either Excel 2.0, or broken 2.1 w/o XF records - same effect - m_book->m_biffVersion = 20; - } - xfIndex = m_cellAttributesToXfIndex[cellAttributes]; - if (xfIndex) - return xfIndex; - if (m_book->m_xfList.empty()) { - for (int i = 0; i < 16; ++i) - insertXfB20("\x40\x00\x00", i < 15); - } - xfIndex = insertXfB20(cellAttributes); - return xfIndex; -} - -int Sheet::insertXfB20(const std::string& cellAttributes, bool isStyle) { - int xfx = static_cast(m_book->m_xfList.size()); - XF xf; - fakeXfFromCellAttrB20(xf, cellAttributes, isStyle); - xf.m_xfIndex = xfx; - m_book->m_xfList.emplace_back(xf); - - if (m_book->m_formatMap.find(xf.m_formatKey) == m_book->m_formatMap.end()) { - Format fmt(xf.m_formatKey, FUN, "General"); - m_book->m_formatMap[xf.m_formatKey] = fmt; - m_book->m_formatList.emplace_back(fmt); - } - - Format fmt = m_book->m_formatMap[xf.m_formatKey]; - int cellty = CELL_TYPE_FROM_FORMAT_TYPE.at(fmt.m_type); - m_book->m_xfIndexXlTypeMap[xf.m_xfIndex] = cellty; - m_cellAttributesToXfIndex[cellAttributes] = xfx; - return xfx; -} - -void Sheet::fakeXfFromCellAttrB20(XF& xf, const std::string& cellAttributes, bool isStyle) { - xf.m_alignment = XFAlignment(); - xf.m_alignment.m_indentLevel = 0; - xf.m_alignment.m_isShrinkToFit = 0; - xf.m_alignment.m_textDirection = 0; - xf.m_border = XFBorder(); - xf.m_border.m_diagUp = false; - xf.m_border.m_diagDown = false; - xf.m_border.m_diagColor.m_index = 0; - xf.m_border.m_diagLineStyle = 0; // No line - xf.m_background = XFBackground(); - xf.m_protection = XFProtection(); - - unsigned char protection = m_book->readByte(cellAttributes, 0, 1); - unsigned char fontFormat = m_book->readByte(cellAttributes, 0, 1); - unsigned char style = m_book->readByte(cellAttributes, 0, 1); - - xf.m_protection.m_isCellLocked = (protection & 0x40) >> 6; - xf.m_protection.m_isFormulaHidden = (protection & 0x80) >> 7; - - xf.m_parentStyleIndex = isStyle ? 
0 : 0x0FFF; - xf.m_formatKey = fontFormat & 0x3F; - xf.m_fontIndex = (fontFormat & 0xC0) >> 6; - - xf.m_alignment.m_isShrinkToFit = style & 0x07; - xf.m_alignment.m_verticalAlign = 2; // Bottom - xf.m_alignment.m_rotation = 0; - - xf.m_border.m_leftLineStyle = (style & 0x08) ? 1 : 0; // 1 - thin - xf.m_border.m_leftColor.m_index = (style & 0x08) ? 8 : 0; // 8 - black - xf.m_border.m_rightLineStyle = (style & 0x10) ? 1 : 0; - xf.m_border.m_rightColor.m_index = (style & 0x10) ? 8 : 0; - xf.m_border.m_topLineStyle = (style & 0x20) ? 1 : 0; - xf.m_border.m_topColor.m_index = (style & 0x20) ? 8 : 0; - xf.m_border.m_bottomLineStyle = (style & 0x40) ? 1 : 0; - xf.m_border.m_bottomColor.m_index = (style & 0x40) ? 8 : 0; - - xf.m_formatFlag = true; - xf.m_fontFlag = true; - xf.m_alignmentFlag = true; - xf.m_borderFlag = true; - xf.m_backgroundFlag = true; - xf.m_protectionFlag = true; - - xf.m_background.m_fillPattern = (style & 0x80) ? 17 : 0; - xf.m_background.m_backgroundColor.m_index = 9; // White - xf.m_background.m_patternColor.m_index = 8; // Black -} - - -std::string Sheet::getNullTerminatedUnicode(const std::string& buf, int& offset) const { - unsigned long size = m_book->readByte(buf, offset, 4) * 2; - offset += 4; - std::string res = buf.substr(offset, size-1); - offset += size; - return res; -} - -void Sheet::handleHyperlink(const std::string& data) { - int recordSize = static_cast(data.size()); - Hyperlink hlink; - hlink.m_firstRowIndex = m_book->readByte(data, 0, 2); - hlink.m_lastRowIndex = m_book->readByte(data, 2, 2); - hlink.m_firstColIndex = m_book->readByte(data, 4, 2); - hlink.m_lastColIndex = m_book->readByte(data, 6, 2); - int options = m_book->readByte(data, 28, 4); - int offset = 32; - - // Has a description - if (options & 0x14) - hlink.m_description = getNullTerminatedUnicode(data, offset); - // Has a target - if (options & 0x80) - hlink.m_target = getNullTerminatedUnicode(data, offset); - - // HasMoniker and not MonikerSavedAsString - if 
((options & 1) && !(options & 0x100)) { - // An OLEMoniker structure - std::string clsId = m_book->readByte(data, offset, 16); - offset += 16; - if (clsId == "\xE0\xC9\xEA\x79\xF9\xBA\xCE\x11\x8C\x82\x00\xAA\x00\x4B\xA9\x0B") { - // URL Moniker - unsigned long size = m_book->readByte(data, offset, 4); - offset += 4; - hlink.m_type = "url"; - hlink.m_url = data.substr(offset, size); - hlink.m_url = hlink.m_url.substr(0, hlink.m_url.find("\x00")); - offset += size; - } - else if (clsId == "\x03\x03\x00\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46") { - // File moniker - unsigned long upLevels = m_book->readByte(data, offset, 2); - int size = m_book->readByte(data, offset+2, 4); - hlink.m_type = "local file"; - offset += 6; - // BYTES, not unicode - std::string shortPath = tools::repeatString("..\\", upLevels) + - data.substr(offset, size - 1); - offset += size + 24; // OOo: "unknown byte sequence" - // Above is version 0xDEAD + 20 reserved zero bytes - size = m_book->readByte(data, offset, 4); - offset += 4; - if (size) { - size = m_book->readByte(data, offset, 4); - offset += 6; // "unknown byte sequence" MS: 0x0003 - std::string extendedPath = data.substr(offset, size); // Not zero-terminated - offset += size; - hlink.m_url = extendedPath; - } - // The "shortpath" is bytes encoded in *UNKNOWN* creator's "ANSI" encoding - else { - hlink.m_url = shortPath; - } - } - } - // UNC - else if ((options & 0x163) == 0x103) { - hlink.m_type = "unc"; - hlink.m_url = getNullTerminatedUnicode(data, offset); - } - else if ((options & 0x16B) == 8) - hlink.m_type = "workbook"; - else - hlink.m_type = "unknown"; - - // Has textmark - if (options & 0x8) - hlink.m_textmark = getNullTerminatedUnicode(data, offset); - - int extraByteCount = recordSize - offset; - if (extraByteCount < 0) - throw std::logic_error("Bug or corrupt file, send copy of input file for debugging"); - - m_hyperlinkList.push_back(hlink); - for (int i = hlink.m_firstRowIndex; i <= hlink.m_lastRowIndex; ++i) 
- for (int j = hlink.m_firstColIndex; j <= hlink.m_lastColIndex; ++j) - m_hyperlinkMap[{i, j}] = hlink; -} - -void Sheet::handleQuicktip(const std::string& data) { - //unsigned short codeIndex = m_book->readByte(data, 0, 2); - //unsigned short firstRowIndex = m_book->readByte(data, 2, 2); - //unsigned short lastRowIndex = m_book->readByte(data, 4, 2); - //unsigned short firstColIndex = m_book->readByte(data, 6, 2); - //unsigned short lastColIndex = m_book->readByte(data, 8, 2); - m_hyperlinkList.back().m_quicktip = data.substr(10, data.size() - 10 - 2); -} - -void Sheet::handleMSObj(const std::string& data, MSObj& msObj) { - if (m_book->m_biffVersion < 80) { - msObj.m_isNull = true; - return; - } - int size = static_cast(data.size()); - int pos = 0; - while (pos < size) { - unsigned short ft = m_book->readByte(data, pos, 2); - unsigned short cb = m_book->readByte(data, pos+2, 2); - if (pos == 0 && !(ft == 0x15 && cb == 18)) { - msObj.m_isNull = true; - return; - } - // ftCmo ... s/b first - if (ft == 0x15) { - msObj.m_type = m_book->readByte(data, pos+4, 2); - msObj.m_id = m_book->readByte(data, pos+6, 2); - unsigned short options = m_book->readByte(data, pos+8, 2); - - msObj.m_isLocked = (options & 0x0001) >> 0; - msObj.m_isPrintable = (options & 0x0010) >> 4; - msObj.m_autoFilter = (options & 0x0100) >> 8; // Not documented in Excel 97 dev kit - msObj.m_scrollbarFlag = (options & 0x0200) >> 9; // Not documented in Excel 97 dev kit - msObj.m_autoFill = (options & 0x2000) >> 13; - msObj.m_autoLine = (options & 0x4000) >> 14; - } - else if (ft == 0x00) { - // Ignore "optional reserved" data at end of record - if (data.substr(pos, size - pos) == std::string(size - pos, '\0')) - break; - throw std::logic_error("Unexpected data at end of OBJECT record"); - } - // Scrollbar - else if (ft == 0x0C) { - msObj.m_scrollbarValue = m_book->readByte(data, pos+8, 2); - msObj.m_scrollbarMin = m_book->readByte(data, pos+10, 2); - msObj.m_scrollbarMax = m_book->readByte(data, 
pos+12, 2); - msObj.m_scrollbarInc = m_book->readByte(data, pos+14, 2); - msObj.m_scrollbarPage = m_book->readByte(data, pos+16, 2); - } - // List box data - else if (ft == 0x13) { - // Non standard exit. NOT documented - if (msObj.m_autoFilter) - break; - } - pos += cb + 4; - } -} - -void Sheet::handleMSTxo(const std::string& data, MSTxo& msTxo) { - if (m_book->m_biffVersion < 80) { - msTxo.m_isNull = true; - return; - } - size_t size = data.size(); - unsigned short options = m_book->readByte(data, 0, 2); - msTxo.m_rotation = m_book->readByte(data, 2, 2); - std::string controlInfo = data.substr(4, 6); - unsigned short cchText = m_book->readByte(data, 10, 2); - unsigned short cbRuns = m_book->readByte(data, 12, 2); - msTxo.m_isNotEmpty = m_book->readByte(data, 14, 2); - msTxo.m_formula = data.substr(16, size); - - msTxo.m_horzAlign = (options & 0x0001) >> 3; - msTxo.m_vertAlign = (options & 0x0001) >> 6; - msTxo.m_lockText = (options & 0x0001) >> 9; - msTxo.m_justLast = (options & 0x0001) >> 14; - msTxo.m_secretEdit = (options & 0x0001) >> 15; - - msTxo.m_text.clear(); - int totalCharCount = 0; - while (totalCharCount < cchText) { - unsigned short code2; - unsigned short size2; - std::string data2; - m_book->getRecordParts(code2, size2, data2); - - char nb = data2[0]; // 0 means latin1, 1 means utf_16_le - int charCount = size2 - 1; - if (nb) - charCount /= 2; - - int endPos = 0; - msTxo.m_text += m_book->unpackUnicodeUpdatePos(data2, endPos, 2, charCount); - totalCharCount += charCount; - } - msTxo.m_richtextRunlist.clear(); - int totalRuns = 0; - while (totalRuns < cbRuns) { // Counts of BYTES, not runs - unsigned short code2; - unsigned short size2; - std::string data2; - m_book->getRecordParts(code2, size2, data2); - - for (int pos = 0; pos < size2; pos += 8) { - msTxo.m_richtextRunlist.emplace_back(m_book->readByte(data2, pos, 2), - m_book->readByte(data2, pos+2, 2)); - totalRuns += 8; - } - } - // Remove trailing entries that point to the end of string - for 
( - auto it = msTxo.m_richtextRunlist.rbegin(); - (it != msTxo.m_richtextRunlist.rend()) && (it->first == cchText); - ++it - ) - msTxo.m_richtextRunlist.pop_back(); -} - -void Sheet::handleNote(const std::string& data, std::unordered_map& msTxos) { - Note note; - int size = static_cast(data.size()); - if (m_book->m_biffVersion < 80) { - note.m_rowIndex = m_book->readByte(data, 0, 2); - note.m_colIndex = m_book->readByte(data, 2, 2); - unsigned short expectedByteCount = m_book->readByte(data, 4, 2); - - unsigned short nb = size - 6; - note.m_text = data.substr(6, size); - expectedByteCount -= nb; - while (expectedByteCount > 0) { - unsigned short code2; - unsigned short size2; - std::string data2; - m_book->getRecordParts(code2, size2, data2); - - nb = m_book->readByte(data2, 4, 2); - note.m_text += data2.substr(6); - expectedByteCount -= nb; - } - note.m_richtextRunlist.emplace_back(0, 0); - - m_cellNoteMap[{note.m_rowIndex, note.m_colIndex}] = note; - return; - } - // Excel 8.0+ - note.m_rowIndex = m_book->readByte(data, 0, 2); - note.m_colIndex = m_book->readByte(data, 2, 2); - unsigned short options = m_book->readByte(data, 4, 2); - note.m_objectId = m_book->readByte(data, 6, 2); - - note.m_isShown = (options >> 1) & 1; - note.m_isRowHidden = (options >> 7) & 1; - note.m_isColHidden = (options >> 8) & 1; - // XL97 dev kit says NULL [sic] bytes padding between string count and string data - // to ensure that string is word-aligned. 
Appears to be nonsense - int endPos = 8; - note.m_author = m_book->unpackUnicodeUpdatePos(data, endPos, 2); - // There is a random/undefined byte after the author string (not counted in string length) - if (msTxos.find(note.m_objectId) != msTxos.end()) { - auto& msTxo = msTxos[note.m_objectId]; - note.m_text = msTxo.m_text; - note.m_richtextRunlist = msTxo.m_richtextRunlist; - m_cellNoteMap[{note.m_rowIndex, note.m_colIndex}] = note; - } -} - -void Sheet::updateCookedFactors() { - if (m_showPageBreakPreview) { - // No SCL record - if (m_sclMagFactor == -1) - m_cookedPageBreakPreviewMagFactor = 100; // Yes, 100, not 60, NOT a typo - else - m_cookedPageBreakPreviewMagFactor = m_sclMagFactor; - int zoom = m_cachedNormalViewMagFactor; - if (!(10 <= zoom && zoom <= 400)) - zoom = m_cookedPageBreakPreviewMagFactor; - m_cookedNormalViewMagFactor = zoom; - } - // Normal view mode - else { - // No SCL record - if (m_sclMagFactor == -1) - m_cookedNormalViewMagFactor = 100; - else - m_cookedNormalViewMagFactor = m_sclMagFactor; - int zoom = m_cachedPageBreakPreviewMagFactor; - // VALID, defaults to 60 - if (!zoom) - zoom = 60; - else if (!(10 <= zoom && zoom <= 400)) - zoom = m_cookedNormalViewMagFactor; - m_cookedPageBreakPreviewMagFactor = zoom; - } -} - - -void Sheet::unpackCellRangeAddressListUpdatePos(std::vector>& outputList, - const std::string& data, int& pos, int addressSize) const -{ - unsigned short listSize = m_book->readByte(data, pos, 2); - pos += 2; - if (listSize) { - for (int i = 0; i < listSize; ++i) { - if (addressSize == 6) - outputList.push_back({ - m_book->readByte(data, pos, 2), - m_book->readByte(data, pos+2, 2) + 1, - m_book->readByte(data, pos+4, 1), - m_book->readByte(data, pos+5, 1) + 1 - }); - else - outputList.push_back({ - m_book->readByte(data, pos, 2), - m_book->readByte(data, pos+2, 2) + 1, - m_book->readByte(data, pos+4, 2), - m_book->readByte(data, pos+6, 2) + 1 - }); - pos += addressSize; - } - } -} - -double Sheet::unpackRK(const 
std::string& data) const { - char flags = data[0]; - // There's a SIGNED 30-bit integer in there - if (flags & 2) { - int i = m_book->readByte(data, 0, 4); - i >>= 2; // Div by 4 to drop the 2 flag bits - if (flags & 1) - return i / 100.0; - return i; - } - // It's the most significant 30 bits of IEEE 754 64-bit FP number - else { - double d = m_book->readByte( - std::string(4, '\0') + (char)(flags & 252) + data.substr(1, 3), - 0, 8 - ); - if (flags & 1) - return d / 100.0; - return d; - } -} - -void Sheet::addCellStyle(pugi::xml_node& node, const XF& xf, int rowIndex, int colIndex) { - auto& cellFont = m_book->m_fontList[xf.m_fontIndex]; - auto fontColor = getColor(cellFont.m_color); - auto cellColor = getColor(xf.m_background.m_patternColor); - std::unordered_map styleMap; - std::unordered_map borderMap; - - // Column style - addColStyle(node, colIndex); - - // Table parts style - for (const auto& cRange : m_tableParts) { - if (cRange[0] <= rowIndex && rowIndex <= cRange[1] && - cRange[2] <= colIndex && colIndex <= cRange[3] - ) { - if (cRange[0] == rowIndex) { - getTableColor(styleMap["color"], TABLE_COLOR.at(cRange[4]), 0); - getTableColor(styleMap["background"], TABLE_BACKGROUND.at(cRange[4]), 0); - } - /*else { - getTableColor(styleMap["color"], TABLE_COLOR[cRange[4]], 1); - if ((rowIndex - cRange[0]) % 2) - getTableColor(styleMap["background"], TABLE_BACKGROUND[cRange[4]], 1); - else - getTableColor(styleMap["background"], TABLE_BACKGROUND[cRange[4]], 2); - }*/ - break; - } - } - - // Cell style - styleMap["font-size"] = std::to_string(cellFont.m_height/20) +"px"; - styleMap["font-family"] = "'"+ cellFont.m_name +"'"; - - if (!cellColor.empty()) - styleMap["background"] = cellColor; - if (!fontColor.empty()) - styleMap["color"] = fontColor; - - if (xf.m_alignment.m_horizontalAlign) - styleMap["text-align"] = CELL_HORZ_ALIGN[xf.m_alignment.m_horizontalAlign]; - if (xf.m_alignment.m_verticalAlign) - styleMap["vertical-align"] = 
CELL_VERT_ALIGN[xf.m_alignment.m_verticalAlign]; - - borderMap["top"] = getColor(xf.m_border.m_topColor); - borderMap["left"] = getColor(xf.m_border.m_leftColor); - borderMap["right"] = getColor(xf.m_border.m_rightColor); - borderMap["bottom"] = getColor(xf.m_border.m_bottomColor); - styleMap["border-top"] = std::to_string(CELL_BORDER_SIZE[xf.m_border.m_topLineStyle]) +"px "+ - CELL_BORDER_TYPE[xf.m_border.m_topLineStyle] +" "+ - (borderMap["top"].empty() ? "#000" : borderMap["top"]); - styleMap["border-left"] = std::to_string(CELL_BORDER_SIZE[xf.m_border.m_leftLineStyle]) +"px "+ - CELL_BORDER_TYPE[xf.m_border.m_leftLineStyle] +" "+ - (borderMap["left"].empty() ? "#000" : borderMap["left"]); - styleMap["border-right"] = std::to_string(CELL_BORDER_SIZE[xf.m_border.m_rightLineStyle]) +"px "+ - CELL_BORDER_TYPE[xf.m_border.m_rightLineStyle] +" "+ - (borderMap["right"].empty() ? "#000" : borderMap["right"]); - styleMap["border-bottom"] = std::to_string(CELL_BORDER_SIZE[xf.m_border.m_bottomLineStyle]) +"px "+ - CELL_BORDER_TYPE[xf.m_border.m_bottomLineStyle] +" "+ - (borderMap["bottom"].empty() ? 
"#000" : borderMap["bottom"]); - - if (xf.m_alignment.m_rotation) { - if (xf.m_alignment.m_rotation <= 90) - styleMap["transform"] = "rotate("+ - std::to_string(-xf.m_alignment.m_rotation) +"deg)"; - else if (xf.m_alignment.m_rotation <= 180) - styleMap["transform"] = "rotate("+ - std::to_string(xf.m_alignment.m_rotation - 90) +"deg)"; - } - - if (xf.m_alignment.m_textDirection) - styleMap["direction"] = "rtl"; - - std::string style; - for (const auto& sm : styleMap) { - if (!sm.second.empty()) - style += sm.first + ":" + sm.second + "; "; - } - if (!style.empty()) { - if (node.attribute("style")) - node.attribute("style").set_value((style + node.attribute("style").value()).c_str()); - else - node.append_attribute("style") = style.c_str(); - } -} - -void Sheet::addRowStyle(pugi::xml_node& node, int rowIndex) { - if (!m_book->m_addStyle || m_rowinfoMap.find(rowIndex) == m_rowinfoMap.end()) - return; - - std::unordered_map styleMap; - if (m_rowinfoMap[rowIndex].m_height) - styleMap["height"] = std::to_string(m_rowinfoMap[rowIndex].m_height/20) +"px"; - if (m_rowinfoMap[rowIndex].m_isHidden) - styleMap["display"] = "none"; - - std::string style; - for (const auto& sm : styleMap) - style += sm.first + ":" + sm.second + "; "; - if (!style.empty()) - node.append_attribute("style") = style.c_str(); -} - -void Sheet::addColStyle(pugi::xml_node& node, int colIndex) { - if (!m_book->m_addStyle || m_colinfoMap.find(colIndex) == m_colinfoMap.end()) - return; - - std::unordered_map styleMap; - if (m_colinfoMap[colIndex].m_width) - styleMap["min-width"] = std::to_string(m_colinfoMap[colIndex].m_width/45) +"px"; - if (m_colinfoMap[colIndex].m_isHidden) - styleMap["display"] = "none"; - - std::string style; - for (const auto& sm : styleMap) - style += sm.first + ":" + sm.second + "; "; - if (!style.empty()) - node.append_attribute("style") = style.c_str(); -} - -std::string Sheet::getColor(const XFColor& color) const { - std::vector result; - if (color.m_isRgb) { - result = 
color.m_rgb; - } - else { - result = m_book->m_colorMap[color.m_index]; - if (result.empty()) - return ""; - } - - if (color.m_tint < 0) { - for (auto& c : result) - c = static_cast(c * (1 + color.m_tint)); - } - else if (color.m_tint > 0) { - for (auto& c : result) - c = static_cast(c * (1 - color.m_tint) + (255 - 255 * (1 - color.m_tint))); - } - - return "rgb("+ std::to_string(result[0]) +", "+ - std::to_string(result[1]) +", "+ std::to_string(result[2]) +")"; -} - -void Sheet::getTableColor(std::string& style, const std::vector& colorMap, - int colorIndex) const -{ - if (static_cast(colorMap.size()) > colorIndex && !colorMap[colorIndex].empty()) - style = "#" + colorMap[colorIndex]; -} - -} // End namespace diff --git a/3rdparty/libs/fileext/excel/sheet.hpp b/3rdparty/libs/fileext/excel/sheet.hpp deleted file mode 100644 index 5e8db46..0000000 --- a/3rdparty/libs/fileext/excel/sheet.hpp +++ /dev/null @@ -1,770 +0,0 @@ -/** - * @brief Excel files (xls/xlsx) into HTML сonverter - * @package excel - * @file sheet.hpp - * @author dmryutov (dmryutov@gmail.com) - * @copyright python-excel (https://github.com/python-excel/xlrd) - * @date 02.12.2016 -- 28.01.2018 - */ -#pragma once - -#include -#include -#include -#include -#include - -#include "book.hpp" -#include "format.hpp" - - -namespace excel { - -class Book; -class XF; -class XFColor; -class Hyperlink; -class MSObj; -class MSTxo; -class Note; -class Cell; - -/** - * @class Colinfo - * @brief - * Width and default formatting information that applies to one or more columns in a sheet - * @details - * Here is default hierarchy for width, according to the OOo docs: - * In BIFF3, if a COLINFO record is missing for a column, width specified in record DEFCOLWIDTH. - * In BIFF4-BIFF7, width set in this COLINFO record is only used, if the corresponding bit for - * this column is cleared in GCW record, otherwise column width set in DEFCOLWIDTH record. 
- * In BIFF8, if COLINFO record is missing for column, width specified in record STANDARDWIDTH. - * If this STANDARDWIDTH record is also missing, column width of record DEFCOLWIDTH is used. - */ -class Colinfo { -public: - Colinfo() = default; - - /** Width of the column in 1/256 of width of zero character, using default font */ - unsigned short m_width = 0; - /** XF index for formatting empty cells */ - int m_xfIndex = -1; - /** If column is hidden */ - bool m_isHidden = false; - /** If column is collapsed */ - bool m_isCollapsed = false; - /** 1-bit flag whose purpose is unknown, but is often seen set to 1 */ - bool m_bitFlag = false; - /** Outline level of the column (0 = no outline) */ - int m_outlineLevel = 0; -}; - - -/** - * @class Rowinfo - * @brief - * Height and default formatting information that applies to row in a sheet - */ -class Rowinfo { -public: - Rowinfo() = default; - - /** Height of the row, in twips. One twip == 1/20 of a point */ - int m_height = 0; - /** - * If row has default height - * Value | Description - * :---: | ----------- - * False | Row has custom height - * True | Row has default height - */ - bool m_hasDefaultHeight = false; - /** - * Height mismatch - * Value | Description - * :---: | ----------- - * False | Row height and default font height are equal - * True | Row height and default font height do not match - */ - bool m_isHeightMismatch = false; - /** If row is hidden (manually, or by filter or outline group) */ - bool m_isHidden = false; - /** - * If row has default XF index - * Value | Description - * :---: | ----------- - * False | Ignore xfIndex attribute - * True | xfIndex attribute is usable - */ - bool m_hasDefaultXfIndex = false; - /** Index to default XF record for empty cells in this row */ - int m_xfIndex = 0; - /** Outline level of the row (0 to 7) */ - int m_outlineLevel = 0; - /** - * If outline group starts or ends here (depending on where outline buttons are located) - * and is collapsed - */ - bool 
m_isOutlineGroupStartsEnds = false; - /** - * This flag is set if the upper border of at least one cell in this row or if the lower - * border of at least one cell in row above is formatted with a thick line style. - * Thin and medium line styles are not taken into account - */ - bool m_hasAdditionalSpaceAbove = false; - /** - * This flag is set if the lower border of at least one cell in this row or if the upper - * border of at least one cell in row below is formatted with a medium or thick line style. - * Thin line styles are not taken into account - */ - bool m_hasAdditionalSpaceBelow = false; -}; - - -/** - * @class Sheet - * @brief - * Contains data for one worksheet - * @note - * Negative values for row/column indexes and slice positions are supported in expected fashion - */ -class Sheet { -public: - /** - * @param[in] book - * Pointer to parent Book object - * @param[in] position - * Sheet record start position - * @param[in] name - * Sheet name - * @param[in] number - * Sheet number - * @param[in] table - * Result HTML table - * @since 1.0 - */ - Sheet(Book* book, int position, const std::string& name, - size_t number, std::string& text); - - /** - * @brief - * Read SHEET data - * @since 1.0 - */ - void read(); - - /** - * @brief - * Add cell data to table - * @param[in] rowIndex - * Row index - * @param[in] colIndex - * Column index - * @param[in] value - * Cell value - * @param[in] xfIndex - * XF index - * @since 1.0 - */ - void putCell(int rowIndex, int colIndex, const std::string& value, int xfIndex); - - void append(const std::string &value); - - /** - * @brief - * Add missing cells to table - * @since 1.0 - */ - void tidyDimensions(); - - /** Pointer to parent BOOK object */ - Book* m_book; - /** Result HTML table */ - std::string& m_sheetContent; - /** Sheet name */ - std::string m_name; - /** Sheet number */ - size_t m_number; - /** Number of rows in sheet */ - int m_rowCount = 0; - /** - * Nominal number of columns in sheet. 
It is one more than maximum column index found, - * ignoring trailing empty cells - */ - int m_colCount = 0; - /** - * Map from column index to COLINFO object. Often there is an entry in COLINFO records - * for all column indexes in `range(257)` - */ - std::unordered_map m_colinfoMap; - /** - * Map from row index to ROWINFO object. It is possible to have missing entries - at least - * one source of XLS files doesn't bother writing ROW records - */ - std::unordered_map m_rowinfoMap; - /** - * List of address ranges of cells containing column labels. These are set up - * in Excel by `Insert` -> `Name` -> `Labels` -> `Columns` - */ - std::vector> m_colLabelRanges; - /** List of address ranges of cells containing row labels */ - std::vector> m_rowLabelRanges; - /** - * List of address ranges of cells which have been merged. These are set up - * in Excel by `Format` -> `Cells` -> `Alignment`, then ticking "Merge cells" box. - * Upper limits are exclusive: i.e. `[2, 3, 7, 9]` only spans two cells - */ - std::vector> m_mergedCells; - /** Table parts style list in format: `(row1, col1, row2, col2, styleId)` */ - std::vector> m_tableParts; - /** Max row count */ - long int m_maxRowCount; - /** Max column count */ - int m_maxColCount = 256; - /** Dimension row count as per DIMENSIONS record */ - int m_dimensionRowCount = 0; - /** Dimension column count as per DIMENSIONS record */ - int m_dimensionColCount = 0; - /** First full row index */ - int m_firstFullRowIndex = -1; - /** - * Default column width from DEFCOLWIDTH record, else `None`. From the OOo docs: - * Column width in characters, using width of zero character from default font (first FONT - * record in file). Excel adds some extra space to the default width, depending on default font - * and default font size. Algorithm how to exactly calculate resulting column width is not known. 
- * Example: - * @code Default width of 8 set in this record results in column width of 8.43 using Arial - * font with a size of 10 points. @endcode - */ - unsigned short m_defaultColWidth = 0; - /** - * Default column width from STANDARDWIDTH record, else `None`. From the OOo docs: - * Default width of columns in 1/256 of width of zero character, using default font - * (first FONT record in the file) - */ - unsigned short m_standardWidth; - /** - * Default value to be used for row if there is no ROW record for that row. From - * optional `DEFAULTROWHEIGHT` record - */ - int m_defaultRowHeight; - /** - * Default row height mismatch - * Value | Description - * :---: | ----------- - * False | Default row height and default font height are equal - * True | Default row height and default font height do not match - */ - bool m_isDefaultRowHeightMismatch; - /** If default row is hidden */ - bool m_isDefaultRowHidden; - /** If cells have default additional space above */ - bool m_hasDefaultAdditionalSpaceAbove; - /** If cells have default additional space below */ - bool m_hasDefaultAdditionalSpaceBelow; - /** List of HYPERLINK objects corresponding to HLINK records found in worksheet */ - std::vector m_hyperlinkList; - /** - * Mapping from `(rowIndex, colIndex)` to item in #m_hyperlinkList. Cells not covered by - * hyperlink are not mapped. It is possible using the Excel UI to set up a hyperlink that - * covers larger-than-1x1 rectangle of cells. Hyperlink rectangles may overlap (Excel doesn't - * check). When a multiply-covered cell is clicked on, hyperlink that is activated (and the one - * that is mapped here) is the last in @ref Sheet.m_hyperlinkList - */ - std::map, Hyperlink> m_hyperlinkMap; - /** - * Mapping from `(rowIndex, colIndex)` to NOTE object. Cells not containing - * a note ("comment") are not mapped - */ - std::map, Note> m_cellNoteMap; - /** - * Contains data for one cell. 
Cell objects have three attributes: `type` is an int, `value` - * (which depends on `type`) and `xfIndex`. - * The following table describes types of cells and how their values are represented - * Type symbol | Type number | Value - * --------------- | :---------: | ----- - * XL_CELL_EMPTY | 0 | Empty string - * XL_CELL_TEXT | 1 | Unicode string - * XL_CELL_NUMBER | 2 | Float - * XL_CELL_DATE | 3 | Float - * XL_CELL_BOOLEAN | 4 | Bool: 0 => False; 1 => True - * XL_CELL_ERROR | 5 | Int representing internal Excel codes - * XL_CELL_BLANK | 6 | Empty string. Only when m_addStyle = true - */ - std::vector> m_cellValues; - std::vector> m_cellTypes; - std::vector> m_cellXfIndexes; - /** - * Visibility of sheet - * Value | Description - * :---: | ----------- - * 0 | Visible - * 1 | Hidden (can be unhidden by user - `Format` -> `Sheet` -> `Unhide`) - * 2 | "very hidden" (can be unhidden only by VBA macro) - */ - int m_visibility = 0; - /** First visible row index */ - unsigned short m_firstVisibleRowIndex = 0; - /** First visible column index */ - unsigned short m_firstVisibleColIndex = 0; - /** Grid line color. 
Pre-BIFF8 */ - std::vector m_gridlineColor {0, 0, 0}; - /** Grid line color index */ - unsigned short m_gridlineColorIndex = 0x40; - /** - * Sheet flags - * Bit | Mask | Contents - * :-: | ----- | -------- - * 0 | 0001H | 0 = Show formula results 1 = Show formulas - * 1 | 0002H | 0 = Do not show grid lines 1 = Show grid lines - * 2 | 0004H | 0 = Do not show sheet headers 1 = Show sheet headers - * 3 | 0008H | 0 = Panes are not frozen 1 = Panes are frozen (freeze) - * 4 | 0010H | 0 = Show zero values as empty cells 1 = Show zero values - * 5 | 0020H | 0 = Manual grid line color 1 = Automatic grid line color - * 6 | 0040H | 0 = Columns from left to right 1 = Columns from right to left - * 7 | 0080H | 0 = Do not show outline symbols 1 = Show outline symbols - * 8 | 0100H | 0 = Keep splits if pane freeze is removed 1 = Remove splits if pane freeze is removed - * 9 | 0200H | 0 = Sheet not selected 1 = Sheet selected (BIFF5-BIFF8) - * 10 | 0400H | 0 = Sheet not visible 1 = Sheet visible (BIFF5-BIFF8) - * 11 | 0800H | 0 = Show in normal view 1 = Show in page break preview (BIFF8) - */ - bool m_showFormula = false; - bool m_showGridLine = true; - bool m_showSheetHeader = true; - bool m_isFrozenPanes = false; - bool m_showZeroValue = true; - bool m_automaticGridLineColor = true; - bool m_columnsRightToLeft = false; - bool m_showOutlineSymbol = true; - bool m_removeSplits = false; - /** If sheet is selected. Multiple sheets can be selected, but only one can be active */ - bool m_isSheetSelected = 0; - /** - * Should really be called "sheetActive" and is 1 when this sheet is sheet displayed when file - * is open. More than likely only one sheet should ever be set as visible. This would correspond - * to Book's sheetActive attribute, but that doesn't exist as WINDOW1 records aren't currently - * processed. 
The real thing is the visibility attribute from BOUNDSHEET record - */ - bool m_isSheetVisible = 0; - /** - * Mapping of `(rowIndex, colIndex)` to list of `(offset, fontIndex)` tuples. Offset defines - * where in string the font begins to be used. Offsets are expected to be in ascending order. - * If first offset != zero, the meaning is that cell's XF font should be used from offset 0. - * This is a sparse mapping. There is no entry for cells that are not formatted with rich text - */ - std::map, std::vector>> m_richtextRunlistMap; - /** Number of columns in left pane (frozen panes) */ - unsigned short m_vertSplitPos = 0; - /** Number of rows in top pane (frozen panes) */ - unsigned short m_horzSplitPos = 0; - /** Index of first visible row in bottom frozen/split pane */ - unsigned short m_horzSplitFirstVisible = 0; - /** Index of first visible column in right frozen/split pane */ - unsigned short m_vertSplitFirstVisible = 0; - /** Frozen panes: ignore it. Split panes: explanation and diagrams in OOo docs */ - char m_splitActivePane = 0; - /** Boolean specifying if PANE record was present */ - bool m_hasPaneRecord = false; - /** - * List of horizontal page breaks in this sheet. Breaks are tuples in the form - * `(index of row after break, start column index, end column index)` - */ - std::vector> m_horizontalPageBreaks; - /** - * List of vertical page breaks in this sheet. Breaks are tuples in the form - * `(index of column after break, start row index, end row index)` - */ - std::vector> m_verticalPageBreaks; - /** - * 256-element tuple corresponding to contents of GCW record for this sheet. If no such record, - * treat as all bits zero. Applies to BIFF4-7 only - */ - std::vector m_gcw; - /** - * Values calculated to predict mag factors that will actually be used by Excel to display - * worksheet. Pass these values to when writing XLS files. - * Warning 1: Behaviour of OOo Calc and Gnumeric has been observed to differ from Excel's. 
- * Warning 2: A value of zero means almost exactly what it says. Your sheet will be displayed - * as very tiny speck on the screen - */ - int m_cookedPageBreakPreviewMagFactor = 60; - int m_cookedNormalViewMagFactor = 100; - /** Values (if any) actually stored on the XLS file */ - int m_cachedPageBreakPreviewMagFactor = 0; // Default (60%), from WINDOW2 record - int m_cachedNormalViewMagFactor = 0; // Default (100%), from WINDOW2 record - bool m_showPageBreakPreview = 0; - int m_sclMagFactor = -1; // From SCL record - int m_ixfe = 0; // BIFF2 only - -private: - /** - * @brief - * Get record string content - * @param[in] data - * Binary data - * @return - * Record string content - * @since 1.0 - */ - std::string stringRecordContent(const std::string& data); - - /** - * @brief - * Get fixed BIFF 2 XF index - * @param[in] cellAttributes - * Cell attributes - * @param[in] rowIndex - * Row index - * @param[in] colIndex - * Column index - * @param[in] trueXfIndex - * True XF index - * @return - * XF index - * @since 1.0 - */ - int fixedXfIndexB2(const std::string& cellAttributes, int trueXfIndex = -1); - - /** - * @brief - * Insert new BIFF 2.0 XF - * @param[in] cellAttributes - * Cell attributes - * @param[in] isStyle - * If cell attribute is style - * @return - * XF index - * @since 1.0 - */ - int insertXfB20(const std::string& cellAttributes, bool isStyle = false); - - /** - * @brief - * Get fake XF from BIFF 2.0 cell attribute - * @param[out] xf - * Parent XF - * @param[in] cellAttributes - * Cell attributes - * @param[in] isStyle - * If cell attribute is style - * @since 1.0 - */ - void fakeXfFromCellAttrB20(XF& xf, const std::string& cellAttributes, bool isStyle = false); - - /** - * @brief - * Get null-terminated unicode string - * @param[in] buf - * Binary data - * @param[in] offset - * Record start position - * @return - * Unicode string - * @since 1.0 - */ - std::string getNullTerminatedUnicode(const std::string& buf, int& offset) const; - - /** - * @brief - * 
Read HYPERLINK record - * @param[in] data - * Binary data - * @since 1.0 - */ - void handleHyperlink(const std::string& data); - - /** - * @brief - * Read QUICKTIP record - * @param[in] data - * Binary data - * @since 1.0 - */ - void handleQuicktip(const std::string& data); - - /** - * @brief - * Process MSObj object - * @param[in] data - * Binary data - * @param[out] msObj - * MSObj object - * @since 1.0 - */ - void handleMSObj(const std::string& data, MSObj& msObj); - - /** - * @brief - * Process MSTxo object - * @param[in] data - * Binary data - * @param[out] msTxo - * MSTxo object - * @since 1.0 - */ - void handleMSTxo(const std::string& data, MSTxo& msTxo); - - /** - * @brief - * Process NOTE record - * @param[in] data - * Binary data - * @param[in] msTxos - * List of MSTxo objects - * @since 1.0 - */ - void handleNote(const std::string& data, std::unordered_map& msTxos); - - /** - * @brief - * Update coocked factors - * @details - * Cached values are used ONLY for non-active view mode. When user switches to non-active - * view mode, if cached value for that mode is not valid, Excel pops up a window which says: - * "The number must be between 10 and 400. Try again by entering a number in this range." 
- * When user hits OK, it drops into non-active view mode but uses the magn from active mode - * @since 1.0 - */ - void updateCookedFactors(); - - /** - * @brief - * Unpack range of cells address list and update position - * @param[out] outputList - * Array in which address will be saved - * @param[in] data - * Binary data - * @param[in] pos - * Record start position - * @param[in] addressSize - * Address size - * @since 1.0 - */ - void unpackCellRangeAddressListUpdatePos(std::vector>& outputList, - const std::string& data, int& pos, int addressSize = 6) const; - - /** - * @brief - * Unpack RK record data - * @param[in] data - * Binary data - * @return - * RD data - * @since 1.0 - */ - double unpackRK(const std::string& data) const; - - /** - * @brief - * Add style to cell node in HTML-tree - * @param[out] node - * Cell node in HTML-tree - * @param[in] xf - * Parent XF - * @param[in] rowIndex - * Row index - * @param[in] colIndex - * Column index - * @since 1.0 - */ - void addCellStyle(pugi::xml_node& node, const XF& xf, int rowIndex, int colIndex); - - /** - * @brief - * Add style to table row - * @param[out] node - * Cell node in HTML-tree - * @param[in] rowIndex - * Row index - * @since 1.0 - */ - void addRowStyle(pugi::xml_node& node, int rowIndex); - - /** - * @brief - * Add style to table column - * @param[out] node - * Cell node in HTML-tree - * @param[in] colIndex - * Column index - * @since 1.0 - */ - void addColStyle(pugi::xml_node& node, int colIndex); - - /** - * @brief - * Get object color from XFColor record - * @param[in] xf - * Parent XFColor object - * @return - * Object color like `rgb(red, green, blue)` - * @since 1.0 - */ - std::string getColor(const XFColor& color) const; - - /** - * @brief - * Get table color value - * @param[out] style - * Result style value - * @param[in] colorMap - * Color map - * @param[in] colorIndex - * Color index - * @since 1.0 - */ - void getTableColor(std::string& style, const std::vector& colorMap, int colorIndex) 
const; - - /** Sheet record start position */ - int m_position; - //** Highest rowIndex containing a non-empty cell */ - //int m_maxDataRowIndex = -1; - //** Highest colIndex containing a non-empty cell */ - //int m_maxDataColIndex = -1; - /** Maps cell attributes to XF index. BIFF2.0 only */ - std::unordered_map m_cellAttributesToXfIndex; -}; - - -/** - * @class Hyperlink - * @brief - * Contains attributes of hyperlink - */ -class Hyperlink { -public: - Hyperlink() = default; - - /** Index of first row */ - unsigned short m_firstRowIndex = 0; - /** Index of last row */ - unsigned short m_lastRowIndex = 0; - /** Index of first column */ - unsigned short m_firstColIndex = 0; - /** Index of last column */ - unsigned short m_lastColIndex = 0; - /** Type of hyperlink: unicode string, 'url', 'unc', 'local file', 'workbook', 'unknown' */ - std::string m_type; - /** - * URL or file-path, depending on type. Unicode string, except in the rare case of - * a local but non-existent file with non-ASCII characters in name, in which case - * only the "8.3" filename is available - */ - std::string m_url; - /** - * Description. It is displayed in cell, and should be identical to cell value. Unicode string - * or `None`. It seems impossible NOT to have description created by Excel UI - */ - std::string m_description; - /** - * Target frame. Unicode string. No cases of this have been seen in the wild. 
- * It seems impossible to create one in Excel UI - */ - std::string m_target; - /** - * The piece after the "#" in "http://docs.python.org/library#struct_module", - * or `Sheet1!A1:Z99` part when type is "workbook" - */ - std::string m_textmark; - /** Text of "quick tip" displayed when cursor hovers over hyperlink */ - std::string m_quicktip; -}; - - -/** - * @class MSObj - * @brief - * Represents MSObj object - */ -class MSObj { -public: - MSObj() = default; - - /** If object is NULL */ - bool m_isNull = false; - /** Object type */ - unsigned short m_type = 0; - /** Object id */ - unsigned short m_id = -1; - /** If object is locked */ - bool m_isLocked = false; - /** If object is printable */ - bool m_isPrintable = false; - /** Automatic filter options */ - int m_autoFilter = 0; - /** Automatic fill options */ - int m_autoFill = 0; - /** Automatic line options */ - int m_autoLine = 0; - /** Scrollbar parameters */ - int m_scrollbarFlag = 0; - unsigned short m_scrollbarValue = 0; - unsigned short m_scrollbarMin = 0; - unsigned short m_scrollbarMax = 0; - unsigned short m_scrollbarInc = 0; - unsigned short m_scrollbarPage = 0; -}; - - -/** - * @class MSTxo - * @brief - * Represents MSTxo object - */ -class MSTxo { -public: - MSTxo() = default; - - /** If object is NULL */ - bool m_isNull = false; - /** Object rotation */ - unsigned short m_rotation = 0; - /** If is not empty */ - unsigned short m_isNotEmpty = -1; - /** Object formula */ - std::string m_formula; - /** Object text */ - std::string m_text; - /** Object horizontal alignment */ - int m_horzAlign = 0; - /** Object vertical alignment */ - int m_vertAlign = 0; - /** Text options */ - int m_lockText = 0; - int m_justLast = 0; - int m_secretEdit = 0; - /** List List of `(offsetInString, fontIndex)` tuples. 
The first offset should always be 0 */ - std::vector> m_richtextRunlist; -}; - - -/** - * @class Note - * @brief - * Represents user "comment" or "note" object - */ -class Note { -public: - Note() = default; - - /** Current object id */ - unsigned short m_objectId = 0; - /** Author of note */ - std::string m_author; - /** Text of note */ - std::string m_text; - /** If note is always shown */ - bool m_isShown = 0; - /** If containing row is hidden */ - bool m_isRowHidden = false; - /** If containing column is hidden */ - bool m_isColHidden = false; - /** Row index */ - int m_rowIndex = 0; - /** Column index */ - int m_colIndex = 0; - /** List of `(offsetInString, fontIndex)` tuples. The first offset should always be 0 */ - std::vector> m_richtextRunlist; -}; - -} // End namespace diff --git a/3rdparty/libs/fileext/excel/style.css b/3rdparty/libs/fileext/excel/style.css deleted file mode 100644 index 267f320..0000000 --- a/3rdparty/libs/fileext/excel/style.css +++ /dev/null @@ -1 +0,0 @@ -body{background:#fafafa}label{background:#f1f1f1;color:#aaa;font-size:14px;font-weight:600;text-align:center;position:relative;top:3px;margin:0 0 -1px;padding:10px;display:inline-block;border:0 solid #ddd;border-width:1px;border-radius:3px 3px 0 0;cursor:pointer}label:hover{color:#888}input{position:absolute;left:-9999px}#tab10:checked~#tabL10,#tab11:checked~#tabL11,#tab12:checked~#tabL12,#tab13:checked~#tabL13,#tab14:checked~#tabL14,#tab15:checked~#tabL15,#tab16:checked~#tabL16,#tab17:checked~#tabL17,#tab18:checked~#tabL18,#tab19:checked~#tabL19,#tab1:checked~#tabL1,#tab20:checked~#tabL20,#tab2:checked~#tabL2,#tab3:checked~#tabL3,#tab4:checked~#tabL4,#tab5:checked~#tabL5,#tab6:checked~#tabL6,#tab7:checked~#tabL7,#tab8:checked~#tabL8,#tab9:checked~#tabL9{background:#fff;color:#555;border-top:1px solid #093;border-bottom:1px solid #fff;top:0;z-index:3;width:intrinsic}.tabContent{background:#fff;position:relative;z-index:2;width:intrinsic}.tabContent div{background:#fff;border:1px 
solid #ddd;padding:10px;display:none;-webkit-transition:opacity .2s ease-in-out;-moz-transition:opacity .2s ease-in-out;transition:opacity .2s ease-in-out}#tab10:checked~.tabContent #tabC10,#tab11:checked~.tabContent #tabC11,#tab12:checked~.tabContent #tabC12,#tab13:checked~.tabContent #tabC13,#tab14:checked~.tabContent #tabC14,#tab15:checked~.tabContent #tabC15,#tab16:checked~.tabContent #tabC16,#tab17:checked~.tabContent #tabC17,#tab18:checked~.tabContent #tabC18,#tab19:checked~.tabContent #tabC19,#tab1:checked~.tabContent #tabC1,#tab20:checked~.tabContent #tabC20,#tab2:checked~.tabContent #tabC2,#tab3:checked~.tabContent #tabC3,#tab4:checked~.tabContent #tabC4,#tab5:checked~.tabContent #tabC5,#tab6:checked~.tabContent #tabC6,#tab7:checked~.tabContent #tabC7,#tab8:checked~.tabContent #tabC8,#tab9:checked~.tabContent #tabC9{display:inline-block} \ No newline at end of file diff --git a/3rdparty/libs/fileext/excel/xlsx.cpp b/3rdparty/libs/fileext/excel/xlsx.cpp deleted file mode 100644 index 565be23..0000000 --- a/3rdparty/libs/fileext/excel/xlsx.cpp +++ /dev/null @@ -1,986 +0,0 @@ -/** - * @brief Excel files (xls/xlsx) into HTML сonverter - * @package excel - * @file xlsx.cpp - * @author dmryutov (dmryutov@gmail.com) - * @copyright python-excel (https://github.com/python-excel/xlrd) - * @date 02.12.2016 -- 28.01.2018 - */ -#include "tools.hpp" - -#include "sheet.hpp" - -#include "xlsx.hpp" - - -namespace excel { - -/** XLSX max row count */ -const long int X12_MAX_ROWS = 1048576; // 2^20 -/** XLSX max column count */ -const int X12_MAX_COLS = 16384; // 2^14 -/** Uppercase relations index */ -const std::unordered_map UPPERCASE_REL_INDEX { - {'1', 0}, {'3', 0}, {'2', 0}, {'5', 0}, {'4', 0}, {'7', 0}, {'6', 0}, - {'9', 0}, {'8', 0}, {'A', 1}, {'C', 3}, {'B', 2}, {'E', 5}, {'D', 4}, - {'G', 7}, {'F', 6}, {'I', 9}, {'H', 8}, {'K', 11}, {'J', 10}, {'M', 13}, - {'L', 12}, {'O', 15}, {'N', 14}, {'Q', 17}, {'P', 16}, {'S', 19}, {'R', 18}, - {'U', 21}, {'T', 20}, {'W', 23}, 
{'V', 22}, {'Y', 25}, {'X', 24}, {'Z', 26} -}; -/** Horizontal aligment types */ -const std::unordered_map XLSX_HORZ_ALIGN { - {"", 0}, - {"general", 0}, - {"left", 1}, - {"center", 2}, - {"right", 3}, - {"fill", 4}, - {"justify", 5}, - {"centerContinuous", 6}, - {"distributed", 7} -}; -/** Vertical aligment types */ -const std::unordered_map XLSX_VERT_ALIGN { - {"", 0}, - {"top", 0}, - {"center", 1}, - {"bottom", 2}, - {"justify", 3}, - {"distributed", 4} -}; -/** Border types */ -const std::unordered_map XLSX_BORDER_TYPE { - {"", 0}, - {"thin", 1}, - {"medium", 2}, - {"dashed", 3}, - {"dotted", 4}, - {"thick", 5}, - {"double", 6}, - {"hair", 7}, - {"mediumDashed", 8}, - {"dashDot", 9}, - {"mediumDashDot", 10}, - {"dashDotDot", 11}, - {"mediumDashDotDot", 12}, - {"slantDashDot", 13} -}; -/** Fill pattern types */ -const std::unordered_map XLSX_FILL_PATTERN { - {"", 0}, - {"none", 0}, - {"solid", 1}, - {"mediumGray", 2}, - {"darkGray", 3}, - {"lightGray", 4}, - {"darkHorizontal", 5}, - {"darkVertical", 6}, - {"darkDown", 7}, - {"darkUp", 8}, - {"darkGrid", 9}, - {"darkTrellis", 10}, - {"lightHorizontal", 11}, - {"lightVertical", 12}, - {"lightDown", 13}, - {"lightUp", 14}, - {"lightGrid", 15}, - {"lightTrellis", 16}, - {"gray125", 17}, - {"gray0625", 18} -}; - -// Xlsx -Xlsx::Xlsx(Book* book) - : m_book(book) {} - -void Xlsx::openWorkbookXlsx() { - X12Styles x12style(m_book); - x12style.handleTheme(); - x12style.handleStream(); - - X12Book x12book(m_book); - x12book.handleSst(); - x12book.handleRelations(); - x12book.handleProperties(); - x12book.handleStream(); -} - - -// X12General -X12General::X12General(Book* book) - : m_book(book) {} - -std::string X12General::getNodeText(const pugi::xml_node& node) { - std::string result = node.child_value(); - if (node.attribute("space").value() != std::string("preserve")) - result = tools::trim(result, "\t\n \r"); - return result; -} - -std::string X12General::getTextFromSiIs(const pugi::xml_node& node) { - std::string 
result; - for (const auto& child : node) { - std::string tag = child.name(); - if (tag == "t") - result += getNodeText(child); - else if (tag == "r") { - for (const auto& tNode : child) { - if (tNode.name() == std::string("t")) - result += getNodeText(tNode); - } - } - } - return result; -} - -void X12General::hexToColor(std::vector& colorMap, const std::string& color, int offset) { - for (int i = 0; i < 6; i += 2) { - unsigned long c = std::stoul(color.substr(offset + i, 2), nullptr, 16); - colorMap.emplace_back(static_cast(c)); - } -} - - -// X12Book public: -X12Book::X12Book(Book* book) -: X12General(book) { - m_book->m_sheetCount = 0; -} - -void X12Book::handleSst() { - pugi::xml_document tree; - Ooxml::extractFile(m_book->m_fileName, "xl/sharedstrings.xml", tree); - - for (const auto& node : tree.select_nodes("//si")) - m_book->m_sharedStrings.emplace_back(getTextFromSiIs(node.node())); -} - -void X12Book::handleRelations() { - pugi::xml_document tree; - Ooxml::extractFile(m_book->m_fileName, "xl/_rels/workbook.xml.rels", tree); - - for (const auto& node : tree.child("Relationships")) { - std::string relId = node.attribute("Id").value(); - std::string target = node.attribute("Target").value(); - std::string relType = node.attribute("Type").value(); - relType = relType.substr(relType.find_last_of("/") + 1); - - m_relIdToType[relId] = relType; - if (target[0] == '/') - m_relIdToPath[relId] = target.substr(1); // Drop the `/` - else - m_relIdToPath[relId] = "xl/" + target; - } -} - -void X12Book::handleProperties() { - if (!m_book->m_addStyle) - return; - - pugi::xml_document tree; - Ooxml::extractFile(m_book->m_fileName, "docprops/core.xml", tree); - - for (const auto& node : tree.select_nodes("//dc:creator")) - m_book->m_properties["creator"] = node.node().child_value(); - for (const auto& node : tree.select_nodes("//cp:lastModifiedBy")) - m_book->m_properties["last_modified_by"] = node.node().child_value(); - for (const auto& node : 
tree.select_nodes("//dcterms:created")) - m_book->m_properties["created"] = node.node().child_value(); - for (const auto& node : tree.select_nodes("//dcterms:modified")) - m_book->m_properties["modified"] = node.node().child_value(); - m_book->m_userName = m_book->m_properties["last_modified_by"].empty() ? - m_book->m_properties["creator"] : - m_book->m_properties["last_modified_by"]; -} - -void X12Book::handleStream() { - m_book->m_biffVersion = 80; - Formatting formatting(m_book); - formatting.initializeBook(); - - pugi::xml_document tree; - Ooxml::extractFile(m_book->m_fileName, "xl/workbook.xml", tree); - - for (const auto& node : tree.select_nodes("//definedNames")) { - handleDefinedNames(node.node()); - } - for (const auto& node : tree.select_nodes("//workbookPr")) { - std::string date = node.node().attribute("date1904").value(); - m_book->m_dateMode = (date == "1" || date == "true" || date == "on") ? 1 : 0; - } - for (const auto& node : tree.select_nodes("//sheet")) { - handleSheet(node.node()); - } -} - -// X12Book private: -void X12Book::handleDefinedNames(const pugi::xml_node& node) { - for (const auto& child : node) { - Name name(m_book); - name.m_nameIndex = m_book->m_nameObjList.size(); - name.m_name = child.attribute("name").value(); - name.m_rawFormula = ""; // Compiled bytecode formula - not in XLSX - name.m_formulaText = getNodeText(child); - /*map_attributes(_defined_name_attribute_map, node, name)*/ - if (name.m_scope != 0) - name.m_scope = -1; // Global - - try { - if (name.m_name.substr(0, 6) == "_xlnm.") - name.m_builtIn = 1; - } - catch (...) 
{} - m_book->m_nameObjList.push_back(name); - } - createNameMap(); -} - -void X12Book::handleSheet(const pugi::xml_node& node) { - size_t sheetIndex = m_book->m_sheetCount; - std::string relId = node.attribute("r:id").value(); - int sheetId = node.attribute("sheetId").as_int(); - std::string name = node.attribute("name").value(); - std::string state = node.attribute("state").value(); - - std::string relType = m_relIdToType[relId]; - std::string target = m_relIdToPath[relId]; - if (relType != "worksheet") - return; - - if (state == "hidden") - m_book->m_sheetVisibility.push_back(1); - else if (state == "veryHidden") - m_book->m_sheetVisibility.push_back(2); - else - m_book->m_sheetVisibility.push_back(0); - - // Add sheet information -// auto div = m_book->m_htmlTree.append_child("div"); -// div.append_attribute("id") = ("tabC"+ std::to_string(sheetIndex + 1)).c_str(); -// auto table = m_book->m_htmlTree.append_child("p"); - - m_book->m_sheetList.emplace_back(m_book, -1, name, sheetIndex, m_book->m_contentText); - m_book->m_sheetNames.push_back(name); - m_book->m_sheetCount += 1; - m_sheetTargets.push_back(target); - m_sheetIds.push_back(sheetId); - - Sheet& sheet = m_book->m_sheetList[m_book->m_sheetList.size() - 1]; - sheet.m_maxRowCount = X12_MAX_ROWS; - sheet.m_maxColCount = X12_MAX_COLS; - - size_t found = target.find_last_of("/"); - std::string relFileName = "xl/worksheets/_rels/"+ target.substr(found + 1) +".rels"; - - X12Sheet x12sheet(m_book, sheet); - x12sheet.handleRelations(relFileName); - x12sheet.handleStream(target); - - for (const auto& rel : x12sheet.m_relIdToType) { - if (rel.second == "comments") { - std::string commentFileName = x12sheet.m_relIdToPath[rel.first]; - if (!commentFileName.empty()) - x12sheet.handleComments(commentFileName); - } - } - -// if (m_book->m_extractImages) { -// x12sheet.getDrawingRelationshipMap(static_cast(sheetIndex)); -// x12sheet.handleImages(static_cast(sheetIndex), m_book->m_htmlTree); -// } - -// 
sheet.tidyDimensions(); -} - -void X12Book::createNameMap() { - m_book->m_nameScopeMap.clear(); - m_book->m_nameMap.clear(); - std::map>> nameMap; - size_t nameCount = m_book->m_nameObjList.size(); - for (size_t i = 0; i < nameCount; ++i) { - Name& name = m_book->m_nameObjList[i]; - std::string lcName = name.m_name; - std::transform(lcName.begin(), lcName.end(), lcName.begin(), ::tolower); - - std::pair key {lcName, name.m_scope}; - m_book->m_nameScopeMap.erase(key); - m_book->m_nameScopeMap.emplace(key, name); - - nameMap[lcName].emplace_back(name, static_cast(i)); - } - for (auto& map : nameMap) { - std::sort(map.second.begin(), map.second.end()); - for (const auto& obj : map.second) - m_book->m_nameMap[map.first].emplace_back(obj.first); - } -} - - -// X12Sheet public: -X12Sheet::X12Sheet(Book* book, Sheet& sheet) - : X12General(book), m_sheet(sheet) {} - -void X12Sheet::handleRelations(const std::string& fileName) { - pugi::xml_document tree; - Ooxml::extractFile(m_book->m_fileName, fileName, tree); - - for (const auto& node : tree.child("Relationships")) { - std::string relId = node.attribute("Id").value(); - std::string target = node.attribute("Target").value(); - std::string relType = node.attribute("Type").value(); - relType = relType.substr(relType.find_last_of("/") + 1); - - size_t found = fileName.find_last_of("/"); - std::string rels_fname = "xl/worksheets/_rels/"+ fileName.substr(found + 1) +".rels"; - - m_relIdToType[relId] = relType; - // normpath(join('xl/worksheets', target)) - m_relIdToPath[relId] = "xl/"+ target.substr(target.find_first_of("/") + 1); - } -} - -void X12Sheet::handleStream(const std::string& fileName) { - pugi::xml_document tree; - Ooxml::extractFile(m_book->m_fileName, fileName, tree); - - for (const auto& node : tree.select_nodes("//mergeCell")) - handleMergedCells(node.node()); - for (const auto& node : tree.select_nodes("//tablePart")) - handleTableParts(node.node()); - for (const auto& node : tree.select_nodes("//col")) - 
handleCol(node.node()); - for (const auto& node : tree.select_nodes("//row")) - handleRow(node.node()); - for (const auto& node : tree.select_nodes("//dimension")) - handleDimensions(node.node()); -} - -void X12Sheet::handleComments(const std::string& fileName) { - pugi::xml_document tree; - Ooxml::extractFile(m_book->m_fileName, fileName, tree); - - std::vector authors; - for (const auto& node : tree.select_nodes("//author")) { - authors.push_back(node.node().child_value()); - } - for (const auto& node : tree.select_nodes("//comment")) { - auto nd = node.node(); - Note note; - note.m_author = authors[nd.attribute("authorId").as_int()]; - cellNameToIndex(nd.attribute("ref").value(), note.m_rowIndex, note.m_colIndex); - for (const auto& child : nd.select_nodes("//t")) - note.m_text += getNodeText(child.node()) +" "; - m_sheet.m_cellNoteMap[{note.m_rowIndex, note.m_colIndex}] = note; - } -} - -void X12Sheet::getDrawingRelationshipMap(int sheetIndex) { - pugi::xml_document tree; - Ooxml::extractFile(m_book->m_fileName, - "xl/drawings/_rels/drawing"+ std::to_string(sheetIndex + 1)+".xml.rels", tree); - - for (const auto& node : tree.child("Relationships")) { - auto id = node.attribute("Id").value(); - if (id) - m_drawingRelationshipMap[id] = node.attribute("Target").value(); - } -} - -#if 0 -void X12Sheet::handleImages(int sheetIndex, pugi::xml_node& htmlNode) { - pugi::xml_document tree; - Ooxml::extractFile(m_book->m_fileName, - "xl/drawings/drawing"+ std::to_string(sheetIndex + 1)+".xml", tree); - - for (const auto& node : tree.child("xdr:wsDr")) { - std::string imageId = node.select_node(".//a:blip").node().attribute("r:embed").value(); - - // This image does not have image id - if (m_drawingRelationshipMap.find(imageId) == m_drawingRelationshipMap.end()) - continue; - std::string path = "xl/" + m_drawingRelationshipMap[imageId].substr(3); - - // Load image - std::string ext = path.substr(path.find_last_of('.') + 1); - std::string imageData; - 
Ooxml::extractFile(m_book->m_fileName, path, imageData); - m_book->m_imageList.emplace_back(std::make_pair(std::move(imageData), ext)); - - // Add image node - auto imageNode = htmlNode.append_child("p").append_child("img"); - imageNode.append_attribute("data-tag") = m_book->m_imageList.size() - 1; - - // Add style - if (m_book->m_addStyle) - getImageSize(node, imageNode); - } -} -#endif - -// X12Sheet private: -void X12Sheet::handleCol(const pugi::xml_node& node) { - if (!m_book->m_addStyle) - return; - - int firstColIndex = node.attribute("min").as_int(); - int lastColIndex = node.attribute("max").as_int(); - Colinfo colinfo; - colinfo.m_width = static_cast(node.attribute("width").as_double() * 45 * 6); - colinfo.m_isHidden = node.attribute("hidden"); - //colinfo.m_bitFlag = ??? - colinfo.m_outlineLevel = node.attribute("outlineLevel").as_int(); - colinfo.m_isCollapsed = node.attribute("collapsed"); - - for (int i = firstColIndex; i <= lastColIndex; ++i) - m_sheet.m_colinfoMap[i-1] = colinfo; -} - -void X12Sheet::handleRow(const pugi::xml_node& node) { - int rowNumber = node.attribute("r").as_int(); - bool explicitRowNumber; - // Yes, it's optional - if (!rowNumber) { - m_rowIndex += 1; - explicitRowNumber = false; - } - else { - m_rowIndex = rowNumber - 1; - explicitRowNumber = true; - } - - // Read ROWINFO data - if (m_book->m_addStyle) { - Rowinfo rowinfo; - rowinfo.m_height = node.attribute("ht").as_int() * 20; - //rowinfo.m_hasDefaultHeight = ??? - rowinfo.m_outlineLevel = node.attribute("outlineLevel").as_int(); - //rowinfo.m_isOutlineGroupStartsEnds = ??? - rowinfo.m_isHidden = node.attribute("hidden"); - //rowinfo.m_isHeightMismatch = ??? - //rowinfo.m_hasAdditionalSpaceAbove = ??? - //rowinfo.m_hasAdditionalSpaceBelow = ??? 
- - m_sheet.m_rowinfoMap[rowNumber-1] = rowinfo; - } - - // Read cell data - int colIndex = -1; - for (const auto& cellNode: node) { - try { - std::string cellName = cellNode.attribute("r").value(); - // Yes, it's optional - if (cellName.empty()) { - colIndex += 1; - } - else { - // Extract column index from cell name (`A` => `0`, `Z` =>`25`, `AA` => `26`) - colIndex = 0; - char charIndex = -1; - for (const auto& c : cellName) { - charIndex += 1; - if (c == '$') - continue; - if (UPPERCASE_REL_INDEX.find(c) == UPPERCASE_REL_INDEX.end()) - throw std::logic_error( - "Unexpected character "+ std::string(1, c) +" in cell name "+ cellName - ); - - int lv = UPPERCASE_REL_INDEX.at(c); - if (lv) { - colIndex = colIndex * 26 + lv; - } - // Start of row number can't be '\0' - else { - colIndex--; - break; - } - } - - if (explicitRowNumber && cellName.substr(charIndex) != std::to_string(rowNumber)) - throw std::logic_error( - "Cell name "+ cellName +" but row number is "+ std::to_string(rowNumber) - ); - } - - int xfIndex = cellNode.attribute("s").as_int()+1; - std::string cellType = cellNode.attribute("t").value(); - std::string value; - std::string formula; - // n = number. Most frequent type. child contains plain text which can go straight - // into float() OR there's no text in which case it's a BLANK cell - if (cellType.empty()) { - for (const auto& child : cellNode) { - std::string childName = child.name(); - if (childName == "v") - value = child.child_value(); - else if (childName == "f") - formula = getNodeText(child); - else - throw std::logic_error("Unexpected tag " + childName); - } - if (value.empty()) { - // if (m_book->m_addStyle) - // m_sheet.putCell(m_rowIndex, colIndex, "", xfIndex); - } - else { - m_sheet.append(value); - // m_sheet.putCell(m_rowIndex, colIndex, value, xfIndex); - } - } - // s = index into shared string table. 
2nd most frequent type child contains plain - // text which can go straight into int() - else if (cellType == "s") { - for (const auto& child : cellNode) { - std::string childName = child.name(); - if (childName == "v") - value = child.child_value(); - // Formula not expected here, but gnumeric does it - else if (childName == "f") - formula = child.child_value(); - else - throw std::logic_error( - "Cell type "+ cellType +" has unexpected child <"+ childName +"> at rowx="+ - std::to_string(m_rowIndex) +" colx="+ std::to_string(colIndex) - ); - } - // - if (value.empty()) { - // if (m_book->m_addStyle) - // m_sheet.putCell(m_rowIndex, colIndex, "", xfIndex); - } - else { - // m_sheet.putCell( - // m_rowIndex, colIndex, m_book->m_sharedStrings[stoi(value)], xfIndex - // ); - m_sheet.append(m_book->m_sharedStrings[stoi(value)]); - } - } - // str = string result from formula. Should have (formula) child; however in one file, - // all text cells are str with no formula. child can contain escapes - else if (cellType == "str") { - for (const auto& child : cellNode) { - std::string childName = child.name(); - if (childName == "v") - value = getNodeText(child); - else if (childName == "f") - formula = getNodeText(child); - else - throw std::logic_error( - "Cell type "+ cellType +" has unexpected child <"+ childName +"> at rowx="+ - std::to_string(m_rowIndex) +" colx="+ std::to_string(colIndex) - ); - } - m_sheet.append(value); - // m_sheet.putCell(m_rowIndex, colIndex, value, xfIndex); - } - // b = boolean. child contains "0" or "1". 
Maybe data should be converted with - // cnv_xsd_boolean; ECMA standard is silent; Excel 2007 writes 0 or 1 - else if (cellType == "b") { - for (const auto& child : cellNode) { - std::string childName = child.name(); - if (childName == "v") - value = child.child_value(); - else if (childName == "f") - formula = getNodeText(child); - else - throw std::logic_error( - "Cell type "+ cellType +" has unexpected child <"+ childName +"> at rowx="+ - std::to_string(m_rowIndex) +" colx="+ std::to_string(colIndex) - ); - } - m_sheet.append(value); - // m_sheet.putCell(m_rowIndex, colIndex, value, xfIndex); - } - // e = error. child contains e.g. "#REF!" - else if (cellType == "e") { - for (const auto& child : cellNode) { - std::string childName = child.name(); - if (childName == "v") - value = child.child_value(); - else if (childName == "f") - formula = getNodeText(child); - else - throw std::logic_error( - "Cell type "+ cellType +" has unexpected child <"+ childName +"> at rowx="+ - std::to_string(m_rowIndex) +" colx="+ std::to_string(colIndex) - ); - } - m_sheet.append(std::to_string(ERROR_CODE_FROM_TEXT.at(value))); - // m_sheet.putCell(m_rowIndex, colIndex, std::to_string(ERROR_CODE_FROM_TEXT.at(value)), - // xfIndex); - } - // Not expected in files produced by Excel. 
It's a way of allowing 3rd party s/w to write - // text (including rich text) cells without having to build a shared string table (SST) - else if (cellType == "inlineStr") { - for (const auto& child : cellNode) { - std::string childName = child.name(); - if (childName == "is") - value = getTextFromSiIs(child); - else if (childName == "v") - value = child.child_value(); - else if (childName == "f") - formula = child.child_value(); - else - throw std::logic_error( - "Cell type "+ cellType +" has unexpected child <"+ childName +"> at rowx="+ - std::to_string(m_rowIndex) +" colx="+ std::to_string(colIndex) - ); - } - if (value.empty()) { - // if (m_book->m_addStyle) - // m_sheet.putCell(m_rowIndex, colIndex, "", xfIndex); - } - else { - m_sheet.append(value); - // m_sheet.putCell(m_rowIndex, colIndex, value, xfIndex); - } - } - else { - throw std::logic_error( - "Unknown cell type "+ cellType +" in rowx="+ std::to_string(m_rowIndex) + - " colx="+ std::to_string(colIndex) - ); - } - } catch (...) 
{ - return; - } - } -} - -void X12Sheet::handleDimensions(const pugi::xml_node& node) { - std::string ref = node.attribute("ref").value(); - if (!ref.empty()) { - size_t found = ref.find_last_of(":"); - std::string lastRef = ref.substr(found + 1); // Example: "Z99" - int rowIndex, colIndex; - cellNameToIndex(lastRef, rowIndex, colIndex, true); - m_sheet.m_dimensionRowCount = rowIndex + 1; - if (colIndex) - m_sheet.m_dimensionColCount = colIndex + 1; - } -} - -void X12Sheet::handleMergedCells(const pugi::xml_node& node) { - // The ref attribute should be a cell range like "B1:D5" - std::string ref = node.attribute("ref").value(); - if (!ref.empty()) { - size_t found = ref.find_last_of(":"); - std::string firstRef = ref.substr(0, found); - std::string lastRef = ref.substr(found + 1); - int firstRowIndex, lastRowIndex , firstColIndex, lastColIndex; - cellNameToIndex(firstRef, firstRowIndex, firstColIndex); - cellNameToIndex(lastRef, lastRowIndex, lastColIndex); - m_sheet.m_mergedCells.push_back({ - firstRowIndex, lastRowIndex + 1, - firstColIndex, lastColIndex + 1 - }); - } -} - -void X12Sheet::handleTableParts(const pugi::xml_node& node) { - // Get file path - std::string relId = node.attribute("r:id").value(); - std::string relType = m_relIdToType[relId]; - std::string target = m_relIdToPath[relId]; - if (relType != "table") - return; - - size_t found = target.find_last_of("/"); - std::string relFileName = "xl/tables/"+ target.substr(found + 1); - - // Extract file data - pugi::xml_document tree; - Ooxml::extractFile(m_book->m_fileName, relFileName, tree); - - auto nd = tree.child("table"); - std::string ref = nd.attribute("ref").value(); - std::string styleName = nd.child("tableStyleInfo").attribute("name").value(); - - if (!ref.empty()) { - // Cell ranges - size_t found = ref.find_last_of(":"); - std::string firstRef = ref.substr(0, found); - std::string lastRef = ref.substr(found + 1); - int firstRowIndex, lastRowIndex , firstColIndex, lastColIndex; - 
cellNameToIndex(firstRef, firstRowIndex, firstColIndex); - cellNameToIndex(lastRef, lastRowIndex, lastColIndex); - // Style id - auto pos = styleName.find_first_of("0123456789"); - if (pos == std::string::npos) - return; - - int type = 100; - if (styleName.find("Medium") != std::string::npos) - type = 200; - else if (styleName.find("Dark") != std::string::npos) - type = 300; - - m_sheet.m_tableParts.push_back({ - firstRowIndex, lastRowIndex + 1, - firstColIndex, lastColIndex + 1, - stoi(styleName.substr(pos)) + type - }); - } -} - -void X12Sheet::cellNameToIndex(const std::string& cellName, int& rowIndex, - int& colIndex, bool noCol) -{ - colIndex = 0; - char charIndex = -1; - - for (const auto& c : cellName) { - charIndex += 1; - if (UPPERCASE_REL_INDEX.find(c) == UPPERCASE_REL_INDEX.end()) - throw std::logic_error( - "Unexpected character "+ std::string(1, c) +" in cell name "+ cellName - ); - - int lv = UPPERCASE_REL_INDEX.at(c); - if (lv) { - colIndex = colIndex * 26 + lv; - } - // Start of row number can't be '\0' - else { - if (charIndex == 0) { - // There was no col marker - if (noCol) { - colIndex = -1; - break; - } - else { - throw std::logic_error("Missing col in cell name "+ cellName); - } - } - else { - colIndex--; - break; - } - } - } - - rowIndex = stoi(cellName.substr(charIndex)) - 1; -} - -void X12Sheet::getImageSize(const pugi::xml_node& xmlNode, pugi::xml_node& htmlNode) const { - auto child = xmlNode.select_node(".//a:xfrm").node().child("a:ext"); - if (!child) - return; - - int width = child.attribute("cx").as_int() / 9525; // EMUS_PER_PIXEL - int height = child.attribute("cy").as_int() / 9525; // EMUS_PER_PIXEL - - std::string style = "width: " + std::to_string(width) + "px;"; - style += "height: " + std::to_string(height) + "px;"; - htmlNode.append_attribute("style") = style.c_str(); -} - - -// X12Styles public: -X12Styles::X12Styles(Book* book) -: X12General(book) { - for (int i = 14; i < 23; ++i) - m_isDateFormat[i] = 1; - for (int i = 45; i 
< 48; ++i) - m_isDateFormat[i] = 1; - // Dummy entry for XF 0 in case no Styles section - m_book->m_xfIndexXlTypeMap[0] = 0; -} - -void X12Styles::handleTheme() { - if (!m_book->m_addStyle) - return; - - pugi::xml_document tree; - Ooxml::extractFile(m_book->m_fileName, "xl/theme/theme1.xml", tree); - - int colorIndex = -2; - for (const auto& node : tree.select_nodes("//a:sysClr")) { - hexToColor(m_book->m_colorMap[colorIndex], node.node().attribute("lastClr").value(), 0); - colorIndex--; - } - colorIndex++; - for (const auto& node : tree.select_nodes("//a:srgbClr")) { - hexToColor(m_book->m_colorMap[colorIndex], node.node().attribute("val").value(), 0); - colorIndex--; - } -} - -void X12Styles::handleStream() { - if (!m_book->m_addStyle) - return; - - pugi::xml_document tree; - Ooxml::extractFile(m_book->m_fileName, "xl/styles.xml", tree); - - int fontIndex = 0; - for (const auto& node : tree.select_nodes("//numFmt")) - handleNumFormat(node.node()); - for (const auto& node : tree.select_nodes("//font")) - handleFont(node.node(), fontIndex++); - for (const auto& node : tree.select_nodes("//border")) - handleBorder(node.node()); - for (const auto& node : tree.select_nodes("//patternFill")) - handleBackground(node.node()); - for (const auto& node : tree.select_nodes("//xf")) - handleXf(node.node()); -} - -// X12Styles private: -void X12Styles::handleNumFormat(const pugi::xml_node& node) { - std::string formatCode = node.attribute("formatCode").value(); - int numFormatId = node.attribute("numFmtId").as_int(); - bool isDate = Formatting::isDateFormattedString(formatCode); - m_isDateFormat[numFormatId] = isDate; - m_book->m_formatMap.emplace(numFormatId, Format(numFormatId, isDate + 2, formatCode)); -} - -void X12Styles::handleFont(const pugi::xml_node& node, int fontIndex) { - Font f; - f.m_fontIndex = fontIndex; - - for (const auto& child : node) { - std::string childName = child.name(); - if (childName == "name") - f.m_name = child.attribute("val").value(); - else if 
(childName == "sz") - f.m_height = child.attribute("val").as_int() * 20; - else if (childName == "color") - extractColor(child, f.m_color); - else if (childName == "vertAlign") { - std::string val = child.attribute("val").value(); - if (val == "superscript") - f.m_escapement = 1; - if (val == "subscript") - f.m_escapement = 2; - } - else if (childName == "family") - f.m_family = child.attribute("val").as_int(); - else if (childName == "b") - f.m_isBold = true; - else if (childName == "i") - f.m_isItalic = true; - else if (childName == "u") { - f.m_isUnderlined = true; - std::string value = child.attribute("val").value(); - if (value == "double" || value == "doubleAccounting") - f.m_underlineType = 2; - else - f.m_underlineType = 1; - } - else if (childName == "strike") - f.m_isStruckOut = true; - } - - m_book->m_fontList.emplace_back(f); -} - -void X12Styles::handleBorder(const pugi::xml_node& node) { - XFBorder border; - - border.m_diagDown = node.attribute("diagonalDown"); - border.m_diagUp = node.attribute("diagonalUp"); - - for (const auto& child : node) { - std::string childName = child.name(); - if (childName == "left") { - border.m_leftLineStyle = XLSX_BORDER_TYPE.at(child.attribute("style").value()); - extractColor(child.first_child(), border.m_leftColor); - } - else if (childName == "right") { - border.m_rightLineStyle = XLSX_BORDER_TYPE.at(child.attribute("style").value()); - extractColor(child.first_child(), border.m_rightColor); - } - else if (childName == "top") { - border.m_topLineStyle = XLSX_BORDER_TYPE.at(child.attribute("style").value()); - extractColor(child.first_child(), border.m_topColor); - } - else if (childName == "bottom") { - border.m_bottomLineStyle = XLSX_BORDER_TYPE.at(child.attribute("style").value()); - extractColor(child.first_child(), border.m_bottomColor); - } - else if (childName == "diagonal") { - border.m_diagLineStyle = XLSX_BORDER_TYPE.at(child.attribute("style").value()); - extractColor(child.first_child(), 
border.m_diagColor); - } - } - - m_book->m_borderList.emplace_back(border); -} - -void X12Styles::handleBackground(const pugi::xml_node& node) { - XFBackground background; - - background.m_fillPattern = XLSX_FILL_PATTERN.at(node.attribute("patternType").value()); - - for (const auto& child : node) { - std::string childName = child.name(); - if (childName == "fgColor") - extractColor(child, background.m_patternColor); - else if (childName == "bgColor") - extractColor(child, background.m_backgroundColor); - } - - m_book->m_backgroundList.emplace_back(background); -} - -void X12Styles::handleXf(const pugi::xml_node& node) { - int xfIndex; - std::string parentName = node.parent().name(); - if (parentName == "cellStyleXfs") - xfIndex = m_xfCount[0]++; - else if (parentName == "cellXfs") - xfIndex = m_xfCount[1]++; - - XF xf; - int numFormatId = node.attribute("numFmtId").as_int(); - xf.m_fontIndex = node.attribute("fontId").as_int(); - xf.m_formatKey = numFormatId; - - //xf.m_protection.m_isCellLocked = ??? - xf.m_protection.m_isFormulaHidden = node.child("protection").attribute("hidden"); - //xf.m_isStyle = ??? - //xf.m_lotusPrefix = ??? - //xf.m_parentStyleIndex = ??? - - auto align = node.child("alignment"); - if (align) { - xf.m_alignment.m_horizontalAlign = XLSX_HORZ_ALIGN.at(align.attribute("horizontal").value()); - xf.m_alignment.m_isTextWrapped = align.attribute("wrapText").as_int(); - xf.m_alignment.m_verticalAlign = XLSX_VERT_ALIGN.at(align.attribute("vertical").value()); - xf.m_alignment.m_indentLevel = align.attribute("indent").as_int(); - xf.m_alignment.m_isShrinkToFit = align.attribute("shrinkToFit"); - //xf.m_alignment.m_textDirection = ??? - xf.m_alignment.m_rotation = align.attribute("textRotation").as_int(); - } - - //xf.m_formatFlag = ??? 
- xf.m_fontFlag = true; // Need to check - xf.m_alignmentFlag = node.attribute("applyAlignment"); - xf.m_borderFlag = node.attribute("applyBorder"); - xf.m_backgroundFlag = node.attribute("applyFill"); - xf.m_protectionFlag = node.attribute("applyProtection"); - - xf.m_border = m_book->m_borderList[node.attribute("borderId").as_int()]; - xf.m_background = m_book->m_backgroundList[node.attribute("fillId").as_int()]; - - m_book->m_xfList.push_back(xf); - m_book->m_xfCount += 1; - m_book->m_xfIndexXlTypeMap[xfIndex] = m_isDateFormat[numFormatId] + 2; -} - -void X12Styles::extractColor(const pugi::xml_node& node, XFColor& color) { - color.m_tint = node.attribute("tint").as_double(); - if (node.attribute("indexed")) - color.m_index = node.attribute("indexed").as_int(); - else if (node.attribute("theme")) - color.m_index = -1 - node.attribute("theme").as_int(); - else if (node.attribute("auto")) - color.m_index = 0; - else if (node.attribute("rgb")) { - color.m_isRgb = true; - hexToColor(color.m_rgb, node.attribute("rgb").value(), 2); - } -} - -} // End namespace diff --git a/3rdparty/libs/fileext/excel/xlsx.hpp b/3rdparty/libs/fileext/excel/xlsx.hpp deleted file mode 100644 index 6ee67a6..0000000 --- a/3rdparty/libs/fileext/excel/xlsx.hpp +++ /dev/null @@ -1,427 +0,0 @@ -/** - * @brief Excel files (xls/xlsx) into HTML сonverter - * @package excel - * @file xlsx.hpp - * @author dmryutov (dmryutov@gmail.com) - * @copyright python-excel (https://github.com/python-excel/xlrd) - * @date 02.12.2016 -- 18.10.2017 - */ -#pragma once - -#include -#include -#include -#include - -#include "fileext/ooxml/ooxml.hpp" - -#include "book.hpp" -#include "format.hpp" - - -namespace excel { - -/** - * @class Xlsx - * @brief - * Wrapper for XLSX format - */ -class Xlsx: public ooxml::Ooxml { -public: - /** - * @param[in] book - * Pointer to parent Book object - * @since 1.0 - */ - Xlsx(Book* book); - - /** - * @brief - * Read XLSX WorkBook - * @since 1.0 - */ - void openWorkbookXlsx(); - - 
/** Pointer to parent Book object */ - Book* m_book; -}; - - -/** - * @class X12General - * @brief - * Base class for document components - */ -class X12General: public ooxml::Ooxml { -public: - /** - * @param[in] book - * Pointer to parent Book object - * @since 1.0 - */ - X12General(Book* book); - - /** - * @brief - * Get node text - * @param[in] node - * Node in XML-tree - * @return - * Node text - * @since 1.0 - */ - std::string getNodeText(const pugi::xml_node& node); - - /** - * @brief - * Get text from `is` or `is` nodes - * @param[in] node - * Node in XML-tree - * @return - * Node text - * @since 1.0 - */ - std::string getTextFromSiIs(const pugi::xml_node& node); - - /** - * @brief - * Convert hex string to color - * @param[out] colorMap - * Array in which color will be saved - * @param[in] color - * Hex string - * @param[in] offset - * Start position in hex string - * @since 1.0 - */ - void hexToColor(std::vector& colorMap, const std::string& color, int offset = 0); - - /** Pointer to parent Book object */ - Book* m_book; -}; - - -/** - * @class X12Book - * @brief - * Excel Workbook data - */ -class X12Book: public X12General { -public: - /** - * @param book - * Pointer to parent Book object - * @since 1.0 - */ - X12Book(Book* book); - - /** - * @brief - * Read SST (Shared Strings Table) data - * @since 1.0 - */ - void handleSst(); - - /** - * @brief - * Read relations data - * @since 1.0 - */ - void handleRelations(); - - /** - * @brief - * Read properties data - * @since 1.0 - */ - void handleProperties(); - - /** - * @brief - * Read main stream data - * @since 1.0 - */ - void handleStream(); - -private: - /** - * @brief - * Read defined names data - * @param[in] node - * Node in XML-tree - * @since 1.0 - */ - void handleDefinedNames(const pugi::xml_node& node); - - /** - * @brief - * Read sheet data - * @param[in] node - * Node in XML-tree - * @since 1.0 - */ - void handleSheet(const pugi::xml_node& node); - - /** - * @brief - * Create name map - * 
@since 1.0 - */ - void createNameMap(); - - /** Map relation id to path */ - std::unordered_map m_relIdToPath; - /** Map relation id to type */ - std::unordered_map m_relIdToType; - /** Sheet target list */ - std::vector m_sheetTargets; - /** Sheet id list */ - std::vector m_sheetIds; -}; - - -/** - * @class X12Sheet - * @brief - * Contains data for one worksheet - */ -class X12Sheet: public X12General { -public: - /** - * @param[in] book - * Pointer to parent Book object - * @param[in] sheet - * Reference to SHEET object - * @since 1.0 - */ - X12Sheet(Book* book, Sheet& sheet); - - /** - * @brief - * Read relations data - * @param[in] fileName - * XML data file name - * @since 1.0 - */ - void handleRelations(const std::string& fileName); - - /** - * @brief - * Read main stream data - * @param[in] fileName - * XML data file name - * @since 1.0 - */ - void handleStream(const std::string& fileName); - - /** - * @brief - * Read comments/notes data - * @param[in] fileName - * XML data file name - * @since 1.0 - */ - void handleComments(const std::string& fileName); - - /** - * @brief - * Get drawing relationship map from `xl/drawings/_rels/drawingN.xml.rels` - * @param[in] sheetIndex - * Sheet index - * @since 1.1 - */ - void getDrawingRelationshipMap(int sheetIndex); - - /** - * @brief - * Read images data - * @param[in] sheetIndex - * Sheet index - * @param[out] htmlNode - * Parent HTML-node - * @since 1.1 - */ - void handleImages(int sheetIndex, pugi::xml_node& htmlNode); - - /** Map relation id to path */ - std::unordered_map m_relIdToPath; - /** Map relation id to type */ - std::unordered_map m_relIdToType; - -private: - /** - * @brief - * Read column (COLINFO) data - * @param[in] node - * Node in XML-tree - * @since 1.0 - */ - void handleCol(const pugi::xml_node& node); - - /** - * @brief - * Read row (Cell + ROWINFO) data - * @param[in] node - * Node in XML-tree - * @since 1.0 - */ - void handleRow(const pugi::xml_node& node); - - /** - * @brief - * Get sheet 
dimensions. Example: "A1:Z99" or just "A1" - * @param[in] node - * Node in XML-tree - * @since 1.0 - */ - void handleDimensions(const pugi::xml_node& node); - - /** - * @brief - * Get merged cells - * @param[in] node - * Node in XML-tree - * @since 1.0 - */ - void handleMergedCells(const pugi::xml_node& node); - - /** - * @brief - * Get table parts information - * @param[in] node - * Node in XML-tree - * @since 1.0 - */ - void handleTableParts(const pugi::xml_node& node); - - /** - * @brief - * Convert cell name to row/column indexes - * @details - * Example: - * @code `A` => `0`, `Z` =>`25`, `AA` => `26` @endcode - * @param[in] cellName - * Cell name - * @param[out] rowIndex - * Row index - * @param[out] colIndex - * Column index - * @param[in] noCol - * If there was no column marker - * @since 1.0 - */ - void cellNameToIndex(const std::string& cellName, int& rowIndex,int& colIndex, - bool noCol = false); - - /** - * @brief - * Get image size and update `img` tag - * @param[in] xmlNode - * XML-node - * @param[out] htmlNode - * Parent HTML-node - * @since 1.1 - */ - void getImageSize(const pugi::xml_node& xmlNode, pugi::xml_node& htmlNode) const; - - /** Reference to SHEET object */ - Sheet& m_sheet; - /** Drawing relationship map */ - std::unordered_map m_drawingRelationshipMap; - /** Row index */ - int m_rowIndex = -1; -}; - - -/** - * @class X12Styles - * @brief - * Contains style data - */ -class X12Styles: public X12General { -public: - /** - * @param[in] book - * Pointer to parent Book object - * @since 1.0 - */ - X12Styles(Book* book); - - /** - * @brief - * Read theme data - * @since 1.0 - */ - void handleTheme(); - - /** - * @brief - * Read main stream data - * @since 1.0 - */ - void handleStream(); - -private: - /** - * @brief - * Read number format record - * @param[in] node - * Node in XML-tree - * @since 1.0 - */ - void handleNumFormat(const pugi::xml_node& node); - - /** - * @brief - * Read FONT data - * @param[in] node - * Node in XML-tree - * @since 
1.0 - */ - void handleFont(const pugi::xml_node& node, int fontIndex); - - /** - * @brief - * Read BORDER data - * @param[in] node - * Node in XML-tree - * @since 1.0 - */ - void handleBorder(const pugi::xml_node& node); - - /** - * @brief - * Read BACKGROUND data - * @param[in] node - * Node in XML-tree - * @since 1.0 - */ - void handleBackground(const pugi::xml_node& node); - - /** - * @brief - * Read XF data - * @param[in] node - * Node in XML-tree - * @since 1.0 - */ - void handleXf(const pugi::xml_node& node); - - /** - * @brief - * Get color information from node - * @param[in] node - * Node in XML-tree - * @param[in] color - * Reference to parent XFColor object - * @since 1.0 - */ - void extractColor(const pugi::xml_node& node, XFColor& color); - - /** Used XF records count */ - std::vector m_xfCount = {0, 0}; - /** If formatted string is date */ - std::unordered_map m_isDateFormat; -}; - -} // End namespace diff --git a/3rdparty/libs/fileext/excel/xlsxio/xlsxio_private.h b/3rdparty/libs/fileext/excel/xlsxio/xlsxio_private.h new file mode 100644 index 0000000..f4897bd --- /dev/null +++ b/3rdparty/libs/fileext/excel/xlsxio/xlsxio_private.h @@ -0,0 +1,56 @@ +#ifndef INCLUDED_XLSXIO_PRIVATE_H +#define INCLUDED_XLSXIO_PRIVATE_H + +#if defined(_MSC_VER) || (defined(__MINGW32__) && !defined(__MINGW64__)) +#define strcasecmp _stricmp +#endif +#ifdef _WIN32 +#define wcscasecmp _wcsicmp +#endif + + +#define XLSXIOCHAR XML_Char + +#if !defined(XML_UNICODE_WCHAR_T) && !defined(XML_UNICODE) + +//UTF-8 version +#define X(s) s +#ifdef _WIN32 +#define XML_Char_icmp stricmp +#else +#define XML_Char_icmp strcasecmp +#endif +#define XML_Char_len strlen +#define XML_Char_dup strdup +#define XML_Char_cpy strcpy +#define XML_Char_poscpy(d,p,s,l) memcpy(d + p, s, l) +#define XML_Char_malloc(n) ((char*)malloc(n)) +#define XML_Char_realloc(m,n) ((char*)realloc((m), (n))) +#define XML_Char_tol(s) strtol((s), NULL, 10) +#define XML_Char_tod(s) strtod((s), NULL) +#define 
XML_Char_strtol strtol +#define XML_Char_sscanf sscanf +#define XML_Char_printf printf + +#else + +//UTF-16 version +#include +#define X(s) L##s +#define XML_Char_icmp wcscasecmp +#define XML_Char_len wcslen +#define XML_Char_dup wcsdup +#define XML_Char_cpy wcscpy +#define XML_Char_poscpy(d,p,s,l) wmemcpy(d + p, s, l) +#define XML_Char_malloc(n) ((XML_Char*)malloc((n) * sizeof(XML_Char))) +#define XML_Char_realloc(m,n) ((XML_Char*)realloc((m), (n) * sizeof(XML_Char))) +#define XML_Char_tol(s) wcstol((s), NULL, 10) +#define XML_Char_tod(s) wcstod((s), NULL) +#define XML_Char_strtol wcstol +#define XML_Char_sscanf swscanf +#define XML_Char_printf wprintf + +#endif + + +#endif diff --git a/3rdparty/libs/fileext/excel/xlsxio/xlsxio_read.c b/3rdparty/libs/fileext/excel/xlsxio/xlsxio_read.c new file mode 100644 index 0000000..fcf18a9 --- /dev/null +++ b/3rdparty/libs/fileext/excel/xlsxio/xlsxio_read.c @@ -0,0 +1,1640 @@ +#include "xlsxio_private.h" +#include "xlsxio_read_sharedstrings.h" +#include "xlsxio_read.h" +#include "xlsxio_version.h" +#include +#include +#include +#include +#include + +#if defined(USE_MINIZIP) || defined(USE_MINIZIP_NG) +# ifdef USE_MINIZIP_NG +# include +# else +# include +# endif +# define ZIPFILETYPE unzFile +# define ZIPFILEENTRYTYPE unzFile +# if defined(_MSC_VER) +# include +# define IOSIZETYPE int +# define IOFN(fn) _##fn +# else +# include +# define IOSIZETYPE ssize_t +# define IOFN(fn) fn +# endif + +# if defined(MZ_COMPRESS_METHOD_DEFLATE) // support minizip2 which defines MZ_COMPRESS_METHOD_DEFLATE instead of Z_DEFLATED +# ifndef ZCALLBACK +# define ZCALLBACK +# endif +# define voidpf void* +# define uLong unsigned long +# endif + +#else +# if (defined(STATIC) || defined(BUILD_XLSXIO_STATIC) || defined(BUILD_XLSXIO_STATIC_DLL) || (defined(BUILD_XLSXIO) && !defined(BUILD_XLSXIO_DLL) && !defined(BUILD_XLSXIO_SHARED))) && !defined(ZIP_STATIC) +# define ZIP_STATIC +# endif +# include +# define ZIPFILETYPE zip_t +# define ZIPFILEENTRYTYPE 
zip_file_t +# ifndef USE_LIBZIP +# define USE_LIBZIP +# endif +#endif + +#if defined(_MSC_VER) +# undef DLL_EXPORT_XLSXIO +# define DLL_EXPORT_XLSXIO +#endif + +#define PARSE_BUFFER_SIZE 256 +//#define PARSE_BUFFER_SIZE 4 + +static const XLSXIOCHAR* xlsx_content_type = X("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"); +static const XLSXIOCHAR* xlsm_content_type = X("application/vnd.ms-excel.sheet.macroEnabled.main+xml"); +static const XLSXIOCHAR* xltx_content_type = X("application/vnd.openxmlformats-officedocument.spreadsheetml.template.main+xml"); +static const XLSXIOCHAR* xltm_content_type = X("application/vnd.ms-excel.template.macroEnabled.main+xml"); + +#if !defined(XML_UNICODE_WCHAR_T) && !defined(XML_UNICODE) + +//UTF-8 version +#define XML_Char_dupchar strdup + +static ZIPFILEENTRYTYPE* XML_Char_openzip (ZIPFILETYPE* archive, const XML_Char* filename, int flags) +{ + if (!filename || !*filename) + return NULL; +#ifdef USE_MINIZIP + if (unzLocateFile(archive, filename, 0) != UNZ_OK) + return NULL; + if (unzOpenCurrentFile(archive) != UNZ_OK) + return NULL; + return archive; +#else + return zip_fopen(archive, filename, flags); +#endif +} + +#else + +//UTF-16 version +static XML_Char* XML_Char_dupchar(const char* s) +{ + size_t len; + XML_Char* result; + if (!s || (len = mbstowcs(NULL, s, 0)) < 0) + return NULL; + if ((result = XML_Char_malloc(len + 1)) != NULL) { + if ((mbstowcs(result, s, len + 1) != len)) { + free(result); + return NULL; + } + } + return result; +} + +static char* chardupXML_Char(const XML_Char* s) +{ + size_t len; + char* result; + if (!s || (len = wcstombs(NULL, s, 0)) == -1) + return NULL; + if ((result = (char*)malloc(len + 1)) != NULL) { + if ((wcstombs(result, s, len + 1) != len)) { + free(result); + return NULL; + } + } + return result; +} + +static ZIPFILEENTRYTYPE* XML_Char_openzip (ZIPFILETYPE* archive, const XML_Char* filename, int flags) +{ + ZIPFILEENTRYTYPE* result; + char* s; + if (!filename || 
!*filename) + return NULL; + if ((s = chardupXML_Char(filename)) == NULL) + return NULL; +#ifdef USE_MINIZIP + if (unzLocateFile(archive, s, 0) != UNZ_OK) + result = NULL; + else if (unzOpenCurrentFile(archive) != UNZ_OK) + result = NULL; + else + result = archive; +#else + result = zip_fopen(archive, s, flags); +#endif + free(s); + return result; +} + +#endif + + +DLL_EXPORT_XLSXIO void xlsxioread_get_version (int* pmajor, int* pminor, int* pmicro) +{ + if (pmajor) + *pmajor = XLSXIO_VERSION_MAJOR; + if (pminor) + *pminor = XLSXIO_VERSION_MINOR; + if (pmicro) + *pmicro = XLSXIO_VERSION_MICRO; +} + +DLL_EXPORT_XLSXIO const XLSXIOCHAR* xlsxioread_get_version_string () +{ + return (const XLSXIOCHAR*)XLSXIO_VERSION_STRING; +} + +//////////////////////////////////////////////////////////////////////// + +//process XML file contents +int expat_process_zip_file (ZIPFILETYPE* zip, const XML_Char* filename, XML_StartElementHandler start_handler, XML_EndElementHandler end_handler, XML_CharacterDataHandler data_handler, void* callbackdata, XML_Parser* xmlparser) +{ + ZIPFILEENTRYTYPE* zipfile; + XML_Parser parser; + void* buf; +#ifdef USE_MINIZIP + int buflen; +#else + zip_int64_t buflen; +#endif + int done; + enum XML_Status status = XML_STATUS_ERROR; + if ((zipfile = XML_Char_openzip(zip, filename, 0)) == NULL) { + return -1; + } + parser = XML_ParserCreate(NULL); + XML_SetUserData(parser, callbackdata); + XML_SetElementHandler(parser, start_handler, end_handler); + XML_SetCharacterDataHandler(parser, data_handler); + if (xmlparser) + *xmlparser = parser; + buf = XML_GetBuffer(parser, PARSE_BUFFER_SIZE); +#ifdef USE_MINIZIP + while (buf && (buflen = unzReadCurrentFile(zip, buf, PARSE_BUFFER_SIZE)) >= 0) { +#else + while (buf && (buflen = zip_fread(zipfile, buf, PARSE_BUFFER_SIZE)) >= 0) { +#endif + done = buflen < PARSE_BUFFER_SIZE; + if ((status = XML_ParseBuffer(parser, (int)buflen, (done ? 
1 : 0))) == XML_STATUS_ERROR) { + break; + } + if (xmlparser && status == XML_STATUS_SUSPENDED) + return 0; + if (done) + break; + buf = XML_GetBuffer(parser, PARSE_BUFFER_SIZE); + } + XML_ParserFree(parser); +#ifdef USE_MINIZIP + unzCloseCurrentFile(zip); +#else + zip_fclose(zipfile); +#endif + //return (status == XML_STATUS_ERROR != XML_ERROR_FINISHED ? 1 : 0); + return 0; +} + +XML_Parser expat_process_zip_file_suspendable (ZIPFILEENTRYTYPE* zipfile, XML_StartElementHandler start_handler, XML_EndElementHandler end_handler, XML_CharacterDataHandler data_handler, void* callbackdata) +{ + XML_Parser result; + if ((result = XML_ParserCreate(NULL)) != NULL) { + XML_SetUserData(result, callbackdata); + XML_SetElementHandler(result, start_handler, end_handler); + XML_SetCharacterDataHandler(result, data_handler); + } + return result; +} + +enum XML_Status expat_process_zip_file_resume (ZIPFILEENTRYTYPE* zipfile, XML_Parser xmlparser) +{ + enum XML_Status status; + status = XML_ResumeParser(xmlparser); + if (status == XML_STATUS_SUSPENDED) + return status; + if (status == XML_STATUS_ERROR && XML_GetErrorCode(xmlparser) != XML_ERROR_NOT_SUSPENDED) + return status; + void* buf; +#ifdef USE_MINIZIP + int buflen; +#else + zip_int64_t buflen; +#endif + int done; + buf = XML_GetBuffer(xmlparser, PARSE_BUFFER_SIZE); +#ifdef USE_MINIZIP + while (buf && (buflen = unzReadCurrentFile(zipfile, buf, PARSE_BUFFER_SIZE)) >= 0) { +#else + while (buf && (buflen = zip_fread(zipfile, buf, PARSE_BUFFER_SIZE)) >= 0) { +#endif + done = buflen < PARSE_BUFFER_SIZE; + if ((status = XML_ParseBuffer(xmlparser, (int)buflen, (done ? 
1 : 0))) == XML_STATUS_ERROR) + return status; + if (status == XML_STATUS_SUSPENDED) + return status; + if (done) + break; + buf = XML_GetBuffer(xmlparser, PARSE_BUFFER_SIZE); + } + //XML_ParserFree(xmlparser); + return status; +} + +//compare XML name ignoring case and ignoring namespace (returns 0 on match) +#ifdef ASSUME_NO_NAMESPACE +#define XML_Char_icmp_ins XML_Char_icmp +#else +int XML_Char_icmp_ins (const XML_Char* value, const XML_Char* name) +{ + size_t valuelen; + size_t namelen; + if (!value) + return (!name ? 0 : -1); + if (!name) + return -1; + valuelen = XML_Char_len(value); + namelen = XML_Char_len(name); + if (valuelen == namelen) + return XML_Char_icmp(value, name); + if (valuelen > namelen) { + if (value[valuelen - namelen - 1] != ':') + return 1; + return XML_Char_icmp(value + (valuelen - namelen), name); + } + return -1; +} +#endif + +//get expat attribute by name, returns NULL if not found +const XML_Char* get_expat_attr_by_name (const XML_Char** atts, const XML_Char* name) +{ + const XML_Char** p = atts; + if (p) { + while (*p) { + //if (XML_Char_icmp(*p++, name) == 0) + if (XML_Char_icmp_ins(*p++, name) == 0) + return *p; + p++; + } + } + return NULL; +} + +//generate .rels filename, returns NULL on error, caller must free result +XML_Char* get_relationship_filename (const XML_Char* filename) +{ + XML_Char* result; + size_t filenamelen = XML_Char_len(filename); + if ((result = XML_Char_malloc(filenamelen + 12)) != NULL) { + size_t i = filenamelen; + while (i > 0) { + if (filename[i - 1] == '/') + break; + i--; + } + XML_Char_poscpy(result, 0, filename, i); + XML_Char_poscpy(result, i, X("_rels/"), 6); + XML_Char_poscpy(result, i + 6, filename + i, filenamelen - i); + XML_Char_poscpy(result, filenamelen + 6, X(".rels"), 6); + } + return result; +} + +//join basepath and filename (caller must free result) +XML_Char* join_basepath_filename (const XML_Char* basepath, const XML_Char* filename) +{ + XML_Char* result = NULL; + if (filename && 
*filename) { + if (filename[0] == '/' && filename[1]) { + //file is absolute: remove leading slash + result = XML_Char_dup(filename + 1); + } else { + //file is relative: prepend base path + size_t basepathlen = (basepath ? XML_Char_len(basepath) : 0); + size_t filenamelen = XML_Char_len(filename); + if ((result = XML_Char_malloc(basepathlen + filenamelen + 1)) != NULL) { + if (basepathlen > 0) + XML_Char_poscpy(result, 0, basepath, basepathlen); + XML_Char_poscpy(result, basepathlen, filename, filenamelen); + result[basepathlen + filenamelen] = 0; + } + } + } + return result; +} + +//determine column number based on cell coordinate (e.g. "A1"), returns 1-based column number or 0 on error +size_t get_col_nr (const XML_Char* A1col) +{ + const XML_Char* p = A1col; + size_t result = 0; + if (p) { + while (*p) { + if (*p >= 'A' && *p <= 'Z') + result = result * 26 + (*p - 'A') + 1; + else if (*p >= 'a' && *p <= 'z') + result = result * 26 + (*p - 'a') + 1; + else if (*p >= '0' && *p <= '9' && p != A1col) + return result; + else + break; + p++; + } + } + return 0; +} + +//determine row number based on cell coordinate (e.g. 
"A1"), returns 1-based row number or 0 on error +size_t get_row_nr (const XML_Char* A1col) +{ + const XML_Char* p = A1col; + size_t result = 0; + if (p) { + while (*p) { + if ((*p >= 'A' && *p <= 'Z') || (*p >= 'a' && *p <= 'z')) + ; + else if (*p >= '0' && *p <= '9' && p != A1col) + result = result * 10 + (*p - '0'); + else + return 0; + p++; + } + } + return result; +} + +//////////////////////////////////////////////////////////////////////// + +struct xlsxio_read_struct { + ZIPFILETYPE* zip; +}; + +DLL_EXPORT_XLSXIO xlsxioreader xlsxioread_open (const char* filename) +{ + xlsxioreader result; + if ((result = (xlsxioreader)malloc(sizeof(struct xlsxio_read_struct))) != NULL) { +#ifdef USE_MINIZIP + if ((result->zip = unzOpen(filename)) == NULL) { +#else + if ((result->zip = zip_open(filename, ZIP_RDONLY, NULL)) == NULL) { +#endif + free(result); + return NULL; + } + } + return result; +} + +#ifdef USE_MINIZIP +struct minizip_io_filehandle_data { + int filehandle; +}; + +voidpf ZCALLBACK minizip_io_filehandle_open_file_fn (voidpf opaque, const char* filename, int mode) +{ + if (!opaque || ((struct minizip_io_filehandle_data*)opaque)->filehandle < 0) + return NULL; + return &((struct minizip_io_filehandle_data*)opaque)->filehandle; +} + +uLong ZCALLBACK minizip_io_filehandle_read_file_fn (voidpf opaque, voidpf stream, void* buf, uLong size) +{ + IOSIZETYPE len; + if (!opaque || !stream || !buf || size == 0) + return 0; + if ((len = IOFN(read)(*(int*)stream, buf, size)) < 0) + return 0; + return len; +} + +/* +uLong ZCALLBACK minizip_io_filehandle_write_file_fn (voidpf opaque, voidpf stream, const void* buf, uLong size) +{ + return 0; +} +*/ + +int ZCALLBACK minizip_io_filehandle_close_file_fn (voidpf opaque, voidpf stream) +{ + if (stream) + close(*(int*)stream); + free(opaque); + return 0; +} + +int ZCALLBACK minizip_io_filehandle_testerror_file_fn (voidpf opaque, voidpf stream) +{ + return 0; +} + +long ZCALLBACK minizip_io_filehandle_tell_file_fn (voidpf opaque, 
voidpf stream) +{ + return IOFN(lseek)(*(int*)stream, 0, SEEK_CUR); +} + +long ZCALLBACK minizip_io_filehandle_seek_file_fn (voidpf opaque, voidpf stream, uLong offset, int origin) +{ + int whence; + if (!opaque || !stream) + return -1; + switch (origin) { + case ZLIB_FILEFUNC_SEEK_CUR : + whence = SEEK_CUR; + break; + case ZLIB_FILEFUNC_SEEK_END : + whence = SEEK_END; + break; + case ZLIB_FILEFUNC_SEEK_SET : + whence = SEEK_SET; + break; + default : + return -1; + } + return (IOFN(lseek)(*(int*)stream, offset, whence) >= 0 ? 0 : -1); +} +#endif + +DLL_EXPORT_XLSXIO xlsxioreader xlsxioread_open_filehandle (int filehandle) +{ + xlsxioreader result; + if ((result = (xlsxioreader)malloc(sizeof(struct xlsxio_read_struct))) != NULL) { +#ifdef USE_MINIZIP + zlib_filefunc_def minizip_io_filehandle_functions; + if ((minizip_io_filehandle_functions.opaque = malloc(sizeof(struct minizip_io_filehandle_data))) == NULL) { + free(result); + return NULL; + } + minizip_io_filehandle_functions.zopen_file = minizip_io_filehandle_open_file_fn; + minizip_io_filehandle_functions.zread_file = minizip_io_filehandle_read_file_fn; + minizip_io_filehandle_functions.zwrite_file = /*minizip_io_filehandle_write_file_fn*/NULL; + minizip_io_filehandle_functions.ztell_file = minizip_io_filehandle_tell_file_fn; + minizip_io_filehandle_functions.zseek_file = minizip_io_filehandle_seek_file_fn; + minizip_io_filehandle_functions.zclose_file = minizip_io_filehandle_close_file_fn; + minizip_io_filehandle_functions.zerror_file = minizip_io_filehandle_testerror_file_fn; + ((struct minizip_io_filehandle_data*)minizip_io_filehandle_functions.opaque)->filehandle = filehandle; + if ((result->zip = unzOpen2(NULL, &minizip_io_filehandle_functions)) == NULL) { + free(result); + return NULL; + } +#else + if ((result->zip = zip_fdopen(filehandle, ZIP_RDONLY, NULL)) == NULL) { + free(result); + return NULL; + } +#endif + } + return result; +} + +#ifdef USE_MINIZIP +struct minizip_io_memory_data { + void* data; + 
uint64_t datalen; + int freedata; +}; + +struct minizip_io_memory_handle { + uint64_t pos; +}; + +voidpf ZCALLBACK minizip_io_memory_open_file_fn (voidpf opaque, const char* filename, int mode) +{ + struct minizip_io_memory_handle* result; + if (!opaque || !((struct minizip_io_memory_data*)opaque)->data) + return NULL; + if ((result = (struct minizip_io_memory_handle*)malloc(sizeof(struct minizip_io_memory_handle))) != NULL) { + result->pos = 0; + } + return result; +} + +uLong ZCALLBACK minizip_io_memory_read_file_fn (voidpf opaque, voidpf stream, void* buf, uLong size) +{ + uLong len; + if (!opaque || !stream || !buf || size == 0) + return 0; + if (((struct minizip_io_memory_handle*)stream)->pos + size <= ((struct minizip_io_memory_data*)opaque)->datalen) + len = size; + else + len = ((struct minizip_io_memory_data*)opaque)->datalen - ((struct minizip_io_memory_handle*)stream)->pos; + memcpy(buf, (char *)(((struct minizip_io_memory_data*)opaque)->data) + ((struct minizip_io_memory_handle*)stream)->pos, len); + ((struct minizip_io_memory_handle*)stream)->pos += len; + return len; +} + +/* +uLong ZCALLBACK minizip_io_memory_write_file_fn (voidpf opaque, voidpf stream, const void* buf, uLong size) +{ + return 0; +} +*/ + +int ZCALLBACK minizip_io_memory_close_file_fn (voidpf opaque, voidpf stream) +{ + free(stream); + if (opaque && ((struct minizip_io_memory_data*)opaque)->freedata) + free(((struct minizip_io_memory_data*)opaque)->data); + free(opaque); + return 0; +} + +int ZCALLBACK minizip_io_memory_testerror_file_fn (voidpf opaque, voidpf stream) +{ + return 0; +} + +long ZCALLBACK minizip_io_memory_tell_file_fn (voidpf opaque, voidpf stream) +{ + if (!opaque || !stream) + return 0; + return ((struct minizip_io_memory_handle*)stream)->pos; +} + +long ZCALLBACK minizip_io_memory_seek_file_fn (voidpf opaque, voidpf stream, uLong offset, int origin) +{ + switch (origin) { + case ZLIB_FILEFUNC_SEEK_CUR : + /*if (offset < 0) { + if (((struct 
minizip_io_memory_handle*)stream)->pos < -offset) + ((struct minizip_io_memory_handle*)stream)->pos = 0; + else + ((struct minizip_io_memory_handle*)stream)->pos += offset; + } else*/ { + if (((struct minizip_io_memory_handle*)stream)->pos + offset > ((struct minizip_io_memory_data*)opaque)->datalen) + ((struct minizip_io_memory_handle*)stream)->pos = ((struct minizip_io_memory_data*)opaque)->datalen; + else + ((struct minizip_io_memory_handle*)stream)->pos += offset; + } + break; + case ZLIB_FILEFUNC_SEEK_END : + /*if (offset < 0) { + if (((struct minizip_io_memory_data*)opaque)->datalen < -offset) + ((struct minizip_io_memory_handle*)stream)->pos = 0; + else + ((struct minizip_io_memory_handle*)stream)->pos = ((struct minizip_io_memory_data*)opaque)->datalen + offset; + } else*/ { + ((struct minizip_io_memory_handle*)stream)->pos = ((struct minizip_io_memory_data*)opaque)->datalen; + } + break; + case ZLIB_FILEFUNC_SEEK_SET : + /*if (offset < 0) { + ((struct minizip_io_memory_handle*)stream)->pos = 0; + } else*/ { + if (offset > ((struct minizip_io_memory_data*)opaque)->datalen) + ((struct minizip_io_memory_handle*)stream)->pos = ((struct minizip_io_memory_data*)opaque)->datalen; + else + ((struct minizip_io_memory_handle*)stream)->pos = offset; + } + ((struct minizip_io_memory_handle*)stream)->pos = offset; + break; + default : + return -1; + } + return 0; +} +#endif + +DLL_EXPORT_XLSXIO xlsxioreader xlsxioread_open_memory (void* data, uint64_t datalen, int freedata) +{ + xlsxioreader result; +#ifdef USE_MINIZIP + if ((result = (xlsxioreader)malloc(sizeof(struct xlsxio_read_struct))) != NULL) { + zlib_filefunc_def minizip_io_memory_functions; + if ((minizip_io_memory_functions.opaque = malloc(sizeof(struct minizip_io_memory_data))) == NULL) { + free(result); + return NULL; + } + minizip_io_memory_functions.zopen_file = minizip_io_memory_open_file_fn; + minizip_io_memory_functions.zread_file = minizip_io_memory_read_file_fn; + 
minizip_io_memory_functions.zwrite_file = /*minizip_io_memory_write_file_fn*/NULL; + minizip_io_memory_functions.ztell_file = minizip_io_memory_tell_file_fn; + minizip_io_memory_functions.zseek_file = minizip_io_memory_seek_file_fn; + minizip_io_memory_functions.zclose_file = minizip_io_memory_close_file_fn; + minizip_io_memory_functions.zerror_file = minizip_io_memory_testerror_file_fn; + ((struct minizip_io_memory_data*)minizip_io_memory_functions.opaque)->data = data; + ((struct minizip_io_memory_data*)minizip_io_memory_functions.opaque)->datalen = datalen; + ((struct minizip_io_memory_data*)minizip_io_memory_functions.opaque)->freedata = freedata; + if ((result->zip = unzOpen2(NULL, &minizip_io_memory_functions)) == NULL) { + free(result); + return NULL; + } + } +#else + zip_source_t* zipsrc; + if ((zipsrc = zip_source_buffer_create(data, datalen, freedata, NULL)) == NULL) { + return NULL; + } + if ((result = (xlsxioreader)malloc(sizeof(struct xlsxio_read_struct))) != NULL) { + if ((result->zip = zip_open_from_source(zipsrc, ZIP_RDONLY, NULL)) == NULL) { + zip_source_free(zipsrc); + free(result); + return NULL; + } + } +#endif + return result; +} + +DLL_EXPORT_XLSXIO void xlsxioread_close (xlsxioreader handle) +{ + if (handle) { + //note: no need to call zip_source_free() after successful use in zip_open_from_source() +#ifdef USE_MINIZIP + unzClose(handle->zip); +#else + zip_close(handle->zip); +#endif + free(handle); + } +} + +//////////////////////////////////////////////////////////////////////// + +//callback function definition for use with iterate_files_by_contenttype +typedef void (*contenttype_file_callback_fn)(ZIPFILETYPE* zip, const XML_Char* filename, const XML_Char* contenttype, void* callbackdata); + +struct iterate_files_by_contenttype_callback_data { + ZIPFILETYPE* zip; + const XML_Char* contenttype; + contenttype_file_callback_fn filecallbackfn; + void* filecallbackdata; +}; + +//expat callback function for element start used by 
iterate_files_by_contenttype +void iterate_files_by_contenttype_expat_callback_element_start (void* callbackdata, const XML_Char* name, const XML_Char** atts) +{ + struct iterate_files_by_contenttype_callback_data* data = (struct iterate_files_by_contenttype_callback_data*)callbackdata; + if (XML_Char_icmp_ins(name, X("Override")) == 0) { + //explicitly specified file + const XML_Char* contenttype; + const XML_Char* partname; + if ((contenttype = get_expat_attr_by_name(atts, X("ContentType"))) != NULL && XML_Char_icmp(contenttype, data->contenttype) == 0) { + if ((partname = get_expat_attr_by_name(atts, X("PartName"))) != NULL) { + if (partname[0] == '/') + partname++; + data->filecallbackfn(data->zip, partname, contenttype, data->filecallbackdata); + } + } + } else if (XML_Char_icmp_ins(name, X("Default")) == 0) { + //by extension + const XML_Char* contenttype; + const XML_Char* extension; + if ((contenttype = get_expat_attr_by_name(atts, X("ContentType"))) != NULL && XML_Char_icmp(contenttype, data->contenttype) == 0) { + if ((extension = get_expat_attr_by_name(atts, X("Extension"))) != NULL) { + XML_Char* filename; + size_t filenamelen; + size_t extensionlen = XML_Char_len(extension); +#ifdef USE_MINIZIP +#define UNZIP_FILENAME_BUFFER_STEP 32 + char* buf; + size_t buflen; + int status; +unz_global_info zipglobalinfo; +unzGetGlobalInfo(data->zip, &zipglobalinfo); + buf = (char*)malloc(buflen = UNZIP_FILENAME_BUFFER_STEP); + status = unzGoToFirstFile(data->zip); + while (status == UNZ_OK) { + buf[buflen - 1] = 0; + while ((status = unzGetCurrentFileInfo(data->zip, NULL, buf, buflen, NULL, 0, NULL, 0)) == UNZ_OK && buf[buflen - 1] != 0) { + buflen += UNZIP_FILENAME_BUFFER_STEP; + if ((buf = (char*)realloc(buf, buflen)) == NULL) { + //memory allocation error + return; + } + buf[buflen - 1] = 0; + } + if (status != UNZ_OK) + break; + filename = XML_Char_dupchar(buf); + status = unzGoToNextFile(data->zip); +#else + zip_int64_t i; + zip_int64_t zipnumfiles = 
zip_get_num_entries(data->zip, 0); + for (i = 0; i < zipnumfiles; i++) { + filename = XML_Char_dupchar(zip_get_name(data->zip, i, ZIP_FL_ENC_GUESS)); +#endif + filenamelen = XML_Char_len(filename); + if (filenamelen > extensionlen && filename[filenamelen - extensionlen - 1] == '.' && XML_Char_icmp(filename + filenamelen - extensionlen, extension) == 0) { + data->filecallbackfn(data->zip, filename, contenttype, data->filecallbackdata); + } + free(filename); + } +#ifdef USE_MINIZIP + free(buf); +#endif + } + } + } +} + +//list file names by content type +int iterate_files_by_contenttype (ZIPFILETYPE* zip, const XML_Char* contenttype, contenttype_file_callback_fn filecallbackfn, void* filecallbackdata, XML_Parser* xmlparser) +{ + struct iterate_files_by_contenttype_callback_data callbackdata = { + .zip = zip, + .contenttype = contenttype, + .filecallbackfn = filecallbackfn, + .filecallbackdata = filecallbackdata + }; + return expat_process_zip_file(zip, X("[Content_Types].xml"), iterate_files_by_contenttype_expat_callback_element_start, NULL, NULL, &callbackdata, xmlparser); +} + +//////////////////////////////////////////////////////////////////////// + +//callback structure used by main_sheet_list_expat_callback_element_start +struct main_sheet_list_callback_data { + XML_Parser xmlparser; + xlsxioread_list_sheets_callback_fn callback; + void* callbackdata; +}; + +//callback used by xlsxioread_list_sheets +void main_sheet_list_expat_callback_element_start (void* callbackdata, const XML_Char* name, const XML_Char** atts) +{ + struct main_sheet_list_callback_data* data = (struct main_sheet_list_callback_data*)callbackdata; + if (data && data->callback) { + if (XML_Char_icmp_ins(name, X("sheet")) == 0) { + const XML_Char* sheetname; + //const XML_Char* relid = get_expat_attr_by_name(atts, X("r:id")); + if ((sheetname = get_expat_attr_by_name(atts, X("name"))) != NULL) { + if (data->callback) { + if ((*data->callback)(sheetname, data->callbackdata) != 0) { + 
XML_StopParser(data->xmlparser, XML_FALSE); + return; + } +/* + } else { + //for non-calback method suspend here + XML_StopParser(data->xmlparser, XML_TRUE); +*/ + } + } + } + } +} + +//process contents each sheet listed in main sheet +void xlsxioread_list_sheets_callback (ZIPFILETYPE* zip, const XML_Char* filename, const XML_Char* contenttype, void* callbackdata) +{ + //get sheet information from file + expat_process_zip_file(zip, filename, main_sheet_list_expat_callback_element_start, NULL, NULL, callbackdata, &((struct main_sheet_list_callback_data*)callbackdata)->xmlparser); +} + +//list all worksheets +DLL_EXPORT_XLSXIO void xlsxioread_list_sheets (xlsxioreader handle, xlsxioread_list_sheets_callback_fn callback, void* callbackdata) +{ + if (!handle || !callback) + return; + //process contents of main sheet + struct main_sheet_list_callback_data sheetcallbackdata = { + .xmlparser = NULL, + .callback = callback, + .callbackdata = callbackdata + }; + iterate_files_by_contenttype(handle->zip, xlsx_content_type, xlsxioread_list_sheets_callback, &sheetcallbackdata, &sheetcallbackdata.xmlparser); + iterate_files_by_contenttype(handle->zip, xlsm_content_type, xlsxioread_list_sheets_callback, &sheetcallbackdata, &sheetcallbackdata.xmlparser); + iterate_files_by_contenttype(handle->zip, xltx_content_type, xlsxioread_list_sheets_callback, &sheetcallbackdata, &sheetcallbackdata.xmlparser); + iterate_files_by_contenttype(handle->zip, xltm_content_type, xlsxioread_list_sheets_callback, &sheetcallbackdata, &sheetcallbackdata.xmlparser); +} + +//////////////////////////////////////////////////////////////////////// + +//callback data structure used by main_sheet_get_sheetfile_callback +struct main_sheet_get_rels_callback_data { + XML_Parser xmlparser; + const XML_Char* sheetname; + XML_Char* basepath; + XML_Char* sheetrelid; + XML_Char* sheetfile; + XML_Char* sharedstringsfile; + XML_Char* stylesfile; +}; + +//determine relationship id for specific sheet name +void 
main_sheet_get_relid_expat_callback_element_start (void* callbackdata, const XML_Char* name, const XML_Char** atts) +{ + struct main_sheet_get_rels_callback_data* data = (struct main_sheet_get_rels_callback_data*)callbackdata; + if (XML_Char_icmp_ins(name, X("sheet")) == 0) { + const XML_Char* sheetname; + if ((sheetname = get_expat_attr_by_name(atts, X("name"))) != NULL && (!data->sheetname || XML_Char_icmp(sheetname, data->sheetname) == 0)) { + const XML_Char* relid = get_expat_attr_by_name(atts, X("r:id")); + if (relid && *relid) { + data->sheetrelid = XML_Char_dup(relid); + XML_StopParser(data->xmlparser, XML_FALSE); + return; + } + } + } +} + +//determine file names for specific relationship id +void main_sheet_get_sheetfile_expat_callback_element_start (void* callbackdata, const XML_Char* name, const XML_Char** atts) +{ + struct main_sheet_get_rels_callback_data* data = (struct main_sheet_get_rels_callback_data*)callbackdata; + if (data->sheetrelid) { + if (XML_Char_icmp_ins(name, X("Relationship")) == 0) { + const XML_Char* reltype; + if ((reltype = get_expat_attr_by_name(atts, X("Type"))) != NULL) { + if (XML_Char_icmp(reltype, X("http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet")) == 0) { + const XML_Char* relid = get_expat_attr_by_name(atts, X("Id")); + if (XML_Char_icmp(relid, data->sheetrelid) == 0) { + const XML_Char* filename = get_expat_attr_by_name(atts, X("Target")); + if (filename && *filename) { + data->sheetfile = join_basepath_filename(data->basepath, filename); + } + } + } else if (XML_Char_icmp(reltype, X("http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings")) == 0) { + const XML_Char* filename = get_expat_attr_by_name(atts, X("Target")); + if (filename && *filename) { + data->sharedstringsfile = join_basepath_filename(data->basepath, filename); + } + } else if (XML_Char_icmp(reltype, X("http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles")) == 0) { + const 
XML_Char* filename = get_expat_attr_by_name(atts, X("Target")); + if (filename && *filename) { + data->stylesfile = join_basepath_filename(data->basepath, filename); + } + } + } + } + } +} + +//determine the file name for a specified sheet name +void main_sheet_get_sheetfile_callback (ZIPFILETYPE* zip, const XML_Char* filename, const XML_Char* contenttype, void* callbackdata) +{ + struct main_sheet_get_rels_callback_data* data = (struct main_sheet_get_rels_callback_data*)callbackdata; + if (!data->sheetrelid) { + expat_process_zip_file(zip, filename, main_sheet_get_relid_expat_callback_element_start, NULL, NULL, callbackdata, &data->xmlparser); + } + if (data->sheetrelid) { + XML_Char* relfilename; + //determine base name (including trailing slash) + size_t i = XML_Char_len(filename); + while (i > 0) { + if (filename[i - 1] == '/') + break; + i--; + } + if (data->basepath) + free(data->basepath); + if ((data->basepath = XML_Char_malloc(i + 1)) != NULL) { + XML_Char_poscpy(data->basepath, 0, filename, i); + data->basepath[i] = 0; + } + //find sheet filename in relationship contents + if ((relfilename = get_relationship_filename(filename)) != NULL) { + expat_process_zip_file(zip, relfilename, main_sheet_get_sheetfile_expat_callback_element_start, NULL, NULL, callbackdata, &data->xmlparser); + free(relfilename); + } else { + free(data->sheetrelid); + data->sheetrelid = NULL; + if (data->basepath) { + free(data->basepath); + data->basepath = NULL; + } + } + } +} + +//////////////////////////////////////////////////////////////////////// + +typedef enum { + none, + value_string, + inline_string, + shared_string +} cell_string_type_enum; + +#define XLSXIOREAD_NO_CALLBACK 0x80 + +struct data_sheet_callback_data { + XML_Parser xmlparser; + struct sharedstringlist* sharedstrings; + size_t rownr; + size_t colnr; + size_t cols; + size_t colsnotnull; + XML_Char* celldata; + size_t celldatalen; + cell_string_type_enum cell_string_type; + unsigned int flags; + XML_Char* skiptag; 
//tag to skip + size_t skiptagcount; //nesting level for current tag to skip + XML_StartElementHandler skip_start; //start handler to set after skipping + XML_EndElementHandler skip_end; //end handler to set after skipping + XML_CharacterDataHandler skip_data; //data handler to set after skipping + xlsxioread_process_row_callback_fn sheet_row_callback; + xlsxioread_process_cell_callback_fn sheet_cell_callback; + void* callbackdata; +}; + +void data_sheet_callback_data_initialize (struct data_sheet_callback_data* data, struct sharedstringlist* sharedstrings, unsigned int flags, xlsxioread_process_cell_callback_fn cell_callback, xlsxioread_process_row_callback_fn row_callback, void* callbackdata) +{ + data->xmlparser = NULL; + data->sharedstrings = sharedstrings; + data->rownr = 0; + data->colnr = 0; + data->cols = 0; + data->colsnotnull = 0; + data->celldata = NULL; + data->celldatalen = 0; + data->cell_string_type = none; + data->flags = flags; + data->skiptag = NULL; + data->skiptagcount = 0; + data->skip_start = NULL; + data->skip_end = NULL; + data->skip_data = NULL; + data->sheet_cell_callback = cell_callback; + data->sheet_row_callback = row_callback; + data->callbackdata = callbackdata; +} + +void data_sheet_callback_data_cleanup (struct data_sheet_callback_data* data) +{ + sharedstringlist_destroy(data->sharedstrings); + free(data->celldata); + free(data->skiptag); +} + +void data_sheet_expat_callback_skip_tag_start (void* callbackdata, const XML_Char* name, const XML_Char** atts) +{ + struct data_sheet_callback_data* data = (struct data_sheet_callback_data*)callbackdata; + if (name && XML_Char_icmp_ins(name, data->skiptag) == 0) { + //increment nesting level + data->skiptagcount++; + } +} + +void data_sheet_expat_callback_skip_tag_end (void* callbackdata, const XML_Char* name) +{ + struct data_sheet_callback_data* data = (struct data_sheet_callback_data*)callbackdata; + if (!name || XML_Char_icmp_ins(name, data->skiptag) == 0) { + if (--data->skiptagcount 
== 0) { + //restore handlers when done skipping + XML_SetElementHandler(data->xmlparser, data->skip_start, data->skip_end); + XML_SetCharacterDataHandler(data->xmlparser, data->skip_data); + free(data->skiptag); + data->skiptag = NULL; + } + } +} + +void data_sheet_expat_callback_find_worksheet_start (void* callbackdata, const XML_Char* name, const XML_Char** atts); +void data_sheet_expat_callback_find_worksheet_end (void* callbackdata, const XML_Char* name); +void data_sheet_expat_callback_find_sheetdata_start (void* callbackdata, const XML_Char* name, const XML_Char** atts); +void data_sheet_expat_callback_find_sheetdata_end (void* callbackdata, const XML_Char* name); +void data_sheet_expat_callback_find_row_start (void* callbackdata, const XML_Char* name, const XML_Char** atts); +void data_sheet_expat_callback_find_row_end (void* callbackdata, const XML_Char* name); +void data_sheet_expat_callback_find_cell_start (void* callbackdata, const XML_Char* name, const XML_Char** atts); +void data_sheet_expat_callback_find_cell_end (void* callbackdata, const XML_Char* name); +void data_sheet_expat_callback_find_value_start (void* callbackdata, const XML_Char* name, const XML_Char** atts); +void data_sheet_expat_callback_find_value_end (void* callbackdata, const XML_Char* name); +void data_sheet_expat_callback_value_data (void* callbackdata, const XML_Char* buf, int buflen); + +void data_sheet_expat_callback_find_worksheet_start (void* callbackdata, const XML_Char* name, const XML_Char** atts) +{ + struct data_sheet_callback_data* data = (struct data_sheet_callback_data*)callbackdata; + if (XML_Char_icmp_ins(name, X("worksheet")) == 0) { + XML_SetElementHandler(data->xmlparser, data_sheet_expat_callback_find_sheetdata_start, NULL); + } +} + +void data_sheet_expat_callback_find_worksheet_end (void* callbackdata, const XML_Char* name) +{ + struct data_sheet_callback_data* data = (struct data_sheet_callback_data*)callbackdata; + if (XML_Char_icmp_ins(name, X("worksheet")) 
== 0) { + XML_SetElementHandler(data->xmlparser, data_sheet_expat_callback_find_worksheet_start, NULL); + } +} + +void data_sheet_expat_callback_find_sheetdata_start (void* callbackdata, const XML_Char* name, const XML_Char** atts) +{ + struct data_sheet_callback_data* data = (struct data_sheet_callback_data*)callbackdata; + if (XML_Char_icmp_ins(name, X("sheetData")) == 0) { + XML_SetElementHandler(data->xmlparser, data_sheet_expat_callback_find_row_start, data_sheet_expat_callback_find_sheetdata_end); + } +} + +void data_sheet_expat_callback_find_sheetdata_end (void* callbackdata, const XML_Char* name) +{ + struct data_sheet_callback_data* data = (struct data_sheet_callback_data*)callbackdata; + if (XML_Char_icmp_ins(name, X("sheetData")) == 0) { + XML_SetElementHandler(data->xmlparser, data_sheet_expat_callback_find_sheetdata_start, data_sheet_expat_callback_find_worksheet_end); + } else { + data_sheet_expat_callback_find_worksheet_end(callbackdata, name); + } +} + +void data_sheet_expat_callback_find_row_start (void* callbackdata, const XML_Char* name, const XML_Char** atts) +{ + struct data_sheet_callback_data* data = (struct data_sheet_callback_data*)callbackdata; + if (XML_Char_icmp_ins(name, X("row")) == 0) { + const XML_Char* hidden = get_expat_attr_by_name(atts, X("hidden")); + if (!(hidden && XML_Char_tol(hidden) != 0 && (data->flags & XLSXIOREAD_SKIP_HIDDEN_ROWS))) { + int skippedemptyrow = (data->rownr != 0 && data->colsnotnull == 0 && (data->flags & XLSXIOREAD_SKIP_EMPTY_ROWS)); + data->rownr++; + data->colnr = 0; + data->colsnotnull = 0; + XML_SetElementHandler(data->xmlparser, data_sheet_expat_callback_find_cell_start, data_sheet_expat_callback_find_row_end); + //for non-calback method suspend here on new row + if (data->flags & XLSXIOREAD_NO_CALLBACK) { + if (!skippedemptyrow) { + XML_StopParser(data->xmlparser, XML_TRUE); + } + } + } else { + //skip hidden row + XML_SetElementHandler(data->xmlparser, NULL, data_sheet_expat_callback_find_row_end); 
+ } + } +} + +void data_sheet_expat_callback_find_row_end (void* callbackdata, const XML_Char* name) +{ + struct data_sheet_callback_data* data = (struct data_sheet_callback_data*)callbackdata; + if (XML_Char_icmp_ins(name, X("row")) == 0) { + //determine number of columns based on first row + if ((data->flags & XLSXIOREAD_SKIP_EXTRA_CELLS) && data->rownr == 1 && data->cols == 0) + data->cols = data->colnr; + //add empty columns if needed + if (!(data->flags & XLSXIOREAD_NO_CALLBACK) && data->sheet_cell_callback && !(data->flags & XLSXIOREAD_SKIP_EMPTY_CELLS)) { + while (data->colnr < data->cols) { + if ((*data->sheet_cell_callback)(data->rownr, data->colnr + 1, NULL, data->callbackdata)) { + XML_StopParser(data->xmlparser, XML_FALSE); + return; + } + data->colnr++; + } + } + free(data->celldata); + data->celldata = NULL; + XML_SetElementHandler(data->xmlparser, data_sheet_expat_callback_find_row_start, data_sheet_expat_callback_find_sheetdata_end); + //process end of row + if (!(data->flags & XLSXIOREAD_NO_CALLBACK)) { + if (data->sheet_row_callback && !(data->colsnotnull == 0 && (data->flags & XLSXIOREAD_SKIP_EMPTY_ROWS))) { + if ((*data->sheet_row_callback)(data->rownr, data->colnr, data->callbackdata)) { + XML_StopParser(data->xmlparser, XML_FALSE); + return; + } + } + } else { + //for non-calback method suspend here on end of row + if (!(data->colsnotnull == 0 && (data->flags & XLSXIOREAD_SKIP_EMPTY_ROWS))) + XML_StopParser(data->xmlparser, XML_TRUE); + } + } else { + data_sheet_expat_callback_find_sheetdata_end(callbackdata, name); + } +} + +void data_sheet_expat_callback_find_cell_start (void* callbackdata, const XML_Char* name, const XML_Char** atts) +{ + struct data_sheet_callback_data* data = (struct data_sheet_callback_data*)callbackdata; + if (XML_Char_icmp_ins(name, X("c")) == 0) { + const XML_Char* t = get_expat_attr_by_name(atts, X("r")); + size_t cellcolnr = get_col_nr(t); + //insert empty rows if needed + if (data->colnr == 0) { + size_t cellrownr 
= get_row_nr(t); + if (cellrownr) { + if (!(data->flags & XLSXIOREAD_SKIP_EMPTY_ROWS) && !(data->flags & XLSXIOREAD_NO_CALLBACK)) { + while (data->rownr < cellrownr) { + //insert empty columns + if (!(data->flags & XLSXIOREAD_SKIP_EMPTY_CELLS) && data->sheet_cell_callback) { + while (data->colnr < data->cols) { + if ((*data->sheet_cell_callback)(data->rownr, data->colnr + 1, NULL, data->callbackdata)) { + XML_StopParser(data->xmlparser, XML_FALSE); + return; + } + data->colnr++; + } + } + //finish empty row + if (data->sheet_row_callback) { + if ((*data->sheet_row_callback)(data->rownr, data->cols, data->callbackdata)) { + XML_StopParser(data->xmlparser, XML_FALSE); + return; + } + } + data->rownr++; + data->colnr = 0; + } + } else { + data->rownr = cellrownr; + } + } + } + //insert empty columns if needed + if (cellcolnr) { + cellcolnr--; + if ((data->flags & XLSXIOREAD_SKIP_EMPTY_CELLS) || data->colnr == 0 || (data->flags & XLSXIOREAD_NO_CALLBACK)) { + data->colnr = cellcolnr; + } else { + size_t cellmax = cellcolnr; + if ((data->flags & XLSXIOREAD_SKIP_EXTRA_CELLS) && data->cols > 0 && cellmax > data->cols) + cellmax = data->cols; + while (data->colnr < cellmax) { + if (data->colnr > 0 && data->sheet_cell_callback) { + if ((*data->sheet_cell_callback)(data->rownr, data->colnr + 1, NULL, data->callbackdata)) { + XML_StopParser(data->xmlparser, XML_FALSE); + return; + } + } + data->colnr++; + } + } + } + //determine value type + if ((t = get_expat_attr_by_name(atts, X("t"))) != NULL && XML_Char_icmp(t, X("s")) == 0) + data->cell_string_type = shared_string; + else + data->cell_string_type = value_string; + //prepare empty value data + free(data->celldata); + data->celldata = NULL; + data->celldatalen = 0; + XML_SetElementHandler(data->xmlparser, data_sheet_expat_callback_find_value_start, data_sheet_expat_callback_find_cell_end); + } +} + +void data_sheet_expat_callback_find_cell_end (void* callbackdata, const XML_Char* name) +{ + struct data_sheet_callback_data* 
data = (struct data_sheet_callback_data*)callbackdata; + if (XML_Char_icmp_ins(name, X("c")) == 0) { + //determine value + if (data->celldata) { + data->celldata[data->celldatalen] = 0; + if (data->cell_string_type == shared_string) { + //get shared string + XML_Char* p = NULL; + long num = XML_Char_strtol(data->celldata, &p, 10); + if (!p || (p != data->celldata && *p == 0)) { + const XML_Char* s = sharedstringlist_get(data->sharedstrings, num); + free(data->celldata); + data->celldata = (s ? XML_Char_dup(s) : NULL); + } + } else if (data->cell_string_type == none) { + //unknown value type + free(data->celldata); + data->celldata = NULL; + } + } + //reset data + data->colnr++; + data->cell_string_type = none; + data->celldatalen = 0; + XML_SetElementHandler(data->xmlparser, data_sheet_expat_callback_find_cell_start, data_sheet_expat_callback_find_row_end); + XML_SetCharacterDataHandler(data->xmlparser, NULL); + //process data if needed + if (data->celldata || !((data->flags & XLSXIOREAD_SKIP_EMPTY_CELLS) || ((data->flags & XLSXIOREAD_SKIP_EMPTY_ROWS) && data->colsnotnull == 0))) { + if (!((data->flags & XLSXIOREAD_SKIP_EXTRA_CELLS) && data->cols > 0 && data->colnr > data->cols)) { + //process data + if (!(data->flags & XLSXIOREAD_NO_CALLBACK)) { + if (data->sheet_cell_callback) { + //insert empty columns if needed in case of empty row detection + /////if ((data->flags & XLSXIOREAD_SKIP_EMPTY_ROWS) && !(data->flags & XLSXIOREAD_SKIP_EMPTY_CELLS) && data->colsnotnull == 0 && data->colnr > 1) { + if (!(data->flags & XLSXIOREAD_SKIP_EMPTY_CELLS) && data->colsnotnull == 0 && data->colnr > 1) { + size_t col; + for (col = 1; col < data->colnr; col++) { + if ((*data->sheet_cell_callback)(data->rownr, col, NULL, data->callbackdata)) { + XML_StopParser(data->xmlparser, XML_FALSE); + return; + } + } + } + //process current column data + if ((*data->sheet_cell_callback)(data->rownr, data->colnr, data->celldata, data->callbackdata)) { + XML_StopParser(data->xmlparser, 
XML_FALSE); + return; + } + data->colsnotnull++; + } + } else { + //for non-calback method suspend here with cell data (don't return NULL as that is used to indicate end of row) + if (!data->celldata) + data->celldata = XML_Char_dup(X("")); + XML_StopParser(data->xmlparser, XML_TRUE); + data->colsnotnull++; + } + } + } + } else { + data_sheet_expat_callback_find_row_end(callbackdata, name); + } +} + +void data_sheet_expat_callback_find_value_start (void* callbackdata, const XML_Char* name, const XML_Char** atts) +{ + struct data_sheet_callback_data* data = (struct data_sheet_callback_data*)callbackdata; + if (XML_Char_icmp_ins(name, X("v")) == 0 || XML_Char_icmp_ins(name, X("t")) == 0) { + XML_SetElementHandler(data->xmlparser, NULL, data_sheet_expat_callback_find_value_end); + XML_SetCharacterDataHandler(data->xmlparser, data_sheet_expat_callback_value_data); + } else if (XML_Char_icmp_ins(name, X("is")) == 0) { + data->cell_string_type = inline_string; + } else if (XML_Char_icmp_ins(name, X("rPh")) == 0) { + data->skiptag = XML_Char_dup(name); + data->skiptagcount = 1; + data->skip_start = data_sheet_expat_callback_find_value_start; + data->skip_end = data_sheet_expat_callback_find_cell_end; + data->skip_data = NULL; + XML_SetElementHandler(data->xmlparser, data_sheet_expat_callback_skip_tag_start, data_sheet_expat_callback_skip_tag_end); + XML_SetCharacterDataHandler(data->xmlparser, NULL); + } +} + +void data_sheet_expat_callback_find_value_end (void* callbackdata, const XML_Char* name) +{ + struct data_sheet_callback_data* data = (struct data_sheet_callback_data*)callbackdata; + if (XML_Char_icmp_ins(name, X("v")) == 0 || XML_Char_icmp_ins(name, X("t")) == 0) { + XML_SetElementHandler(data->xmlparser, data_sheet_expat_callback_find_value_start, data_sheet_expat_callback_find_cell_end); + XML_SetCharacterDataHandler(data->xmlparser, NULL); + } else if (XML_Char_icmp_ins(name, X("is")) == 0) { + data->cell_string_type = none; + } else { + 
data_sheet_expat_callback_find_row_end(callbackdata, name); + } +} + +void data_sheet_expat_callback_value_data (void* callbackdata, const XML_Char* buf, int buflen) +{ + struct data_sheet_callback_data* data = (struct data_sheet_callback_data*)callbackdata; + if (data->cell_string_type != none) { + if ((data->celldata = XML_Char_realloc(data->celldata, data->celldatalen + buflen + 1)) == NULL) { + //memory allocation error + data->celldatalen = 0; + } else { + //add new data to value buffer + XML_Char_poscpy(data->celldata, data->celldatalen, buf, buflen); + data->celldatalen += buflen; + } + } +} + +//////////////////////////////////////////////////////////////////////// + +struct xlsxio_read_sheet_struct { + xlsxioreader handle; + ZIPFILEENTRYTYPE* zipfile; + struct data_sheet_callback_data processcallbackdata; + size_t lastrownr; + size_t paddingrow; + size_t lastcolnr; + size_t paddingcol; +}; + +DLL_EXPORT_XLSXIO size_t xlsxioread_sheet_last_row_index (xlsxioreadersheet sheethandle) +{ + return sheethandle->lastrownr; +} + +DLL_EXPORT_XLSXIO size_t xlsxioread_sheet_last_column_index (xlsxioreadersheet sheethandle) +{ + return sheethandle->lastcolnr; +} + +DLL_EXPORT_XLSXIO unsigned int xlsxioread_sheet_flags (xlsxioreadersheet sheethandle) +{ + return sheethandle->processcallbackdata.flags; +} + +DLL_EXPORT_XLSXIO int xlsxioread_process (xlsxioreader handle, const XLSXIOCHAR* sheetname, unsigned int flags, xlsxioread_process_cell_callback_fn cell_callback, xlsxioread_process_row_callback_fn row_callback, void* callbackdata) +{ + int result = 0; + //determine sheet file name + struct main_sheet_get_rels_callback_data getrelscallbackdata = { + .sheetname = sheetname, + .basepath = NULL, + .sheetrelid = NULL, + .sheetfile = NULL, + .sharedstringsfile = NULL, + .stylesfile = NULL + }; + iterate_files_by_contenttype(handle->zip, xlsx_content_type, main_sheet_get_sheetfile_callback, &getrelscallbackdata, NULL); + if (!getrelscallbackdata.sheetrelid) + 
iterate_files_by_contenttype(handle->zip, xlsm_content_type, main_sheet_get_sheetfile_callback, &getrelscallbackdata, NULL); + if (!getrelscallbackdata.sheetrelid) + iterate_files_by_contenttype(handle->zip, xltx_content_type, main_sheet_get_sheetfile_callback, &getrelscallbackdata, NULL); + if (!getrelscallbackdata.sheetrelid) + iterate_files_by_contenttype(handle->zip, xltm_content_type, main_sheet_get_sheetfile_callback, &getrelscallbackdata, NULL); + + //process shared strings + struct sharedstringlist* sharedstrings = NULL; + if (getrelscallbackdata.sharedstringsfile && getrelscallbackdata.sharedstringsfile[0]) { + sharedstrings = sharedstringlist_create(); + struct shared_strings_callback_data sharedstringsdata; + shared_strings_callback_data_initialize(&sharedstringsdata, sharedstrings); + if (expat_process_zip_file(handle->zip, getrelscallbackdata.sharedstringsfile, shared_strings_callback_find_sharedstringtable_start, NULL, NULL, &sharedstringsdata, &sharedstringsdata.xmlparser) != 0) { + //no shared strings found + sharedstringlist_destroy(sharedstrings); + sharedstrings = NULL; + } + shared_strings_callback_data_cleanup(&sharedstringsdata); + } + + //process sheet + if (!(flags & XLSXIOREAD_NO_CALLBACK)) { + //use callback mechanism + struct data_sheet_callback_data processcallbackdata; + data_sheet_callback_data_initialize(&processcallbackdata, sharedstrings, flags, cell_callback, row_callback, callbackdata); + expat_process_zip_file(handle->zip, getrelscallbackdata.sheetfile, data_sheet_expat_callback_find_worksheet_start, NULL, NULL, &processcallbackdata, &processcallbackdata.xmlparser); + data_sheet_callback_data_cleanup(&processcallbackdata); + } else { + //use simplified interface by suspending the XML parser when data is found + xlsxioreadersheet sheethandle = (xlsxioreadersheet)callbackdata; + data_sheet_callback_data_initialize(&sheethandle->processcallbackdata, sharedstrings, flags, NULL, NULL, sheethandle); + if ((sheethandle->zipfile = 
XML_Char_openzip(sheethandle->handle->zip, getrelscallbackdata.sheetfile, 0)) == NULL) { + result = 1; + } + if ((sheethandle->processcallbackdata.xmlparser = expat_process_zip_file_suspendable(sheethandle->zipfile, data_sheet_expat_callback_find_worksheet_start, NULL, NULL, &sheethandle->processcallbackdata)) == NULL) { + result = 2; + } + } + + //clean up + free(getrelscallbackdata.basepath); + free(getrelscallbackdata.sheetrelid); + free(getrelscallbackdata.sheetfile); + free(getrelscallbackdata.sharedstringsfile); + free(getrelscallbackdata.stylesfile); + return result; +} + +//////////////////////////////////////////////////////////////////////// + +struct xlsxio_read_sheetlist_struct { + xlsxioreader handle; + ZIPFILEENTRYTYPE* zipfile; + struct main_sheet_list_callback_data sheetcallbackdata; + XML_Parser xmlparser; + XML_Char* nextsheetname; +}; + +int xlsxioread_list_sheets_resumable_callback (const XLSXIOCHAR* name, void* callbackdata) +{ + //struct main_sheet_list_callback_data* data = (struct main_sheet_list_callback_data*)callbackdata; + xlsxioreadersheetlist data = (xlsxioreadersheetlist)callbackdata; + data->nextsheetname = XML_Char_dup(name); + XML_StopParser(data->xmlparser, XML_TRUE); + return 0; +} + +void xlsxioread_find_main_sheet_file_callback (ZIPFILETYPE* zip, const XML_Char* filename, const XML_Char* contenttype, void* callbackdata) +{ + XML_Char** data = (XML_Char**)callbackdata; + *data = XML_Char_dup(filename); +} + +DLL_EXPORT_XLSXIO xlsxioreadersheetlist xlsxioread_sheetlist_open (xlsxioreader handle) +{ + //determine main sheet name + XML_Char* mainsheetfile = NULL; + iterate_files_by_contenttype(handle->zip, xlsx_content_type, xlsxioread_find_main_sheet_file_callback, &mainsheetfile, NULL); + if (!mainsheetfile) + iterate_files_by_contenttype(handle->zip, xlsm_content_type, xlsxioread_find_main_sheet_file_callback, &mainsheetfile, NULL); + if (!mainsheetfile) + iterate_files_by_contenttype(handle->zip, xltx_content_type, 
xlsxioread_find_main_sheet_file_callback, &mainsheetfile, NULL); + if (!mainsheetfile) + iterate_files_by_contenttype(handle->zip, xltm_content_type, xlsxioread_find_main_sheet_file_callback, &mainsheetfile, NULL); + if (!mainsheetfile) + return NULL; + //process contents of main sheet + xlsxioreadersheetlist result; + if ((result = (xlsxioreadersheetlist)malloc(sizeof(struct xlsxio_read_sheetlist_struct))) == NULL) + return NULL; + result->handle = handle; + result->sheetcallbackdata.xmlparser = NULL; + result->sheetcallbackdata.callback = xlsxioread_list_sheets_resumable_callback; + result->sheetcallbackdata.callbackdata = result; + result->xmlparser = NULL; + result->nextsheetname = NULL; + if ((result->zipfile = XML_Char_openzip(handle->zip, mainsheetfile, 0)) != NULL) { + result->xmlparser = expat_process_zip_file_suspendable(result->zipfile, main_sheet_list_expat_callback_element_start, NULL, NULL, &result->sheetcallbackdata); + } + //clean up + free(mainsheetfile); + return result; +} + +DLL_EXPORT_XLSXIO void xlsxioread_sheetlist_close (xlsxioreadersheetlist sheetlisthandle) +{ + if (!sheetlisthandle) + return; + if (sheetlisthandle->xmlparser) + XML_ParserFree(sheetlisthandle->xmlparser); + if (sheetlisthandle->zipfile) +#ifdef USE_MINIZIP + unzCloseCurrentFile(sheetlisthandle->zipfile); +#else + zip_fclose(sheetlisthandle->zipfile); +#endif + free(sheetlisthandle->nextsheetname); + free(sheetlisthandle); + +} + +DLL_EXPORT_XLSXIO const XLSXIOCHAR* xlsxioread_sheetlist_next (xlsxioreadersheetlist sheetlisthandle) +{ + if (!sheetlisthandle->zipfile || !sheetlisthandle->xmlparser) + return NULL; + free(sheetlisthandle->nextsheetname); + sheetlisthandle->nextsheetname = NULL; + enum XML_Status status; + if ((status = expat_process_zip_file_resume(sheetlisthandle->zipfile, sheetlisthandle->xmlparser)) != XML_STATUS_SUSPENDED) { + return NULL; + } + return sheetlisthandle->nextsheetname; +} + 
+//////////////////////////////////////////////////////////////////////// + +DLL_EXPORT_XLSXIO xlsxioreadersheet xlsxioread_sheet_open (xlsxioreader handle, const XLSXIOCHAR* sheetname, unsigned int flags) +{ + xlsxioreadersheet result; + if ((result = (xlsxioreadersheet)malloc(sizeof(struct xlsxio_read_sheet_struct))) == NULL) + return NULL; + result->handle = handle; + result->zipfile = NULL; + result->lastrownr = 0; + result->paddingrow = 0; + result->lastcolnr = 0; + result->paddingcol = 0; + xlsxioread_process(handle, sheetname, flags | XLSXIOREAD_NO_CALLBACK, NULL, NULL, result); /* Note: currently broken when not using XLSXIOREAD_NO_CALLBACK flag */ + return result; +} + +DLL_EXPORT_XLSXIO void xlsxioread_sheet_close (xlsxioreadersheet sheethandle) +{ + if (!sheethandle) + return; + if (sheethandle->processcallbackdata.xmlparser) + XML_ParserFree(sheethandle->processcallbackdata.xmlparser); + data_sheet_callback_data_cleanup(&sheethandle->processcallbackdata); + if (sheethandle->zipfile) +#ifdef USE_MINIZIP + unzCloseCurrentFile(sheethandle->zipfile); +#else + zip_fclose(sheethandle->zipfile); +#endif + free(sheethandle); +} + +DLL_EXPORT_XLSXIO int xlsxioread_sheet_next_row (xlsxioreadersheet sheethandle) +{ + enum XML_Status status; + if (!sheethandle) { + return 0; + } + sheethandle->lastcolnr = 0; + //when padding rows don't retrieve new data + if (sheethandle->paddingrow) { + if (sheethandle->paddingrow < sheethandle->processcallbackdata.rownr) { + return 3; + } else { + sheethandle->paddingrow = 0; + return 2; + } + } + sheethandle->paddingcol = 0; + //go to beginning of next row + while ((status = expat_process_zip_file_resume(sheethandle->zipfile, sheethandle->processcallbackdata.xmlparser)) == XML_STATUS_SUSPENDED && sheethandle->processcallbackdata.colnr != 0) { + } + return (status == XML_STATUS_SUSPENDED ? 
1 : 0); +} + +DLL_EXPORT_XLSXIO XLSXIOCHAR* xlsxioread_sheet_next_cell (xlsxioreadersheet sheethandle) +{ + XML_Char* result; + if (!sheethandle) + return NULL; + //if (!(sheethandle->processcallbackdata.flags & XLSXIOREAD_SKIP_EMPTY_CELLS) && sheethandle->paddingcol) { + if ((!(sheethandle->processcallbackdata.flags & XLSXIOREAD_SKIP_EMPTY_CELLS) && sheethandle->paddingcol) || (!(sheethandle->processcallbackdata.flags & XLSXIOREAD_SKIP_EMPTY_ROWS) && sheethandle->paddingrow)) { + if ((/*sheethandle->processcallbackdata.cols > 0 &&*/ sheethandle->paddingcol > sheethandle->processcallbackdata.cols) || (sheethandle->processcallbackdata.flags & XLSXIOREAD_SKIP_EMPTY_CELLS)) { + //last empty column added, finish row + sheethandle->paddingcol = 0; + //when padding rows prepare for the next one + if (sheethandle->paddingrow) { + sheethandle->lastrownr++; + sheethandle->paddingrow++; + if (sheethandle->paddingrow + 1 < sheethandle->processcallbackdata.rownr) { + sheethandle->paddingcol = 1; + } + } + return NULL; + } else if ((sheethandle->processcallbackdata.flags & XLSXIOREAD_SKIP_EXTRA_CELLS) && sheethandle->processcallbackdata.cols > 0 && sheethandle->lastcolnr >= sheethandle->processcallbackdata.cols) { + //end of line when out of bounds + return NULL; + } else { + //add another empty column + sheethandle->paddingcol++; + sheethandle->lastcolnr++; + return XML_Char_dup(X("")); + } + } else if ((sheethandle->processcallbackdata.flags & XLSXIOREAD_SKIP_EXTRA_CELLS) && sheethandle->processcallbackdata.cols > 0 && sheethandle->lastcolnr >= sheethandle->processcallbackdata.cols) { + //end of line when out of bounds + return NULL; + } + //get value + if (!sheethandle->processcallbackdata.celldata) + if (expat_process_zip_file_resume(sheethandle->zipfile, sheethandle->processcallbackdata.xmlparser) != XML_STATUS_SUSPENDED) + sheethandle->processcallbackdata.celldata = NULL; + //insert empty rows if needed + if (!(sheethandle->processcallbackdata.flags & 
XLSXIOREAD_SKIP_EMPTY_ROWS) && sheethandle->lastrownr + 1 < sheethandle->processcallbackdata.rownr) { + sheethandle->paddingrow = sheethandle->lastrownr + 1; + sheethandle->paddingcol = sheethandle->processcallbackdata.colnr*0 + 1; + return xlsxioread_sheet_next_cell(sheethandle); + } + //insert empty column before if needed + if (!(sheethandle->processcallbackdata.flags & XLSXIOREAD_SKIP_EMPTY_CELLS)) { + if (sheethandle->lastcolnr + 1 < sheethandle->processcallbackdata.colnr) { + if (0) { + //if ((sheethandle->processcallbackdata.flags & XLSXIOREAD_SKIP_EXTRA_CELLS) && sheethandle->processcallbackdata.cols > 0 && sheethandle->lastcolnr >= sheethandle->processcallbackdata.cols) { + //end of line when out of bounds + return NULL; + } else { + sheethandle->lastcolnr++; + return XML_Char_dup(X("")); + } + } + } + result = sheethandle->processcallbackdata.celldata; + sheethandle->processcallbackdata.celldata = NULL; + //end of row + if (!result) { + sheethandle->lastrownr = sheethandle->processcallbackdata.rownr; + //insert empty column at end of row if needed + if (!(sheethandle->processcallbackdata.flags & XLSXIOREAD_SKIP_EMPTY_CELLS) && sheethandle->processcallbackdata.colnr < sheethandle->processcallbackdata.cols) { + sheethandle->paddingcol = sheethandle->lastcolnr + 1; + return xlsxioread_sheet_next_cell(sheethandle); + } + } + sheethandle->lastcolnr = sheethandle->processcallbackdata.colnr; + return result; +} + +DLL_EXPORT_XLSXIO int xlsxioread_sheet_next_cell_string (xlsxioreadersheet sheethandle, XLSXIOCHAR** pvalue) +{ + XML_Char* result; + if (!sheethandle) + return -1; + if ((result = xlsxioread_sheet_next_cell(sheethandle)) == NULL) + return 0; + if (pvalue) + *pvalue = result; + return 1; +} + +DLL_EXPORT_XLSXIO int xlsxioread_sheet_next_cell_int (xlsxioreadersheet sheethandle, int64_t* pvalue) +{ + XML_Char* result; + int status; + if ((result = xlsxioread_sheet_next_cell(sheethandle)) == NULL) + return 0; + if (pvalue) { + status = 
XML_Char_sscanf(result, X("%" PRIi64), pvalue); + if (status == EOF || status == 0) + *pvalue = 0; + //alternative: use strtoimax() + } + free(result); + return 1; +} + +DLL_EXPORT_XLSXIO int xlsxioread_sheet_next_cell_float (xlsxioreadersheet sheethandle, double* pvalue) +{ + XML_Char* result; + if ((result = xlsxioread_sheet_next_cell(sheethandle)) == NULL) + return 0; + if (pvalue) + *pvalue = XML_Char_tod(result); + free(result); + return 1; +} + +DLL_EXPORT_XLSXIO int xlsxioread_sheet_next_cell_datetime (xlsxioreadersheet sheethandle, time_t* pvalue) +{ + XML_Char* result; + if ((result = xlsxioread_sheet_next_cell(sheethandle)) == NULL) + return 0; + if (pvalue) { + double value = XML_Char_tod(result); + if (value != 0) { + value = (value - 25569) * 86400; //conversion from Excel to Unix timestamp + } + *pvalue = (time_t)value; + } + free(result); + return 1; +} + +DLL_EXPORT_XLSXIO void xlsxioread_free (XLSXIOCHAR* data) +{ + free(data); +} diff --git a/3rdparty/libs/fileext/excel/xlsxio/xlsxio_read.h b/3rdparty/libs/fileext/excel/xlsxio/xlsxio_read.h new file mode 100644 index 0000000..7ecd1bf --- /dev/null +++ b/3rdparty/libs/fileext/excel/xlsxio/xlsxio_read.h @@ -0,0 +1,326 @@ +/***************************************************************************** +Copyright (C) 2016 Brecht Sanders All Rights Reserved + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*****************************************************************************/ + +/** + * @file xlsxio_read.h + * @brief XLSX I/O header file for reading .xlsx files. + * @author Brecht Sanders + * @date 2016 + * @copyright MIT + * + * Include this header file to use XLSX I/O for reading .xlsx files and + * link with -lxlsxio_read. + * This header provides both advanced methods using callback functions and + * simple methods for iterating through data. + */ + +#ifndef INCLUDED_XLSXIO_READ_H +#define INCLUDED_XLSXIO_READ_H + +#include +#if defined(_MSC_VER) && _MSC_VER < 1600 +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; +#else +#include +#endif +#include + +/*! \brief character type used (usually char, but when XML_UNICODE is defined wchar_t) */ +#ifndef XLSXIOCHAR +#if defined(XML_UNICODE_WCHAR_T) +#warning Building with XML_UNICODE_WCHAR_T and -fshort-wchar is not supported unless all other linked libraries and programs are also compiled with -fshort-wchar +#elif !defined(XML_UNICODE) +#define XLSXIOCHAR char +#else +#include +#define XLSXIOCHAR wchar_t +#endif +#endif + +/*! 
\cond PRIVATE */ +#ifndef DLL_EXPORT_XLSXIO +#ifdef _WIN32 +#if defined(BUILD_XLSXIO_DLL) || defined(BUILD_XLSXIO_SHARED) || defined(xlsxio_write_SHARED_EXPORTS) +#define DLL_EXPORT_XLSXIO __declspec(dllexport) +#elif !defined(STATIC) && !defined(BUILD_XLSXIO_STATIC) && !defined(BUILD_XLSXIO) +#define DLL_EXPORT_XLSXIO __declspec(dllimport) +#else +#define DLL_EXPORT_XLSXIO +#endif +#else +#define DLL_EXPORT_XLSXIO +#endif +#endif +/*! \endcond */ + +#ifdef __cplusplus +extern "C" { +#endif + +/*! \brief get xlsxio_read version + * \param pmajor pointer to integer that will receive major version number + * \param pminor pointer to integer that will receive minor version number + * \param pmicro pointer to integer that will receive micro version number + * \sa xlsxioread_get_version_string() + */ +DLL_EXPORT_XLSXIO void xlsxioread_get_version (int* pmajor, int* pminor, int* pmicro); + +/*! \brief get xlsxio_read version string + * \return version string + * \sa xlsxioread_get_version() + */ +DLL_EXPORT_XLSXIO const XLSXIOCHAR* xlsxioread_get_version_string (); + +/*! \brief read handle for .xlsx object */ +typedef struct xlsxio_read_struct* xlsxioreader; + +/*! \brief open .xlsx file + * \param filename path of .xlsx file to open + * \return read handle for .xlsx object or NULL on error + * \sa xlsxioread_close() + */ +DLL_EXPORT_XLSXIO xlsxioreader xlsxioread_open (const char* filename); + +/*! \brief open .xlsx file + * \param filehandle file handle of .xlsx file opened with read access in binary mode + * \return read handle for .xlsx object or NULL on error + * \sa xlsxioread_close() + */ +DLL_EXPORT_XLSXIO xlsxioreader xlsxioread_open_filehandle (int filehandle); + +/*!
\brief open .xlsx from memory buffer + * \param data memory buffer containing .xlsx file (data must remain valid as long as any xlsxioread_ functions are called) + * \param datalen size of memory buffer containing .xlsx file + * \param freedata if non-zero data will be freed by xlsxioread_close() + * \return read handle for .xlsx object or NULL on error + * \sa xlsxioread_close() + */ +DLL_EXPORT_XLSXIO xlsxioreader xlsxioread_open_memory (void* data, uint64_t datalen, int freedata); + +/*! \brief close .xlsx file + * \param handle read handle for .xlsx object + * \sa xlsxioread_open() + */ +DLL_EXPORT_XLSXIO void xlsxioread_close (xlsxioreader handle); + + + +/*! \brief type of pointer to callback function for listing worksheets + * \param name name of worksheet + * \param callbackdata callback data passed to xlsxioread_list_sheets + * \return zero to continue, non-zero to abort + * \sa xlsxioread_list_sheets() + */ +typedef int (*xlsxioread_list_sheets_callback_fn)(const XLSXIOCHAR* name, void* callbackdata); + +/*! \brief list worksheets in .xlsx file + * \param handle read handle for .xlsx object + * \param callback callback function called for each worksheet + * \param callbackdata custom data passed unchanged to each call of the callback function + * \sa xlsxioread_list_sheets_callback_fn + */ +DLL_EXPORT_XLSXIO void xlsxioread_list_sheets (xlsxioreader handle, xlsxioread_list_sheets_callback_fn callback, void* callbackdata); + + + +/*! \brief possible values for the flags parameter of xlsxioread_process() + * \sa xlsxioread_process() + * \name XLSXIOREAD_SKIP_* + * \{ + */ +/*! \brief don't skip any rows or cells \hideinitializer */ +#define XLSXIOREAD_SKIP_NONE 0 +/*! \brief skip empty rows (note: cells may appear empty while they actually contain data) \hideinitializer */ +#define XLSXIOREAD_SKIP_EMPTY_ROWS 0x01 +/*! \brief skip empty cells \hideinitializer */ +#define XLSXIOREAD_SKIP_EMPTY_CELLS 0x02 +/*!
\brief skip empty rows and cells \hideinitializer */ +#define XLSXIOREAD_SKIP_ALL_EMPTY (XLSXIOREAD_SKIP_EMPTY_ROWS | XLSXIOREAD_SKIP_EMPTY_CELLS) +/*! \brief skip extra cells to the right of the rightmost header cell \hideinitializer */ +#define XLSXIOREAD_SKIP_EXTRA_CELLS 0x04 +/*! \brief skip hidden rows \hideinitializer */ +#define XLSXIOREAD_SKIP_HIDDEN_ROWS 0x08 +/*! @} */ + +/*! \brief type of pointer to callback function for processing a worksheet cell value + * \param row row number (first row is 1) + * \param col column number (first column is 1) + * \param value value of cell (note: formulas are not calculated) + * \param callbackdata callback data passed to xlsxioread_process + * \return zero to continue, non-zero to abort + * \sa xlsxioread_process() + * \sa xlsxioread_process_row_callback_fn + */ +typedef int (*xlsxioread_process_cell_callback_fn)(size_t row, size_t col, const XLSXIOCHAR* value, void* callbackdata); + +/*! \brief type of pointer to callback function for processing the end of a worksheet row + * \param row row number (first row is 1) + * \param maxcol maximum column number on this row (first column is 1) + * \param callbackdata callback data passed to xlsxioread_process + * \return zero to continue, non-zero to abort + * \sa xlsxioread_process() + * \sa xlsxioread_process_cell_callback_fn + */ +typedef int (*xlsxioread_process_row_callback_fn)(size_t row, size_t maxcol, void* callbackdata); + +/*! 
\brief process all rows and columns of a worksheet in an .xlsx file + * \param handle read handle for .xlsx object + * \param sheetname worksheet name (NULL for first sheet) + * \param flags XLSXIOREAD_SKIP_ flag(s) to determine how data is processed + * \param cell_callback callback function called for each cell + * \param row_callback callback function called after each row + * \param callbackdata callback data passed to xlsxioread_process + * \return zero on success, non-zero on error + * \sa xlsxioread_process_row_callback_fn + * \sa xlsxioread_process_cell_callback_fn + */ +DLL_EXPORT_XLSXIO int xlsxioread_process (xlsxioreader handle, const XLSXIOCHAR* sheetname, unsigned int flags, xlsxioread_process_cell_callback_fn cell_callback, xlsxioread_process_row_callback_fn row_callback, void* callbackdata); + + + +/*! \brief read handle for list of worksheet names */ +typedef struct xlsxio_read_sheetlist_struct* xlsxioreadersheetlist; + +/*! \brief open list of worksheet names + * \param handle read handle for .xlsx object + * \sa xlsxioread_sheetlist_close() + * \sa xlsxioread_open() + */ +DLL_EXPORT_XLSXIO xlsxioreadersheetlist xlsxioread_sheetlist_open (xlsxioreader handle); + +/*! \brief close list of worksheet names + * \param sheetlisthandle read handle for list of worksheet names + * \sa xlsxioread_sheetlist_open() + */ +DLL_EXPORT_XLSXIO void xlsxioread_sheetlist_close (xlsxioreadersheetlist sheetlisthandle); + +/*! \brief get next worksheet name + * \param sheetlisthandle read handle for list of worksheet names + * \return name of worksheet or NULL if no more worksheets are available + * \sa xlsxioread_sheetlist_open() + */ +DLL_EXPORT_XLSXIO const XLSXIOCHAR* xlsxioread_sheetlist_next (xlsxioreadersheetlist sheetlisthandle); + + + +/*! \brief read handle for worksheet object */ +typedef struct xlsxio_read_sheet_struct* xlsxioreadersheet; + +/*!
\brief get index of last row read from worksheet (returns 0 if no row was read yet) + * \param sheethandle read handle for worksheet object + * \sa xlsxioread_sheet_open() + */ +DLL_EXPORT_XLSXIO size_t xlsxioread_sheet_last_row_index (xlsxioreadersheet sheethandle); + +/*! \brief get index of last column read from current row in worksheet (returns 0 if no column was read yet) + * \param sheethandle read handle for worksheet object + * \sa xlsxioread_sheet_open() + */ +DLL_EXPORT_XLSXIO size_t xlsxioread_sheet_last_column_index (xlsxioreadersheet sheethandle); + +/*! \brief get flags used to open worksheet + * \param sheethandle read handle for worksheet object + * \sa xlsxioread_sheet_open() + */ +DLL_EXPORT_XLSXIO unsigned int xlsxioread_sheet_flags (xlsxioreadersheet sheethandle); + +/*! \brief open worksheet + * \param handle read handle for .xlsx object + * \param sheetname worksheet name (NULL for first sheet) + * \param flags XLSXIOREAD_SKIP_ flag(s) to determine how data is processed + * \return read handle for worksheet object or NULL in case of error + * \sa xlsxioread_sheet_close() + * \sa xlsxioread_open() + */ +DLL_EXPORT_XLSXIO xlsxioreadersheet xlsxioread_sheet_open (xlsxioreader handle, const XLSXIOCHAR* sheetname, unsigned int flags); + +/*! \brief close worksheet + * \param sheethandle read handle for worksheet object + * \sa xlsxioread_sheet_open() + */ +DLL_EXPORT_XLSXIO void xlsxioread_sheet_close (xlsxioreadersheet sheethandle); + +/*! \brief get next row from worksheet (to be called before each row) + * \param sheethandle read handle for worksheet object + * \return non-zero if a new row is available + * \sa xlsxioread_sheet_open() + */ +DLL_EXPORT_XLSXIO int xlsxioread_sheet_next_row (xlsxioreadersheet sheethandle); + +/*! 
\brief get next cell from worksheet + * \param sheethandle read handle for worksheet object + * \return value (caller must free the result using xlsxioread_free()) or NULL if no more cells are available in the current row + * \sa xlsxioread_sheet_open() + * \sa xlsxioread_free() + */ +DLL_EXPORT_XLSXIO XLSXIOCHAR* xlsxioread_sheet_next_cell (xlsxioreadersheet sheethandle); + +/*! \brief get next cell from worksheet as a string + * \param sheethandle read handle for worksheet object + * \param pvalue pointer where string will be stored if data is available (caller must free the result using xlsxioread_free()) + * \return non-zero if a new cell was available in the current row + * \sa xlsxioread_sheet_open() + * \sa xlsxioread_sheet_next_cell() + * \sa xlsxioread_free() + */ +DLL_EXPORT_XLSXIO int xlsxioread_sheet_next_cell_string (xlsxioreadersheet sheethandle, XLSXIOCHAR** pvalue); + +/*! \brief get next cell from worksheet as an integer + * \param sheethandle read handle for worksheet object + * \param pvalue pointer where integer will be stored if data is available + * \return non-zero if a new cell was available in the current row + * \sa xlsxioread_sheet_open() + * \sa xlsxioread_sheet_next_cell() + */ +DLL_EXPORT_XLSXIO int xlsxioread_sheet_next_cell_int (xlsxioreadersheet sheethandle, int64_t* pvalue); + +/*! \brief get next cell from worksheet as a floating point value + * \param sheethandle read handle for worksheet object + * \param pvalue pointer where floating point value will be stored if data is available + * \return non-zero if a new cell was available in the current row + * \sa xlsxioread_sheet_open() + * \sa xlsxioread_sheet_next_cell() + */ +DLL_EXPORT_XLSXIO int xlsxioread_sheet_next_cell_float (xlsxioreadersheet sheethandle, double* pvalue); + +/*! 
\brief get next cell from worksheet as date and time data
+ * \param sheethandle read handle for worksheet object
+ * \param pvalue pointer where date and time data will be stored if data is available
+ * \return non-zero if a new cell was available in the current row
+ * \sa xlsxioread_sheet_open()
+ * \sa xlsxioread_sheet_next_cell()
+ */
+DLL_EXPORT_XLSXIO int xlsxioread_sheet_next_cell_datetime (xlsxioreadersheet sheethandle, time_t* pvalue);
+
+/*! \brief free memory allocated by the library
+ * \param data memory to be freed
+ * \sa xlsxioread_sheet_next_cell()
+ * \sa xlsxioread_sheet_next_cell_string()
+ */
+DLL_EXPORT_XLSXIO void xlsxioread_free (XLSXIOCHAR* data);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/3rdparty/libs/fileext/excel/xlsxio/xlsxio_read_sharedstrings.c b/3rdparty/libs/fileext/excel/xlsxio/xlsxio_read_sharedstrings.c
new file mode 100644
index 0000000..b83de84
--- /dev/null
+++ b/3rdparty/libs/fileext/excel/xlsxio/xlsxio_read_sharedstrings.c
@@ -0,0 +1,208 @@
+#include "xlsxio_private.h"
+#include "xlsxio_read_sharedstrings.h"
+#include
+//#include
+#include
+
+#if defined(_MSC_VER) || (defined(__MINGW32__) && !defined(__MINGW64__))
+#define strcasecmp _stricmp
+#endif
+#ifdef _WIN32
+#define wcscasecmp _wcsicmp
+#endif
+
+//Allocate an empty shared string list (no entries, no backing array).
+//Returns the new list, or NULL if the allocation fails.
+//Release the result with sharedstringlist_destroy().
+struct sharedstringlist* sharedstringlist_create ()
+{
+  struct sharedstringlist* list = (struct sharedstringlist*)malloc(sizeof(struct sharedstringlist));
+  if (list == NULL)
+    return NULL;
+  list->strings = NULL;
+  list->numstrings = 0;
+  return list;
+}
+
+//Free every stored string, the pointer array and the list itself.
+//Passing NULL is allowed and does nothing.
+void sharedstringlist_destroy (struct sharedstringlist* sharedstrings)
+{
+  size_t idx;
+  if (!sharedstrings)
+    return;
+  for (idx = 0; idx != sharedstrings->numstrings; idx++)
+    free(sharedstrings->strings[idx]);
+  free(sharedstrings->strings);
+  free(sharedstrings);
+}
+
+//Number of entries stored in the list; a NULL list counts as empty.
+size_t sharedstringlist_size (struct sharedstringlist* sharedstrings)
+{
+  return (sharedstrings ? sharedstrings->numstrings : 0);
+}
+
+//Append a copy of the first datalen characters of data to the list.
+//A NULL data pointer appends a NULL entry instead of a copy.
+//Returns 0 on success, 1 if the list is NULL, 2 if the copy could not be
+//allocated and 3 if the pointer array could not be enlarged.
+int
sharedstringlist_add_buffer (struct sharedstringlist* sharedstrings, const XML_Char* data, size_t datalen)
+{
+  XML_Char* copy = NULL;
+  XML_Char** grown;
+  if (!sharedstrings)
+    return 1;
+  if (data) {
+    //private, NUL-terminated copy of the first datalen characters
+    copy = XML_Char_malloc(datalen + 1);
+    if (copy == NULL)
+      return 2;
+    XML_Char_poscpy(copy, 0, data, datalen);
+    copy[datalen] = 0;
+  }
+  //enlarge the pointer array by one slot; the list is left untouched if this fails
+  grown = (XML_Char**)realloc(sharedstrings->strings, (sharedstrings->numstrings + 1) * sizeof(sharedstrings->strings[0]));
+  if (grown == NULL) {
+    free(copy);
+    return 3;
+  }
+  grown[sharedstrings->numstrings] = copy;
+  sharedstrings->strings = grown;
+  sharedstrings->numstrings++;
+  return 0;
+}
+
+//Append a copy of a NUL-terminated string (or a NULL entry when data is NULL).
+//Returns the result of sharedstringlist_add_buffer().
+int sharedstringlist_add_string (struct sharedstringlist* sharedstrings, const XML_Char* data)
+{
+  size_t len = (data ? XML_Char_len(data) : 0);
+  return sharedstringlist_add_buffer(sharedstrings, data, len);
+}
+
+//Return the entry stored at index, or NULL when the list is NULL or index is out of range.
+//The returned pointer is the list's own copy and may itself be NULL for an entry added without data.
+const XML_Char* sharedstringlist_get (struct sharedstringlist* sharedstrings, size_t index)
+{
+  if (sharedstrings && index < sharedstrings->numstrings)
+    return sharedstrings->strings[index];
+  return NULL;
+}
+
+////////////////////////////////////////////////////////////////////////
+
+//Put the callback state into its initial condition: no parser attached, no text
+//collected, not skipping any tag, and sharedstrings as the list to receive entries.
+void shared_strings_callback_data_initialize (struct shared_strings_callback_data* data, struct sharedstringlist* sharedstrings)
+{
+  //parser and destination list
+  data->xmlparser = NULL;
+  data->sharedstrings = sharedstrings;
+  //position flags
+  data->insst = 0;
+  data->insi = 0;
+  data->intext = 0;
+  //text collected for the current string item
+  data->text = NULL;
+  data->textlen = 0;
+  //tag skipping state and the handlers to restore afterwards
+  data->skiptag = NULL;
+  data->skiptagcount = 0;
+  data->skip_start = NULL;
+  data->skip_end = NULL;
+  data->skip_data = NULL;
+}
+
+//Release the buffers owned by the callback state (collected text and skipped tag name).
+//The structure itself and the shared string list are not freed.
+void shared_strings_callback_data_cleanup (struct shared_strings_callback_data* data)
+{
+  free(data->skiptag);
+  free(data->text);
+}
+
+//Start-element handler used while skipping a tag: counts nested occurrences of
+//the tag being skipped so the matching end tag can be recognised.
+void shared_strings_callback_skip_tag_start (void* callbackdata, const XML_Char* name, const XML_Char** atts)
+{
+  struct shared_strings_callback_data* ctx = (struct shared_strings_callback_data*)callbackdata;
+  if (!name)
+    return;
+  if (XML_Char_icmp(name, ctx->skiptag) != 0)
+    return;
+  //one more nested occurrence of the tag being skipped
+  ctx->skiptagcount++;
+}
+
+//End-element handler used while skipping a tag: lowers the nesting level and,
+//once it reaches zero, reinstalls the handlers saved in skip_start/skip_end/skip_data.
+void
shared_strings_callback_skip_tag_end (void* callbackdata, const XML_Char* name)
+{
+  struct shared_strings_callback_data* data = (struct shared_strings_callback_data*)callbackdata;
+  //a NULL name is handled like the end tag of the element being skipped
+  if (!name || XML_Char_icmp(name, data->skiptag) == 0) {
+    if (--data->skiptagcount == 0) {
+      //restore handlers when done skipping
+      XML_SetElementHandler(data->xmlparser, data->skip_start, data->skip_end);
+      XML_SetCharacterDataHandler(data->xmlparser, data->skip_data);
+      free(data->skiptag);
+      data->skiptag = NULL;
+    }
+  }
+}
+
+//Start-element handler that waits for the <sst> root element and then starts
+//looking for <si> string items.
+void shared_strings_callback_find_sharedstringtable_start (void* callbackdata, const XML_Char* name, const XML_Char** atts)
+{
+  struct shared_strings_callback_data* data = (struct shared_strings_callback_data*)callbackdata;
+  if (XML_Char_icmp(name, X("sst")) == 0) {
+    XML_SetElementHandler(data->xmlparser, shared_strings_callback_find_shared_stringitem_start, NULL);
+  }
+}
+
+//End-element handler for the table level: on </sst> go back to waiting for <sst>.
+//Every other end tag is ignored.
+void shared_strings_callback_find_sharedstringtable_end (void* callbackdata, const XML_Char* name)
+{
+  struct shared_strings_callback_data* data = (struct shared_strings_callback_data*)callbackdata;
+  if (XML_Char_icmp(name, X("sst")) == 0) {
+    XML_SetElementHandler(data->xmlparser, shared_strings_callback_find_sharedstringtable_start, NULL);
+  }
+}
+
+//Start-element handler that waits for the next <si> string item, discards any
+//text left over from a previous item and starts looking for its <t> elements.
+void shared_strings_callback_find_shared_stringitem_start (void* callbackdata, const XML_Char* name, const XML_Char** atts)
+{
+  struct shared_strings_callback_data* data = (struct shared_strings_callback_data*)callbackdata;
+  if (XML_Char_icmp(name, X("si")) == 0) {
+    if (data->text)
+      free(data->text);
+    data->text = NULL;
+    data->textlen = 0;
+    //The end handler must be the string item handler, not the table handler:
+    //an <si> that closes without a <t> (e.g. <si/>) still has to produce an
+    //entry, otherwise every later shared string index is off by one. The string
+    //item handler forwards all other end tags (including </sst>) to
+    //shared_strings_callback_find_sharedstringtable_end itself.
+    XML_SetElementHandler(data->xmlparser, shared_strings_callback_find_shared_string_start, shared_strings_callback_find_shared_stringitem_end);
+  }
+}
+
+//End-element handler for the string item level: on </si> the collected text is
+//appended to the shared string list (a NULL entry if no text was collected) and
+//the parser goes back to waiting for the next <si>. Other end tags are passed
+//on to shared_strings_callback_find_sharedstringtable_end.
+void shared_strings_callback_find_shared_stringitem_end (void* callbackdata, const XML_Char* name)
+{
+  struct shared_strings_callback_data* data = (struct shared_strings_callback_data*)callbackdata;
+  if (XML_Char_icmp(name, X("si")) == 0) {
+
sharedstringlist_add_buffer(data->sharedstrings, data->text, data->textlen);
+    if (data->text)
+      free(data->text);
+    data->text = NULL;
+    data->textlen = 0;
+    XML_SetElementHandler(data->xmlparser, shared_strings_callback_find_shared_stringitem_start, shared_strings_callback_find_sharedstringtable_end);
+  } else {
+    shared_strings_callback_find_sharedstringtable_end(callbackdata, name);
+  }
+}
+
+//Start-element handler used inside an <si> string item.
+//<t> switches to collecting character data until the matching end tag.
+//<rPh> is skipped together with everything nested inside it.
+//Any other element is ignored.
+void shared_strings_callback_find_shared_string_start (void* callbackdata, const XML_Char* name, const XML_Char** atts)
+{
+  struct shared_strings_callback_data* data = (struct shared_strings_callback_data*)callbackdata;
+  if (XML_Char_icmp(name, X("t")) == 0) {
+    XML_SetElementHandler(data->xmlparser, NULL, shared_strings_callback_find_shared_string_end);
+    XML_SetCharacterDataHandler(data->xmlparser, shared_strings_callback_string_data);
+  } else if (XML_Char_icmp(name, X("rPh")) == 0) {
+    XML_Char* tagcopy = XML_Char_dup(name);
+    //Only enter skip mode when the tag name could be duplicated: the skip
+    //handlers compare every element name against data->skiptag and must never
+    //be handed a NULL pointer. If the copy fails the current handlers stay in
+    //place, so the element is simply not skipped.
+    if (tagcopy != NULL) {
+      data->skiptag = tagcopy;
+      data->skiptagcount = 1;
+      //handlers to reinstall once the skipped element has been closed
+      data->skip_start = shared_strings_callback_find_shared_string_start;
+      data->skip_end = shared_strings_callback_find_shared_stringitem_end;
+      data->skip_data = NULL;
+      XML_SetElementHandler(data->xmlparser, shared_strings_callback_skip_tag_start, shared_strings_callback_skip_tag_end);
+      XML_SetCharacterDataHandler(data->xmlparser, NULL);
+    }
+  }
+}
+
+//End-element handler used while collecting the text of a <t> element.
+//On </t> character data collection stops and the string item handlers are
+//reinstalled; any other end tag is passed on to
+//shared_strings_callback_find_shared_stringitem_end.
+void shared_strings_callback_find_shared_string_end (void* callbackdata, const XML_Char* name)
+{
+  struct shared_strings_callback_data* data = (struct shared_strings_callback_data*)callbackdata;
+  if (XML_Char_icmp(name, X("t")) == 0) {
+    XML_SetElementHandler(data->xmlparser, shared_strings_callback_find_shared_string_start, shared_strings_callback_find_shared_stringitem_end);
+    XML_SetCharacterDataHandler(data->xmlparser, NULL);
+  } else {
+    shared_strings_callback_find_shared_stringitem_end(callbackdata, name);
+  }
+}
+
+//Character data handler installed while inside a <t> element: appends the
+//buflen characters at buf to the text collected for the current string item.
+void shared_strings_callback_string_data (void* callbackdata, const XML_Char* buf, int buflen)
+{
+  struct shared_strings_callback_data* data = (struct
shared_strings_callback_data*)callbackdata;
+  XML_Char* grown;
+  //nothing to append; also keeps a negative length from being added to the
+  //unsigned text length and realloc from being called with a size of zero
+  if (buflen <= 0)
+    return;
+  //reallocate through a temporary so the old buffer is not lost on failure
+  grown = XML_Char_realloc(data->text, data->textlen + buflen);
+  if (grown == NULL) {
+    //memory allocation error: realloc leaves the old block allocated, so it
+    //must be released here before the collected text is discarded
+    free(data->text);
+    data->text = NULL;
+    data->textlen = 0;
+    return;
+  }
+  XML_Char_poscpy(grown, data->textlen, buf, buflen);
+  data->text = grown;
+  data->textlen += buflen;
+}
+
+////////////////////////////////////////////////////////////////////////
+
diff --git a/3rdparty/libs/fileext/excel/xlsxio/xlsxio_read_sharedstrings.h b/3rdparty/libs/fileext/excel/xlsxio/xlsxio_read_sharedstrings.h
new file mode 100644
index 0000000..51657ac
--- /dev/null
+++ b/3rdparty/libs/fileext/excel/xlsxio/xlsxio_read_sharedstrings.h
@@ -0,0 +1,56 @@
+#ifndef INCLUDED_XLSXIO_READ_SHAREDSTRINGS_H
+#define INCLUDED_XLSXIO_READ_SHAREDSTRINGS_H
+
+#include
+#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct sharedstringlist {
+  XML_Char** strings; //array of numstrings entries (an entry may be NULL)
+  size_t numstrings; //number of entries in strings
+};
+
+struct sharedstringlist* sharedstringlist_create ();
+void sharedstringlist_destroy (struct sharedstringlist* sharedstrings);
+size_t sharedstringlist_size (struct sharedstringlist* sharedstrings);
+int sharedstringlist_add_buffer (struct sharedstringlist* sharedstrings, const XML_Char* data, size_t datalen);
+int sharedstringlist_add_string (struct sharedstringlist* sharedstrings, const XML_Char* data);
+const XML_Char* sharedstringlist_get (struct sharedstringlist* sharedstrings, size_t index);
+
+////////////////////////////////////////////////////////////////////////
+
+struct shared_strings_callback_data {
+  XML_Parser xmlparser;
+  struct sharedstringlist* sharedstrings;
+  int insst;
+  int insi;
+  int intext;
+  XML_Char* text;
+  size_t textlen;
+  XML_Char* skiptag; //tag to skip
+  size_t skiptagcount; //nesting level for current tag to skip
+  XML_StartElementHandler skip_start; //start handler to set after skipping
+  XML_EndElementHandler skip_end; //end handler to set after skipping
+  XML_CharacterDataHandler skip_data; //data handler to set after skipping
+};
+
+void
shared_strings_callback_data_initialize (struct shared_strings_callback_data* data, struct sharedstringlist* sharedstrings); +void shared_strings_callback_data_cleanup (struct shared_strings_callback_data* data); +void shared_strings_callback_skip_tag_start (void* callbackdata, const XML_Char* name, const XML_Char** atts); +void shared_strings_callback_skip_tag_end (void* callbackdata, const XML_Char* name); +void shared_strings_callback_find_sharedstringtable_start (void* callbackdata, const XML_Char* name, const XML_Char** atts); +void shared_strings_callback_find_sharedstringtable_end (void* callbackdata, const XML_Char* name); +void shared_strings_callback_find_shared_stringitem_start (void* callbackdata, const XML_Char* name, const XML_Char** atts); +void shared_strings_callback_find_shared_stringitem_end (void* callbackdata, const XML_Char* name); +void shared_strings_callback_find_shared_string_start (void* callbackdata, const XML_Char* name, const XML_Char** atts); +void shared_strings_callback_find_shared_string_end (void* callbackdata, const XML_Char* name); +void shared_strings_callback_string_data (void* callbackdata, const XML_Char* buf, int buflen); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/3rdparty/libs/fileext/excel/xlsxio/xlsxio_version.h b/3rdparty/libs/fileext/excel/xlsxio/xlsxio_version.h new file mode 100644 index 0000000..c742279 --- /dev/null +++ b/3rdparty/libs/fileext/excel/xlsxio/xlsxio_version.h @@ -0,0 +1,80 @@ +/***************************************************************************** +Copyright (C) 2016 Brecht Sanders All Rights Reserved + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is 
+furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*****************************************************************************/ + +/** + * @file xlsxio_version.h + * @brief XLSX I/O header file with version information. + * @author Brecht Sanders + * + * Only use this header file when version information is needed at compile + * time. Otherwise the version functions in the libraries should be used. + * \sa XLSXIO_VERSION_* + * \sa XLSXIO_VERSION_STRING + * \sa xlsxioread_get_version() + * \sa xlsxioread_get_version_string() + * \sa xlsxiowrite_get_version() + * \sa xlsxiowrite_get_version_string() + */ + +#ifndef INCLUDED_XLSXIO_VERSION_H +#define INCLUDED_XLSXIO_VERSION_H + +/*! \brief version number constants + * \sa xlsxioread_get_version() + * \sa xlsxiowrite_get_version() + * \name XLSXIO_VERSION_* + * \{ + */ +/*! \brief major version number */ +#define XLSXIO_VERSION_MAJOR 0 +/*! \brief minor version number */ +#define XLSXIO_VERSION_MINOR 2 +/*! \brief micro version number */ +#define XLSXIO_VERSION_MICRO 36 +/*! @} */ + +/*! \cond PRIVATE */ +#ifndef XML_UNICODE_WCHAR_T +#define XLSXIO_VERSION_STRINGIZE_(major, minor, micro) #major"."#minor"."#micro +#else +#define XLSXIO_VERSION_STRINGIZE_(major, minor, micro) L ## #major"."#minor"."#micro +#endif +#define XLSXIO_VERSION_STRINGIZE(major, minor, micro) XLSXIO_VERSION_STRINGIZE_(major, minor, micro) +/*! 
\endcond */ + +/*! \brief string with dotted version number \hideinitializer */ +#define XLSXIO_VERSION_STRING XLSXIO_VERSION_STRINGIZE(XLSXIO_VERSION_MAJOR, XLSXIO_VERSION_MINOR, XLSXIO_VERSION_MICRO) + +/*! \brief integer version of the library \hideinitializer */ +#define XLSXIO_VERSION_ID (XLSXIO_VERSION_MAJOR * 10000 + XLSXIO_VERSION_MINOR * 100 + XLSXIO_VERSION_MICRO) + +/*! \brief string with name of XLSX I/O reading library */ +#define XLSXIOREAD_NAME "libxlsxio_read" +/*! \brief string with name of XLSX I/O writing library */ +#define XLSXIOWRITE_NAME "libxlsxio_write" + +/*! \brief string with name and version of XLSX I/O reading library \hideinitializer */ +#define XLSXIOREAD_FULLNAME XLSXIOREAD_NAME " " XLSXIO_VERSION_STRING +/*! \brief string with name and version of XLSX I/O writing library \hideinitializer */ +#define XLSXIOWRITE_FULLNAME XLSXIOWRITE_NAME " " XLSXIO_VERSION_STRING + +#endif diff --git a/CMakeLists.txt b/CMakeLists.txt index e31b2bd..bf12539 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.16) -project(docparser VERSION 1.0.0 LANGUAGES CXX) +project(docparser VERSION 1.0.0 LANGUAGES C CXX) # 设置C++标准 set(CMAKE_CXX_STANDARD 17) @@ -24,6 +24,7 @@ pkg_check_modules(DEPS_WITHOUT_MAGIC REQUIRED libxml-2.0 uuid tinyxml2 + expat ) # 单独处理libmagic依赖,兼容新旧环境 @@ -59,8 +60,10 @@ endif() set(DEPS_LIBRARIES ${DEPS_WITHOUT_MAGIC_LIBRARIES} ${LIBMAGIC_LIBRARIES}) set(DEPS_INCLUDE_DIRS ${DEPS_WITHOUT_MAGIC_INCLUDE_DIRS} ${LIBMAGIC_INCLUDE_DIRS}) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + # 添加子目录 -add_subdirectory(src) +add_subdirectory(src) # 添加测试选项,默认构建 option(BUILD_TESTS "Build test applications" ON) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ee42c6b..2e147e6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,3 +1,70 @@ +# xlsxio 和 libxls 源码路径(本地) +set(XLSXIO_SRC_DIR ${CMAKE_SOURCE_DIR}/3rdparty/libs/fileext/excel/xlsxio) +set(LIBXLS_SRC_DIR 
${CMAKE_SOURCE_DIR}/3rdparty/libs/fileext/excel/libxls)
+
+# Locate the xlsxio dependencies through pkg-config (expat + minizip + zlib); REQUIRED aborts configuration if one is missing
+pkg_check_modules(EXPAT REQUIRED expat)
+pkg_check_modules(MINIZIP REQUIRED minizip)
+pkg_check_modules(ZLIB REQUIRED zlib)
+
+# Build the xlsxio_read static library from the bundled (local) sources
+add_library(xlsxio_read STATIC
+    ${XLSXIO_SRC_DIR}/xlsxio_read.c
+    ${XLSXIO_SRC_DIR}/xlsxio_read_sharedstrings.c
+)
+target_compile_definitions(xlsxio_read
+    PRIVATE
+        USE_MINIZIP # presumably selects the minizip backend inside xlsxio - confirm in xlsxio_read.c
+)
+target_include_directories(xlsxio_read
+    PUBLIC
+        ${XLSXIO_SRC_DIR}
+    PRIVATE
+        ${EXPAT_INCLUDE_DIRS}
+        ${MINIZIP_INCLUDE_DIRS}
+        ${ZLIB_INCLUDE_DIRS}
+)
+target_link_libraries(xlsxio_read
+    PRIVATE
+        ${EXPAT_LIBRARIES}
+        ${MINIZIP_LIBRARIES}
+        ${ZLIB_LIBRARIES}
+)
+set_target_properties(xlsxio_read PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+# Generate the config.h required by libxls; file(WRITE) runs at configure time and writes into the build tree
+set(LIBXLS_CONFIG_DIR ${CMAKE_CURRENT_BINARY_DIR}/libxls_config)
+file(WRITE ${LIBXLS_CONFIG_DIR}/config.h
+    "#define LIBXLS_MAJOR_VERSION 1\n"
+    "#define LIBXLS_MINOR_VERSION 6\n"
+    "#define LIBXLS_MICRO_VERSION 3\n"
+    "#define PACKAGE_VERSION \"1.6.3\"\n"
+    "#define HAVE_ICONV 1\n"
+    "#define ICONV_CONST \n"
+    "#define HAVE_STRDUP 1\n"
+    "#define HAVE_WCHAR_H 1\n"
+)
+
+# Build the libxls static library (target name: xls) from the bundled (local) sources
+add_library(xls STATIC
+    ${LIBXLS_SRC_DIR}/xls.c
+    ${LIBXLS_SRC_DIR}/ole.c
+    ${LIBXLS_SRC_DIR}/xlstool.c
+    ${LIBXLS_SRC_DIR}/endian.c
+    ${LIBXLS_SRC_DIR}/locale.c
+)
+target_include_directories(xls
+    PUBLIC
+        ${LIBXLS_SRC_DIR}/include
+    PRIVATE
+        ${LIBXLS_CONFIG_DIR}
+)
+target_compile_definitions(xls
+    PRIVATE
+        _XOPEN_SOURCE=700
+)
+set_target_properties(xls PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
 # 收集源文件
 file(GLOB_RECURSE SRC_FILES_MAIN
     "${CMAKE_CURRENT_SOURCE_DIR}/*.h"
@@ -8,7 +75,6 @@ file(GLOB_RECURSE SRC_FILES_3RDPARTY_LIBS
     "${CMAKE_SOURCE_DIR}/3rdparty/libs/*.cpp"
     "${CMAKE_SOURCE_DIR}/3rdparty/libs/*.hpp"
     "${CMAKE_SOURCE_DIR}/3rdparty/libs/*.h"
-
 )
 
 file(GLOB_RECURSE SRC_FILES_3RDPARTY_UTILS
@@ -59,6 +125,8 @@ target_include_directories(docparser
target_link_libraries(docparser PRIVATE ${DEPS_LIBRARIES} + xlsxio_read + xls ) # 安装目标 @@ -85,4 +153,4 @@ configure_file( install(FILES ${CMAKE_CURRENT_BINARY_DIR}/docparser.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig -) +)