/* * rapidcsv.h * * URL: https://github.com/d99kris/rapidcsv * Version: 8.83 * * Copyright (C) 2017-2024 Kristofer Berggren * All rights reserved. * * rapidcsv is distributed under the BSD 3-Clause license, see LICENSE for details. * */ #pragma once #include #include #include #ifdef HAS_CODECVT #include #include #endif #include #include #include #include #include #include #include #include #include #include namespace rapidcsv { #if defined(_MSC_VER) static const bool sPlatformHasCR = true; #else static const bool sPlatformHasCR = false; #endif static const std::vector s_Utf8BOM = { '\xef', '\xbb', '\xbf' }; /** * @brief Datastructure holding parameters controlling how invalid numbers (including * empty strings) should be handled. */ struct ConverterParams { /** * @brief Constructor * @param pHasDefaultConverter specifies if conversion of non-numerical strings shall be * converted to a default numerical value, instead of causing * an exception to be thrown (default). * @param pDefaultFloat floating-point default value to represent invalid numbers. * @param pDefaultInteger integer default value to represent invalid numbers. * @param pNumericLocale specifies whether to honor LC_NUMERIC locale (default * true). */ explicit ConverterParams(const bool pHasDefaultConverter = false, const long double pDefaultFloat = std::numeric_limits::signaling_NaN(), const long long pDefaultInteger = 0, const bool pNumericLocale = true) : mHasDefaultConverter(pHasDefaultConverter) , mDefaultFloat(pDefaultFloat) , mDefaultInteger(pDefaultInteger) , mNumericLocale(pNumericLocale) { } /** * @brief specifies if conversion of non-numerical strings shall be converted to a default * numerical value, instead of causing an exception to be thrown (default). */ bool mHasDefaultConverter; /** * @brief floating-point default value to represent invalid numbers. */ long double mDefaultFloat; /** * @brief integer default value to represent invalid numbers. */ long long mDefaultInteger; /** * @brief specifies whether to honor LC_NUMERIC locale. */ bool mNumericLocale; }; /** * @brief Exception thrown when attempting to access Document data in a datatype which * is not supported by the Converter class. */ class no_converter : public std::exception { public: /** * @brief Provides details about the exception * @returns an explanatory string */ const char* what() const throw() override { return "unsupported conversion datatype"; } }; /** * @brief Class providing conversion to/from numerical datatypes and strings. Only * intended for rapidcsv internal usage, but exposed externally to allow * specialization for custom datatype conversions. */ template class Converter { public: /** * @brief Constructor * @param pConverterParams specifies how conversion of non-numerical values to * numerical datatype shall be handled. */ Converter(const ConverterParams& pConverterParams) : mConverterParams(pConverterParams) { } /** * @brief Converts numerical value to string representation. * @param pVal numerical value * @param pStr output string */ void ToStr(const T& pVal, std::string& pStr) const { if (typeid(T) == typeid(int) || typeid(T) == typeid(long) || typeid(T) == typeid(long long) || typeid(T) == typeid(unsigned) || typeid(T) == typeid(unsigned long) || typeid(T) == typeid(unsigned long long) || typeid(T) == typeid(long double) || typeid(T) == typeid(char)) { std::ostringstream out; out << pVal; pStr = out.str(); } else if (typeid(T) == typeid(float)) { std::ostringstream out; out << std::setprecision(9) << pVal; pStr = out.str(); } else if (typeid(T) == typeid(double)) { std::ostringstream out; out << std::setprecision(17) << pVal; pStr = out.str(); } else { throw no_converter(); } } /** * @brief Converts string holding a numerical value to numerical datatype representation. * @param pVal numerical value * @param pStr output string */ void ToVal(const std::string& pStr, T& pVal) const { try { if (typeid(T) == typeid(int)) { pVal = static_cast(std::stoi(pStr)); return; } else if (typeid(T) == typeid(long)) { pVal = static_cast(std::stol(pStr)); return; } else if (typeid(T) == typeid(long long)) { pVal = static_cast(std::stoll(pStr)); return; } else if (typeid(T) == typeid(unsigned)) { pVal = static_cast(std::stoul(pStr)); return; } else if (typeid(T) == typeid(unsigned long)) { pVal = static_cast(std::stoul(pStr)); return; } else if (typeid(T) == typeid(unsigned long long)) { pVal = static_cast(std::stoull(pStr)); return; } } catch (...) { if (!mConverterParams.mHasDefaultConverter) { throw; } else { pVal = static_cast(mConverterParams.mDefaultInteger); return; } } try { if (mConverterParams.mNumericLocale) { if (typeid(T) == typeid(float)) { pVal = static_cast(std::stof(pStr)); return; } else if (typeid(T) == typeid(double)) { pVal = static_cast(std::stod(pStr)); return; } else if (typeid(T) == typeid(long double)) { pVal = static_cast(std::stold(pStr)); return; } } else { if ((typeid(T) == typeid(float)) || (typeid(T) == typeid(double)) || (typeid(T) == typeid(long double))) { std::istringstream iss(pStr); iss.imbue(std::locale::classic()); iss >> pVal; if (iss.fail() || iss.bad() || !iss.eof()) { throw std::invalid_argument("istringstream: no conversion"); } return; } } } catch (...) { if (!mConverterParams.mHasDefaultConverter) { throw; } else { pVal = static_cast(mConverterParams.mDefaultFloat); return; } } if (typeid(T) == typeid(char)) { pVal = static_cast(pStr[0]); return; } else { throw no_converter(); } } private: const ConverterParams& mConverterParams; }; /** * @brief Specialized implementation handling string to string conversion. * @param pVal string * @param pStr string */ template<> inline void Converter::ToStr(const std::string& pVal, std::string& pStr) const { pStr = pVal; } /** * @brief Specialized implementation handling string to string conversion. * @param pVal string * @param pStr string */ template<> inline void Converter::ToVal(const std::string& pStr, std::string& pVal) const { pVal = pStr; } template using ConvFunc = std::function; /** * @brief Datastructure holding parameters controlling which row and column should be * treated as labels. */ struct LabelParams { /** * @brief Constructor * @param pColumnNameIdx specifies the zero-based row index of the column labels, setting * it to -1 prevents column lookup by label name, and gives access * to all rows as document data. Default: 0 * @param pRowNameIdx specifies the zero-based column index of the row labels, setting * it to -1 prevents row lookup by label name, and gives access * to all columns as document data. Default: -1 */ explicit LabelParams(const int pColumnNameIdx = 0, const int pRowNameIdx = -1) : mColumnNameIdx(pColumnNameIdx) , mRowNameIdx(pRowNameIdx) { if (mColumnNameIdx < -1) { const std::string errStr = "invalid column name index " + std::to_string(mColumnNameIdx) + " < -1"; throw std::out_of_range(errStr); } if (mRowNameIdx < -1) { const std::string errStr = "invalid row name index " + std::to_string(mRowNameIdx) + " < -1"; throw std::out_of_range(errStr); } } /** * @brief specifies the zero-based row index of the column labels. */ int mColumnNameIdx; /** * @brief specifies the zero-based column index of the row labels. */ int mRowNameIdx; }; /** * @brief Datastructure holding parameters controlling how the CSV data fields are separated. */ struct SeparatorParams { /** * @brief Constructor * @param pSeparator specifies the column separator (default ','). * @param pTrim specifies whether to trim leading and trailing spaces from * cells read (default false). * @param pHasCR specifies whether a new document (i.e. not an existing document read) * should use CR/LF instead of only LF (default is to use standard * behavior of underlying platforms - CR/LF for Win, and LF for others). * @param pQuotedLinebreaks specifies whether to allow line breaks in quoted text (default false) * @param pAutoQuote specifies whether to automatically dequote data during read, and add * quotes during write (default true). * @param pQuoteChar specifies the quote character (default '\"'). */ explicit SeparatorParams(const char pSeparator = ',', const bool pTrim = false, const bool pHasCR = sPlatformHasCR, const bool pQuotedLinebreaks = false, const bool pAutoQuote = true, const char pQuoteChar = '"') : mSeparator(pSeparator) , mTrim(pTrim) , mHasCR(pHasCR) , mQuotedLinebreaks(pQuotedLinebreaks) , mAutoQuote(pAutoQuote) , mQuoteChar(pQuoteChar) { } /** * @brief specifies the column separator. */ char mSeparator; /** * @brief specifies whether to trim leading and trailing spaces from cells read. */ bool mTrim; /** * @brief specifies whether new documents should use CR/LF instead of LF. */ bool mHasCR; /** * @brief specifies whether to allow line breaks in quoted text. */ bool mQuotedLinebreaks; /** * @brief specifies whether to automatically dequote cell data. */ bool mAutoQuote; /** * @brief specifies the quote character. */ char mQuoteChar; }; /** * @brief Datastructure holding parameters controlling how special line formats should be * treated. */ struct LineReaderParams { /** * @brief Constructor * @param pSkipCommentLines specifies whether to skip lines prefixed with * mCommentPrefix. Default: false * @param pCommentPrefix specifies which prefix character to indicate a comment * line. Default: # * @param pSkipEmptyLines specifies whether to skip empty lines. Default: false */ explicit LineReaderParams(const bool pSkipCommentLines = false, const char pCommentPrefix = '#', const bool pSkipEmptyLines = false) : mSkipCommentLines(pSkipCommentLines) , mCommentPrefix(pCommentPrefix) , mSkipEmptyLines(pSkipEmptyLines) { } /** * @brief specifies whether to skip lines prefixed with mCommentPrefix. */ bool mSkipCommentLines; /** * @brief specifies which prefix character to indicate a comment line. */ char mCommentPrefix; /** * @brief specifies whether to skip empty lines. */ bool mSkipEmptyLines; }; /** * @brief Class representing a CSV document. */ class Document { public: /** * @brief Constructor * @param pPath specifies the path of an existing CSV-file to populate the Document * data with. * @param pLabelParams specifies which row and column should be treated as labels. * @param pSeparatorParams specifies which field and row separators should be used. * @param pConverterParams specifies how invalid numbers (including empty strings) should be * handled. * @param pLineReaderParams specifies how special line formats should be treated. */ explicit Document(const std::string& pPath = std::string(), const LabelParams& pLabelParams = LabelParams(), const SeparatorParams& pSeparatorParams = SeparatorParams(), const ConverterParams& pConverterParams = ConverterParams(), const LineReaderParams& pLineReaderParams = LineReaderParams()) : mPath(pPath) , mLabelParams(pLabelParams) , mSeparatorParams(pSeparatorParams) , mConverterParams(pConverterParams) , mLineReaderParams(pLineReaderParams) , mData() , mColumnNames() , mRowNames() { if (!mPath.empty()) { ReadCsv(); } } /** * @brief Constructor * @param pStream specifies a binary input stream to read CSV data from. * @param pLabelParams specifies which row and column should be treated as labels. * @param pSeparatorParams specifies which field and row separators should be used. * @param pConverterParams specifies how invalid numbers (including empty strings) should be * handled. * @param pLineReaderParams specifies how special line formats should be treated. */ explicit Document(std::istream& pStream, const LabelParams& pLabelParams = LabelParams(), const SeparatorParams& pSeparatorParams = SeparatorParams(), const ConverterParams& pConverterParams = ConverterParams(), const LineReaderParams& pLineReaderParams = LineReaderParams()) : mPath() , mLabelParams(pLabelParams) , mSeparatorParams(pSeparatorParams) , mConverterParams(pConverterParams) , mLineReaderParams(pLineReaderParams) , mData() , mColumnNames() , mRowNames() { ReadCsv(pStream); } /** * @brief Read Document data from file. * @param pPath specifies the path of an existing CSV-file to populate the Document * data with. * @param pLabelParams specifies which row and column should be treated as labels. * @param pSeparatorParams specifies which field and row separators should be used. * @param pConverterParams specifies how invalid numbers (including empty strings) should be * handled. * @param pLineReaderParams specifies how special line formats should be treated. */ void Load(const std::string& pPath, const LabelParams& pLabelParams = LabelParams(), const SeparatorParams& pSeparatorParams = SeparatorParams(), const ConverterParams& pConverterParams = ConverterParams(), const LineReaderParams& pLineReaderParams = LineReaderParams()) { mPath = pPath; mLabelParams = pLabelParams; mSeparatorParams = pSeparatorParams; mConverterParams = pConverterParams; mLineReaderParams = pLineReaderParams; ReadCsv(); } /** * @brief Read Document data from stream. * @param pStream specifies a binary input stream to read CSV data from. * @param pLabelParams specifies which row and column should be treated as labels. * @param pSeparatorParams specifies which field and row separators should be used. * @param pConverterParams specifies how invalid numbers (including empty strings) should be * handled. * @param pLineReaderParams specifies how special line formats should be treated. */ void Load(std::istream& pStream, const LabelParams& pLabelParams = LabelParams(), const SeparatorParams& pSeparatorParams = SeparatorParams(), const ConverterParams& pConverterParams = ConverterParams(), const LineReaderParams& pLineReaderParams = LineReaderParams()) { mPath = ""; mLabelParams = pLabelParams; mSeparatorParams = pSeparatorParams; mConverterParams = pConverterParams; mLineReaderParams = pLineReaderParams; ReadCsv(pStream); } /** * @brief Write Document data to file. * @param pPath optionally specifies the path where the CSV-file will be created * (if not specified, the original path provided when creating or * loading the Document data will be used). */ void Save(const std::string& pPath = std::string()) { if (!pPath.empty()) { mPath = pPath; } WriteCsv(); } /** * @brief Write Document data to stream. * @param pStream specifies a binary output stream to write the data to. */ void Save(std::ostream& pStream) const { WriteCsv(pStream); } /** * @brief Clears loaded Document data. * */ void Clear() { mData.clear(); mColumnNames.clear(); mRowNames.clear(); #ifdef HAS_CODECVT mIsUtf16 = false; mIsLE = false; #endif mHasUtf8BOM = false; } /** * @brief Get column index by name. * @param pColumnName column label name. * @returns zero-based column index. */ int GetColumnIdx(const std::string& pColumnName) const { if (mLabelParams.mColumnNameIdx >= 0) { if (mColumnNames.find(pColumnName) != mColumnNames.end()) { return static_cast(mColumnNames.at(pColumnName)) - (mLabelParams.mRowNameIdx + 1); } } return -1; } /** * @brief Get column by index. * @param pColumnIdx zero-based column index. * @returns vector of column data. */ template std::vector GetColumn(const size_t pColumnIdx) const { const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); std::vector column; Converter converter(mConverterParams); for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) { if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx) { if (dataColumnIdx < itRow->size()) { T val; converter.ToVal(itRow->at(dataColumnIdx), val); column.push_back(val); } else { const std::string errStr = "requested column index " + std::to_string(pColumnIdx) + " >= " + std::to_string(itRow->size() - GetDataColumnIndex(0)) + " (number of columns on row index " + std::to_string(std::distance(mData.begin(), itRow) - (mLabelParams.mColumnNameIdx + 1)) + ")"; throw std::out_of_range(errStr); } } } return column; } /** * @brief Get column by index. * @param pColumnIdx zero-based column index. * @param pToVal conversion function. * @returns vector of column data. */ template std::vector GetColumn(const size_t pColumnIdx, ConvFunc pToVal) const { const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); std::vector column; for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) { if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx) { T val; pToVal(itRow->at(dataColumnIdx), val); column.push_back(val); } } return column; } /** * @brief Get column by name. * @param pColumnName column label name. * @returns vector of column data. */ template std::vector GetColumn(const std::string& pColumnName) const { const int columnIdx = GetColumnIdx(pColumnName); if (columnIdx < 0) { throw std::out_of_range("column not found: " + pColumnName); } return GetColumn(static_cast(columnIdx)); } /** * @brief Get column by name. * @param pColumnName column label name. * @param pToVal conversion function. * @returns vector of column data. */ template std::vector GetColumn(const std::string& pColumnName, ConvFunc pToVal) const { const int columnIdx = GetColumnIdx(pColumnName); if (columnIdx < 0) { throw std::out_of_range("column not found: " + pColumnName); } return GetColumn(static_cast(columnIdx), pToVal); } /** * @brief Set column by index. * @param pColumnIdx zero-based column index. * @param pColumn vector of column data. */ template void SetColumn(const size_t pColumnIdx, const std::vector& pColumn) { const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); while (GetDataRowIndex(pColumn.size()) > GetDataRowCount()) { std::vector row; row.resize(GetDataColumnCount()); mData.push_back(row); } if ((dataColumnIdx + 1) > GetDataColumnCount()) { for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) { if (std::distance(mData.begin(), itRow) >= mLabelParams.mColumnNameIdx) { itRow->resize(GetDataColumnIndex(dataColumnIdx + 1)); } } } Converter converter(mConverterParams); for (auto itRow = pColumn.begin(); itRow != pColumn.end(); ++itRow) { std::string str; converter.ToStr(*itRow, str); mData.at(static_cast(std::distance(pColumn.begin(), itRow) + mLabelParams.mColumnNameIdx + 1)).at( dataColumnIdx) = str; } } /** * @brief Set column by name. * @param pColumnName column label name. * @param pColumn vector of column data. */ template void SetColumn(const std::string& pColumnName, const std::vector& pColumn) { const int columnIdx = GetColumnIdx(pColumnName); if (columnIdx < 0) { throw std::out_of_range("column not found: " + pColumnName); } SetColumn(static_cast(columnIdx), pColumn); } /** * @brief Remove column by index. * @param pColumnIdx zero-based column index. */ void RemoveColumn(const size_t pColumnIdx) { const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) { if (std::distance(mData.begin(), itRow) >= mLabelParams.mColumnNameIdx) { itRow->erase(itRow->begin() + static_cast(dataColumnIdx)); } } UpdateColumnNames(); } /** * @brief Remove column by name. * @param pColumnName column label name. */ void RemoveColumn(const std::string& pColumnName) { int columnIdx = GetColumnIdx(pColumnName); if (columnIdx < 0) { throw std::out_of_range("column not found: " + pColumnName); } RemoveColumn(static_cast(columnIdx)); } /** * @brief Insert column at specified index. * @param pColumnIdx zero-based column index. * @param pColumn vector of column data (optional argument). * @param pColumnName column label name (optional argument). */ template void InsertColumn(const size_t pColumnIdx, const std::vector& pColumn = std::vector(), const std::string& pColumnName = std::string()) { const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); std::vector column; if (pColumn.empty()) { column.resize(GetDataRowCount()); } else { column.resize(GetDataRowIndex(pColumn.size())); Converter converter(mConverterParams); for (auto itRow = pColumn.begin(); itRow != pColumn.end(); ++itRow) { std::string str; converter.ToStr(*itRow, str); const size_t rowIdx = static_cast(std::distance(pColumn.begin(), itRow) + (mLabelParams.mColumnNameIdx + 1)); column.at(rowIdx) = str; } } while (column.size() > GetDataRowCount()) { std::vector row; const size_t columnCount = std::max(static_cast(mLabelParams.mColumnNameIdx + 1), GetDataColumnCount()); row.resize(columnCount); mData.push_back(row); } for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) { if (std::distance(mData.begin(), itRow) >= mLabelParams.mColumnNameIdx) { const size_t rowIdx = static_cast(std::distance(mData.begin(), itRow)); itRow->insert(itRow->begin() + static_cast(dataColumnIdx), column.at(rowIdx)); } } if (!pColumnName.empty()) { SetColumnName(pColumnIdx, pColumnName); } UpdateColumnNames(); } /** * @brief Get number of data columns (excluding label columns). * @returns column count. */ size_t GetColumnCount() const { const int count = static_cast((mData.size() > 0) ? mData.at(0).size() : 0) - (mLabelParams.mRowNameIdx + 1); return (count >= 0) ? static_cast(count) : 0; } /** * @brief Get row index by name. * @param pRowName row label name. * @returns zero-based row index. */ int GetRowIdx(const std::string& pRowName) const { if (mLabelParams.mRowNameIdx >= 0) { if (mRowNames.find(pRowName) != mRowNames.end()) { return static_cast(mRowNames.at(pRowName)) - (mLabelParams.mColumnNameIdx + 1); } } return -1; } /** * @brief Get row by index. * @param pRowIdx zero-based row index. * @returns vector of row data. */ template std::vector GetRow(const size_t pRowIdx) const { const size_t dataRowIdx = GetDataRowIndex(pRowIdx); std::vector row; Converter converter(mConverterParams); for (auto itCol = mData.at(dataRowIdx).begin(); itCol != mData.at(dataRowIdx).end(); ++itCol) { if (std::distance(mData.at(dataRowIdx).begin(), itCol) > mLabelParams.mRowNameIdx) { T val; converter.ToVal(*itCol, val); row.push_back(val); } } return row; } /** * @brief Get row by index. * @param pRowIdx zero-based row index. * @param pToVal conversion function. * @returns vector of row data. */ template std::vector GetRow(const size_t pRowIdx, ConvFunc pToVal) const { const size_t dataRowIdx = GetDataRowIndex(pRowIdx); std::vector row; Converter converter(mConverterParams); for (auto itCol = mData.at(dataRowIdx).begin(); itCol != mData.at(dataRowIdx).end(); ++itCol) { if (std::distance(mData.at(dataRowIdx).begin(), itCol) > mLabelParams.mRowNameIdx) { T val; pToVal(*itCol, val); row.push_back(val); } } return row; } /** * @brief Get row by name. * @param pRowName row label name. * @returns vector of row data. */ template std::vector GetRow(const std::string& pRowName) const { int rowIdx = GetRowIdx(pRowName); if (rowIdx < 0) { throw std::out_of_range("row not found: " + pRowName); } return GetRow(static_cast(rowIdx)); } /** * @brief Get row by name. * @param pRowName row label name. * @param pToVal conversion function. * @returns vector of row data. */ template std::vector GetRow(const std::string& pRowName, ConvFunc pToVal) const { int rowIdx = GetRowIdx(pRowName); if (rowIdx < 0) { throw std::out_of_range("row not found: " + pRowName); } return GetRow(static_cast(rowIdx), pToVal); } /** * @brief Set row by index. * @param pRowIdx zero-based row index. * @param pRow vector of row data. */ template void SetRow(const size_t pRowIdx, const std::vector& pRow) { const size_t dataRowIdx = GetDataRowIndex(pRowIdx); while ((dataRowIdx + 1) > GetDataRowCount()) { std::vector row; row.resize(GetDataColumnCount()); mData.push_back(row); } if (pRow.size() > GetDataColumnCount()) { for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) { if (std::distance(mData.begin(), itRow) >= mLabelParams.mColumnNameIdx) { itRow->resize(GetDataColumnIndex(pRow.size())); } } } Converter converter(mConverterParams); for (auto itCol = pRow.begin(); itCol != pRow.end(); ++itCol) { std::string str; converter.ToStr(*itCol, str); mData.at(dataRowIdx).at(static_cast(std::distance(pRow.begin(), itCol) + mLabelParams.mRowNameIdx + 1)) = str; } } /** * @brief Set row by name. * @param pRowName row label name. * @param pRow vector of row data. */ template void SetRow(const std::string& pRowName, const std::vector& pRow) { int rowIdx = GetRowIdx(pRowName); if (rowIdx < 0) { throw std::out_of_range("row not found: " + pRowName); } return SetRow(static_cast(rowIdx), pRow); } /** * @brief Remove row by index. * @param pRowIdx zero-based row index. */ void RemoveRow(const size_t pRowIdx) { const size_t dataRowIdx = GetDataRowIndex(pRowIdx); mData.erase(mData.begin() + static_cast(dataRowIdx)); UpdateRowNames(); } /** * @brief Remove row by name. * @param pRowName row label name. */ void RemoveRow(const std::string& pRowName) { int rowIdx = GetRowIdx(pRowName); if (rowIdx < 0) { throw std::out_of_range("row not found: " + pRowName); } RemoveRow(static_cast(rowIdx)); } /** * @brief Insert row at specified index. * @param pRowIdx zero-based row index. * @param pRow vector of row data (optional argument). * @param pRowName row label name (optional argument). */ template void InsertRow(const size_t pRowIdx, const std::vector& pRow = std::vector(), const std::string& pRowName = std::string()) { const size_t rowIdx = GetDataRowIndex(pRowIdx); std::vector row; if (pRow.empty()) { row.resize(GetDataColumnCount()); } else { row.resize(GetDataColumnIndex(pRow.size())); Converter converter(mConverterParams); for (auto itCol = pRow.begin(); itCol != pRow.end(); ++itCol) { std::string str; converter.ToStr(*itCol, str); row.at(static_cast(std::distance(pRow.begin(), itCol) + mLabelParams.mRowNameIdx + 1)) = str; } } while (rowIdx > GetDataRowCount()) { std::vector tempRow; tempRow.resize(GetDataColumnCount()); mData.push_back(tempRow); } mData.insert(mData.begin() + static_cast(rowIdx), row); if (!pRowName.empty()) { SetRowName(pRowIdx, pRowName); } UpdateRowNames(); } /** * @brief Get number of data rows (excluding label rows). * @returns row count. */ size_t GetRowCount() const { const int count = static_cast(mData.size()) - (mLabelParams.mColumnNameIdx + 1); return (count >= 0) ? static_cast(count) : 0; } /** * @brief Get cell by index. * @param pColumnIdx zero-based column index. * @param pRowIdx zero-based row index. * @returns cell data. */ template T GetCell(const size_t pColumnIdx, const size_t pRowIdx) const { const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); const size_t dataRowIdx = GetDataRowIndex(pRowIdx); T val; Converter converter(mConverterParams); converter.ToVal(mData.at(dataRowIdx).at(dataColumnIdx), val); return val; } /** * @brief Get cell by index. * @param pColumnIdx zero-based column index. * @param pRowIdx zero-based row index. * @param pToVal conversion function. * @returns cell data. */ template T GetCell(const size_t pColumnIdx, const size_t pRowIdx, ConvFunc pToVal) const { const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); const size_t dataRowIdx = GetDataRowIndex(pRowIdx); T val; pToVal(mData.at(dataRowIdx).at(dataColumnIdx), val); return val; } /** * @brief Get cell by name. * @param pColumnName column label name. * @param pRowName row label name. * @returns cell data. */ template T GetCell(const std::string& pColumnName, const std::string& pRowName) const { const int columnIdx = GetColumnIdx(pColumnName); if (columnIdx < 0) { throw std::out_of_range("column not found: " + pColumnName); } const int rowIdx = GetRowIdx(pRowName); if (rowIdx < 0) { throw std::out_of_range("row not found: " + pRowName); } return GetCell(static_cast(columnIdx), static_cast(rowIdx)); } /** * @brief Get cell by name. * @param pColumnName column label name. * @param pRowName row label name. * @param pToVal conversion function. * @returns cell data. */ template T GetCell(const std::string& pColumnName, const std::string& pRowName, ConvFunc pToVal) const { const int columnIdx = GetColumnIdx(pColumnName); if (columnIdx < 0) { throw std::out_of_range("column not found: " + pColumnName); } const int rowIdx = GetRowIdx(pRowName); if (rowIdx < 0) { throw std::out_of_range("row not found: " + pRowName); } return GetCell(static_cast(columnIdx), static_cast(rowIdx), pToVal); } /** * @brief Get cell by column name and row index. * @param pColumnName column label name. * @param pRowIdx zero-based row index. * @returns cell data. */ template T GetCell(const std::string& pColumnName, const size_t pRowIdx) const { const int columnIdx = GetColumnIdx(pColumnName); if (columnIdx < 0) { throw std::out_of_range("column not found: " + pColumnName); } return GetCell(static_cast(columnIdx), pRowIdx); } /** * @brief Get cell by column name and row index. * @param pColumnName column label name. * @param pRowIdx zero-based row index. * @param pToVal conversion function. * @returns cell data. */ template T GetCell(const std::string& pColumnName, const size_t pRowIdx, ConvFunc pToVal) const { const int columnIdx = GetColumnIdx(pColumnName); if (columnIdx < 0) { throw std::out_of_range("column not found: " + pColumnName); } return GetCell(static_cast(columnIdx), pRowIdx, pToVal); } /** * @brief Get cell by column index and row name. * @param pColumnIdx zero-based column index. * @param pRowName row label name. * @returns cell data. */ template T GetCell(const size_t pColumnIdx, const std::string& pRowName) const { const int rowIdx = GetRowIdx(pRowName); if (rowIdx < 0) { throw std::out_of_range("row not found: " + pRowName); } return GetCell(pColumnIdx, static_cast(rowIdx)); } /** * @brief Get cell by column index and row name. * @param pColumnIdx zero-based column index. * @param pRowName row label name. * @param pToVal conversion function. * @returns cell data. */ template T GetCell(const size_t pColumnIdx, const std::string& pRowName, ConvFunc pToVal) const { const int rowIdx = GetRowIdx(pRowName); if (rowIdx < 0) { throw std::out_of_range("row not found: " + pRowName); } return GetCell(pColumnIdx, static_cast(rowIdx), pToVal); } /** * @brief Set cell by index. * @param pRowIdx zero-based row index. * @param pColumnIdx zero-based column index. * @param pCell cell data. */ template void SetCell(const size_t pColumnIdx, const size_t pRowIdx, const T& pCell) { const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); const size_t dataRowIdx = GetDataRowIndex(pRowIdx); while ((dataRowIdx + 1) > GetDataRowCount()) { std::vector row; row.resize(GetDataColumnCount()); mData.push_back(row); } if ((dataColumnIdx + 1) > GetDataColumnCount()) { for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) { if (std::distance(mData.begin(), itRow) >= mLabelParams.mColumnNameIdx) { itRow->resize(dataColumnIdx + 1); } } } std::string str; Converter converter(mConverterParams); converter.ToStr(pCell, str); mData.at(dataRowIdx).at(dataColumnIdx) = str; } /** * @brief Set cell by name. * @param pColumnName column label name. * @param pRowName row label name. * @param pCell cell data. */ template void SetCell(const std::string& pColumnName, const std::string& pRowName, const T& pCell) { const int columnIdx = GetColumnIdx(pColumnName); if (columnIdx < 0) { throw std::out_of_range("column not found: " + pColumnName); } const int rowIdx = GetRowIdx(pRowName); if (rowIdx < 0) { throw std::out_of_range("row not found: " + pRowName); } SetCell(static_cast(columnIdx), static_cast(rowIdx), pCell); } /** * @brief Set cell by column index and row name. * @param pColumnIdx zero-based column index. * @param pRowName row label name. * @param pCell cell data. */ template void SetCell(const size_t pColumnIdx, const std::string& pRowName, const T& pCell) { const int rowIdx = GetRowIdx(pRowName); if (rowIdx < 0) { throw std::out_of_range("row not found: " + pRowName); } SetCell(pColumnIdx, static_cast(rowIdx), pCell); } /** * @brief Set cell by column name and row index. * @param pColumnName column label name. * @param pRowIdx zero-based row index. * @param pCell cell data. */ template void SetCell(const std::string& pColumnName, const size_t pRowIdx, const T& pCell) { const int columnIdx = GetColumnIdx(pColumnName); if (columnIdx < 0) { throw std::out_of_range("column not found: " + pColumnName); } SetCell(static_cast(columnIdx), pRowIdx, pCell); } /** * @brief Get column name * @param pColumnIdx zero-based column index. * @returns column name. */ std::string GetColumnName(const size_t pColumnIdx) const { const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); if (mLabelParams.mColumnNameIdx < 0) { throw std::out_of_range("column name row index < 0: " + std::to_string(mLabelParams.mColumnNameIdx)); } return mData.at(static_cast(mLabelParams.mColumnNameIdx)).at(dataColumnIdx); } /** * @brief Set column name * @param pColumnIdx zero-based column index. * @param pColumnName column name. */ void SetColumnName(size_t pColumnIdx, const std::string& pColumnName) { if (mLabelParams.mColumnNameIdx < 0) { throw std::out_of_range("column name row index < 0: " + std::to_string(mLabelParams.mColumnNameIdx)); } const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); mColumnNames[pColumnName] = dataColumnIdx; // increase table size if necessary: const size_t rowIdx = static_cast(mLabelParams.mColumnNameIdx); if (rowIdx >= mData.size()) { mData.resize(rowIdx + 1); } auto& row = mData[rowIdx]; if (dataColumnIdx >= row.size()) { row.resize(dataColumnIdx + 1); } mData.at(static_cast(mLabelParams.mColumnNameIdx)).at(dataColumnIdx) = pColumnName; } /** * @brief Get column names * @returns vector of column names. */ std::vector GetColumnNames() const { if (mLabelParams.mColumnNameIdx >= 0) { return std::vector(mData.at(static_cast(mLabelParams.mColumnNameIdx)).begin() + (mLabelParams.mRowNameIdx + 1), mData.at(static_cast(mLabelParams.mColumnNameIdx)).end()); } return std::vector(); } /** * @brief Get row name * @param pRowIdx zero-based column index. * @returns row name. */ std::string GetRowName(const size_t pRowIdx) const { const size_t dataRowIdx = GetDataRowIndex(pRowIdx); if (mLabelParams.mRowNameIdx < 0) { throw std::out_of_range("row name column index < 0: " + std::to_string(mLabelParams.mRowNameIdx)); } return mData.at(dataRowIdx).at(static_cast(mLabelParams.mRowNameIdx)); } /** * @brief Set row name * @param pRowIdx zero-based row index. * @param pRowName row name. */ void SetRowName(size_t pRowIdx, const std::string& pRowName) { const size_t dataRowIdx = GetDataRowIndex(pRowIdx); mRowNames[pRowName] = dataRowIdx; if (mLabelParams.mRowNameIdx < 0) { throw std::out_of_range("row name column index < 0: " + std::to_string(mLabelParams.mRowNameIdx)); } // increase table size if necessary: if (dataRowIdx >= mData.size()) { mData.resize(dataRowIdx + 1); } auto& row = mData[dataRowIdx]; if (mLabelParams.mRowNameIdx >= static_cast(row.size())) { row.resize(static_cast(mLabelParams.mRowNameIdx) + 1); } mData.at(dataRowIdx).at(static_cast(mLabelParams.mRowNameIdx)) = pRowName; } /** * @brief Get row names * @returns vector of row names. */ std::vector GetRowNames() const { std::vector rownames; if (mLabelParams.mRowNameIdx >= 0) { for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) { if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx) { rownames.push_back(itRow->at(static_cast(mLabelParams.mRowNameIdx))); } } } return rownames; } private: void ReadCsv() { std::ifstream stream; stream.exceptions(std::ifstream::failbit | std::ifstream::badbit); stream.open(mPath, std::ios::binary); ReadCsv(stream); } void ReadCsv(std::istream& pStream) { Clear(); pStream.seekg(0, std::ios::end); std::streamsize length = pStream.tellg(); pStream.seekg(0, std::ios::beg); #ifdef HAS_CODECVT std::vector bom2b(2, '\0'); if (length >= 2) { pStream.read(bom2b.data(), 2); pStream.seekg(0, std::ios::beg); } static const std::vector bomU16le = { '\xff', '\xfe' }; static const std::vector bomU16be = { '\xfe', '\xff' }; if ((bom2b == bomU16le) || (bom2b == bomU16be)) { mIsUtf16 = true; mIsLE = (bom2b == bomU16le); std::wifstream wstream; wstream.exceptions(std::wifstream::failbit | std::wifstream::badbit); wstream.open(mPath, std::ios::binary); if (mIsLE) { wstream.imbue(std::locale(wstream.getloc(), new std::codecvt_utf16(std::consume_header | std::little_endian)>)); } else { wstream.imbue(std::locale(wstream.getloc(), new std::codecvt_utf16)); } std::wstringstream wss; wss << wstream.rdbuf(); std::string utf8 = ToString(wss.str()); std::stringstream ss(utf8); ParseCsv(ss, static_cast(utf8.size())); } else #endif { // check for UTF-8 Byte order mark and skip it when found if (length >= 3) { std::vector bom3b(3, '\0'); pStream.read(bom3b.data(), 3); if (bom3b != s_Utf8BOM) { // file does not start with a UTF-8 Byte order mark pStream.seekg(0, std::ios::beg); } else { // file did start with a UTF-8 Byte order mark, simply skip it length -= 3; mHasUtf8BOM = true; } } ParseCsv(pStream, length); } } void ParseCsv(std::istream& pStream, std::streamsize p_FileLength) { const std::streamsize bufLength = 64 * 1024; std::vector buffer(bufLength); std::vector row; std::string cell; bool quoted = false; int cr = 0; int lf = 0; while (p_FileLength > 0) { const std::streamsize toReadLength = std::min(p_FileLength, bufLength); pStream.read(buffer.data(), toReadLength); // With user-specified istream opened in non-binary mode on windows, we may have a // data length mismatch, so ensure we don't parse outside actual data length read. const std::streamsize readLength = pStream.gcount(); if (readLength <= 0) { break; } for (size_t i = 0; i < static_cast(readLength); ++i) { if (buffer[i] == mSeparatorParams.mQuoteChar) { if (cell.empty() || (cell[0] == mSeparatorParams.mQuoteChar)) { quoted = !quoted; } else if (mSeparatorParams.mTrim) { // allow whitespace before first mQuoteChar const auto firstQuote = std::find(cell.begin(), cell.end(), mSeparatorParams.mQuoteChar); if (std::all_of(cell.begin(), firstQuote, [](int ch) { return isspace(ch); })) { quoted = !quoted; } } cell += buffer[i]; } else if (buffer[i] == mSeparatorParams.mSeparator) { if (!quoted) { row.push_back(Unquote(Trim(cell))); cell.clear(); } else { cell += buffer[i]; } } else if (buffer[i] == '\r') { if (mSeparatorParams.mQuotedLinebreaks && quoted) { cell += buffer[i]; } else { ++cr; } } else if (buffer[i] == '\n') { if (mSeparatorParams.mQuotedLinebreaks && quoted) { cell += buffer[i]; } else { ++lf; if (mLineReaderParams.mSkipEmptyLines && row.empty() && cell.empty()) { // skip empty line } else { row.push_back(Unquote(Trim(cell))); if (mLineReaderParams.mSkipCommentLines && !row.at(0).empty() && (row.at(0)[0] == mLineReaderParams.mCommentPrefix)) { // skip comment line } else { mData.push_back(row); } cell.clear(); row.clear(); quoted = false; } } } else { cell += buffer[i]; } } p_FileLength -= readLength; } // Handle last cell without linebreak if (!cell.empty()) { row.push_back(Unquote(Trim(cell))); cell.clear(); } // Handle last line without linebreak if (!row.empty()) { if (mLineReaderParams.mSkipCommentLines && !row.at(0).empty() && (row.at(0)[0] == mLineReaderParams.mCommentPrefix)) { // skip comment line } else { mData.push_back(row); } row.clear(); } // Assume CR/LF if at least half the linebreaks have CR mSeparatorParams.mHasCR = (cr > (lf / 2)); // Set up column labels UpdateColumnNames(); // Set up row labels UpdateRowNames(); } void WriteCsv() const { #ifdef HAS_CODECVT if (mIsUtf16) { std::stringstream ss; WriteCsv(ss); std::string utf8 = ss.str(); std::wstring wstr = ToWString(utf8); std::wofstream wstream; wstream.exceptions(std::wofstream::failbit | std::wofstream::badbit); wstream.open(mPath, std::ios::binary | std::ios::trunc); if (mIsLE) { wstream.imbue(std::locale(wstream.getloc(), new std::codecvt_utf16(std::little_endian)>)); } else { wstream.imbue(std::locale(wstream.getloc(), new std::codecvt_utf16)); } wstream << static_cast(0xfeff); wstream << wstr; } else #endif { std::ofstream stream; stream.exceptions(std::ofstream::failbit | std::ofstream::badbit); stream.open(mPath, std::ios::binary | std::ios::trunc); if (mHasUtf8BOM) { stream.write(s_Utf8BOM.data(), 3); } WriteCsv(stream); } } void WriteCsv(std::ostream& pStream) const { for (auto itr = mData.begin(); itr != mData.end(); ++itr) { for (auto itc = itr->begin(); itc != itr->end(); ++itc) { if (mSeparatorParams.mAutoQuote && ((itc->find(mSeparatorParams.mSeparator) != std::string::npos) || (itc->find(' ') != std::string::npos) || (itc->find('\n') != std::string::npos))) { // escape quotes in string std::string str = *itc; const std::string quoteCharStr = std::string(1, mSeparatorParams.mQuoteChar); ReplaceString(str, quoteCharStr, quoteCharStr + quoteCharStr); pStream << quoteCharStr << str << quoteCharStr; } else { pStream << *itc; } if (std::distance(itc, itr->end()) > 1) { pStream << mSeparatorParams.mSeparator; } } pStream << (mSeparatorParams.mHasCR ? "\r\n" : "\n"); } } size_t GetDataRowCount() const { return mData.size(); } size_t GetDataColumnCount() const { const size_t firstDataRow = static_cast((mLabelParams.mColumnNameIdx >= 0) ? mLabelParams.mColumnNameIdx : 0); return (mData.size() > firstDataRow) ? mData.at(firstDataRow).size() : 0; } inline size_t GetDataRowIndex(const size_t pRowIdx) const { const size_t firstDataRow = static_cast((mLabelParams.mColumnNameIdx + 1 >= 0) ? mLabelParams.mColumnNameIdx + 1 : 0); return pRowIdx + firstDataRow; } inline size_t GetDataColumnIndex(const size_t pColumnIdx) const { const size_t firstDataColumn = static_cast((mLabelParams.mRowNameIdx + 1 >= 0) ? mLabelParams.mRowNameIdx + 1 : 0); return pColumnIdx + firstDataColumn; } std::string Trim(const std::string& pStr) const { if (mSeparatorParams.mTrim) { std::string str = pStr; // ltrim str.erase(str.begin(), std::find_if(str.begin(), str.end(), [](int ch) { return !isspace(ch); })); // rtrim str.erase(std::find_if(str.rbegin(), str.rend(), [](int ch) { return !isspace(ch); }).base(), str.end()); return str; } else { return pStr; } } std::string Unquote(const std::string& pStr) const { if (mSeparatorParams.mAutoQuote && (pStr.size() >= 2) && (pStr.front() == mSeparatorParams.mQuoteChar) && (pStr.back() == mSeparatorParams.mQuoteChar)) { // remove start/end quotes std::string str = pStr.substr(1, pStr.size() - 2); // unescape quotes in string const std::string quoteCharStr = std::string(1, mSeparatorParams.mQuoteChar); ReplaceString(str, quoteCharStr + quoteCharStr, quoteCharStr); return str; } else { return pStr; } } void UpdateColumnNames() { mColumnNames.clear(); if ((mLabelParams.mColumnNameIdx >= 0) && (static_cast(mData.size()) > mLabelParams.mColumnNameIdx)) { size_t i = 0; for (auto& columnName : mData[static_cast(mLabelParams.mColumnNameIdx)]) { mColumnNames[columnName] = i++; } } } void UpdateRowNames() { mRowNames.clear(); if ((mLabelParams.mRowNameIdx >= 0) && (static_cast(mData.size()) > (mLabelParams.mColumnNameIdx + 1))) { size_t i = 0; for (auto& dataRow : mData) { if (static_cast(dataRow.size()) > mLabelParams.mRowNameIdx) { mRowNames[dataRow[static_cast(mLabelParams.mRowNameIdx)]] = i++; } } } } #ifdef HAS_CODECVT #if defined(_MSC_VER) #pragma warning (push) #pragma warning (disable: 4996) #endif static std::string ToString(const std::wstring& pWStr) { return std::wstring_convert, wchar_t>{ }.to_bytes(pWStr); } static std::wstring ToWString(const std::string& pStr) { return std::wstring_convert, wchar_t>{ }.from_bytes(pStr); } #if defined(_MSC_VER) #pragma warning (pop) #endif #endif static void ReplaceString(std::string& pStr, const std::string& pSearch, const std::string& pReplace) { size_t pos = 0; while ((pos = pStr.find(pSearch, pos)) != std::string::npos) { pStr.replace(pos, pSearch.size(), pReplace); pos += pReplace.size(); } } private: std::string mPath; LabelParams mLabelParams; SeparatorParams mSeparatorParams; ConverterParams mConverterParams; LineReaderParams mLineReaderParams; std::vector> mData; std::map mColumnNames; std::map mRowNames; #ifdef HAS_CODECVT bool mIsUtf16 = false; bool mIsLE = false; #endif bool mHasUtf8BOM = false; }; }