12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132 |
- /*
- * rapidcsv.h
- *
- * URL: https://github.com/d99kris/rapidcsv
- * Version: 8.84
- *
- * Copyright (C) 2017-2024 Kristofer Berggren
- * All rights reserved.
- *
- * rapidcsv is distributed under the BSD 3-Clause license, see LICENSE for details.
- *
- */
- #pragma once
- #include <algorithm>
- #include <cassert>
- #include <cmath>
- #ifdef HAS_CODECVT
- #include <codecvt>
- #include <locale>
- #endif
- #include <fstream>
- #include <functional>
- #include <iomanip>
- #include <iostream>
- #include <limits>
- #include <map>
- #include <sstream>
- #include <string>
- #include <typeinfo>
- #include <vector>
- namespace rapidcsv
- {
// Compile-time platform default for line endings of new documents:
// true when building with MSVC (CR/LF), false for every other compiler (LF).
static const bool sPlatformHasCR =
#if defined(_MSC_VER)
  true;
#else
  false;
#endif

// The three bytes of the UTF-8 byte order mark (EF BB BF).
static const std::vector<char> s_Utf8BOM = { '\xef', '\xbb', '\xbf' };
/**
 * @brief Parameter bundle describing how strings that are not valid numbers
 *        (including empty strings) are treated during conversion.
 */
struct ConverterParams
{
  /**
   * @brief when true, a string that cannot be converted yields a default
   *        numerical value instead of an exception being thrown.
   */
  bool mHasDefaultConverter;

  /**
   * @brief default value used for floating-point conversions of invalid numbers.
   */
  long double mDefaultFloat;

  /**
   * @brief default value used for integer conversions of invalid numbers.
   */
  long long mDefaultInteger;

  /**
   * @brief whether the `LC_NUMERIC` locale is honored when parsing
   *        floating-point values (the decimal separator can differ between locales).
   */
  bool mNumericLocale;

  /**
   * @brief Constructor
   * @param   pHasDefaultConverter  when true, non-numerical strings are converted to a
   *                                default numerical value; when false (default) the
   *                                conversion error is propagated as an exception.
   * @param   pDefaultFloat         floating-point value representing invalid numbers
   *                                (default: signaling NaN).
   * @param   pDefaultInteger       integer value representing invalid numbers (default 0).
   * @param   pNumericLocale        whether to honor the LC_NUMERIC locale (default true).
   */
  explicit ConverterParams(const bool pHasDefaultConverter = false,
                           const long double pDefaultFloat = std::numeric_limits<long double>::signaling_NaN(),
                           const long long pDefaultInteger = 0,
                           const bool pNumericLocale = true)
    : mHasDefaultConverter{ pHasDefaultConverter }
    , mDefaultFloat{ pDefaultFloat }
    , mDefaultInteger{ pDefaultInteger }
    , mNumericLocale{ pNumericLocale }
  {
  }
};
/**
 * @brief Exception thrown when Document data is accessed as a datatype that the
 *        Converter class does not support.
 */
class no_converter : public std::exception
{
public:
  /**
   * @brief   Provides details about the exception.
   * @returns an explanatory string.
   */
  // `noexcept` replaces the dynamic exception specification `throw()`, which is
  // deprecated and no longer accepted from C++20 on; the guarantee is the same.
  const char* what() const noexcept override
  {
    return "unsupported conversion datatype";
  }
};
- /**
- * @brief Class providing conversion to/from numerical datatypes and strings. Only
- * intended for rapidcsv internal usage, but exposed externally to allow
- * specialization for custom datatype conversions.
- * 提供数值数据类型和字符串之间转换的类
- * 仅供`rapidcsv`内部使用,但公开以允许对自定义数据类型进行特化。
- */
- template<typename T>
- class Converter
- {
- public:
- /**
- * @brief Constructor
- * @param pConverterParams specifies how conversion of non-numerical values to
- * numerical datatype shall be handled.
- * 指定如何处理非数值值转换为数值数据类型的参数。
- */
- Converter(const ConverterParams& pConverterParams)
- : mConverterParams(pConverterParams)
- {
- }
- /**
- * @brief Converts numerical value to string representation.
- * 将数值转换为字符串表示
- * @param pVal numerical value
- * @param pStr output string
- */
- void ToStr(const T& pVal, std::string& pStr) const
- {
- if (typeid(T) == typeid(int) ||
- typeid(T) == typeid(long) ||
- typeid(T) == typeid(long long) ||
- typeid(T) == typeid(unsigned) ||
- typeid(T) == typeid(unsigned long) ||
- typeid(T) == typeid(unsigned long long) ||
- typeid(T) == typeid(long double) ||
- typeid(T) == typeid(char))
- {
- std::ostringstream out;
- out << pVal;
- pStr = out.str();
- }
- else if (typeid(T) == typeid(float))
- {
- std::ostringstream out;
- out << std::setprecision(9) << pVal;
- pStr = out.str();
- }
- else if (typeid(T) == typeid(double))
- {
- std::ostringstream out;
- out << std::setprecision(17) << pVal;
- pStr = out.str();
- }
- else
- {
- throw no_converter();
- }
- }
- /**
- * @brief Converts string holding a numerical value to numerical datatype representation.
- * 将包含数值的字符串转换为数值数据类型表示
- * @param pVal numerical value
- * @param pStr output string
- */
- void ToVal(const std::string& pStr, T& pVal) const
- {
- try
- {
- if (typeid(T) == typeid(int))
- {
- pVal = static_cast<T>(std::stoi(pStr));
- return;
- }
- else if (typeid(T) == typeid(long))
- {
- pVal = static_cast<T>(std::stol(pStr));
- return;
- }
- else if (typeid(T) == typeid(long long))
- {
- pVal = static_cast<T>(std::stoll(pStr));
- return;
- }
- else if (typeid(T) == typeid(unsigned))
- {
- pVal = static_cast<T>(std::stoul(pStr));
- return;
- }
- else if (typeid(T) == typeid(unsigned long))
- {
- pVal = static_cast<T>(std::stoul(pStr));
- return;
- }
- else if (typeid(T) == typeid(unsigned long long))
- {
- pVal = static_cast<T>(std::stoull(pStr));
- return;
- }
- }
- catch (...)
- {
- if (!mConverterParams.mHasDefaultConverter)
- {
- throw;
- }
- else
- {
- pVal = static_cast<T>(mConverterParams.mDefaultInteger);
- return;
- }
- }
- try
- {
- if (mConverterParams.mNumericLocale)
- {
- if (typeid(T) == typeid(float))
- {
- pVal = static_cast<T>(std::stof(pStr));
- return;
- }
- else if (typeid(T) == typeid(double))
- {
- pVal = static_cast<T>(std::stod(pStr));
- return;
- }
- else if (typeid(T) == typeid(long double))
- {
- pVal = static_cast<T>(std::stold(pStr));
- return;
- }
- }
- else
- {
- if ((typeid(T) == typeid(float)) ||
- (typeid(T) == typeid(double)) ||
- (typeid(T) == typeid(long double)))
- {
- std::istringstream iss(pStr);
- iss.imbue(std::locale::classic());
- iss >> pVal;
- if (iss.fail() || iss.bad() || !iss.eof())
- {
- throw std::invalid_argument("istringstream: no conversion");
- }
- return;
- }
- }
- }
- catch (...)
- {
- if (!mConverterParams.mHasDefaultConverter)
- {
- throw;
- }
- else
- {
- pVal = static_cast<T>(mConverterParams.mDefaultFloat);
- return;
- }
- }
- if (typeid(T) == typeid(char))
- {
- pVal = static_cast<T>(pStr[0]);
- return;
- }
- else
- {
- throw no_converter();
- }
- }
- private:
- const ConverterParams& mConverterParams;
- };
- /**
- * @brief Specialized implementation handling string to string conversion.
- * 专门用于处理字符串到字符串转换的特化实现
- * @param pVal string
- * @param pStr string
- */
- template<>
- inline void Converter<std::string>::ToStr(const std::string& pVal, std::string& pStr) const
- {
- pStr = pVal;
- }
- /**
- * @brief Specialized implementation handling string to string conversion.
- * 专门用于处理字符串到字符串转换的特化实现
- * @param pVal string
- * @param pStr string
- */
- template<>
- inline void Converter<std::string>::ToVal(const std::string& pStr, std::string& pVal) const
- {
- pVal = pStr;
- }
/**
 * @brief Signature of a caller-supplied conversion function: receives the cell
 *        string and writes the converted value to the output argument.
 */
template<typename T>
using ConvFunc = std::function<void(const std::string&, T&)>;
/**
 * @brief Parameter bundle selecting which row and which column are treated as labels.
 */
struct LabelParams
{
  /**
   * @brief zero-based row index of the column labels (-1: no column labels).
   */
  int mColumnNameIdx;

  /**
   * @brief zero-based column index of the row labels (-1: no row labels).
   */
  int mRowNameIdx;

  /**
   * @brief Constructor
   * @param   pColumnNameIdx        zero-based row index of the column labels. Setting it
   *                                to -1 prevents column lookup by label name and gives
   *                                access to all rows as document data. Default: 0
   * @param   pRowNameIdx           zero-based column index of the row labels. Setting it
   *                                to -1 prevents row lookup by label name and gives
   *                                access to all columns as document data. Default: -1
   * @throws  std::out_of_range     if either index is less than -1 (the column index
   *                                is checked first).
   */
  explicit LabelParams(const int pColumnNameIdx = 0, const int pRowNameIdx = -1)
    : mColumnNameIdx{ pColumnNameIdx }
    , mRowNameIdx{ pRowNameIdx }
  {
    if (pColumnNameIdx < -1)
    {
      throw std::out_of_range("invalid column name index " + std::to_string(pColumnNameIdx) + " < -1");
    }

    if (pRowNameIdx < -1)
    {
      throw std::out_of_range("invalid row name index " + std::to_string(pRowNameIdx) + " < -1");
    }
  }
};
- /**
- * @brief Datastructure holding parameters controlling how the CSV data fields are separated.
- * 保存控制如何分隔`CSV`数据字段的参数的数据结构
- */
- struct SeparatorParams
- {
- /**
- * @brief Constructor
- * @param pSeparator specifies the column separator (default ',').
- * 指定列分隔符(默认是',')
- * @param pTrim specifies whether to trim leading and trailing spaces from
- * cells read (default false).
- * 指定是否修剪读取到的单元格中的前导和尾随空格(默认是 false)
- * @param pHasCR specifies whether a new document (i.e. not an existing document read)
- * should use CR/LF instead of only LF (default is to use standard
- * behavior of underlying platforms - CR/LF for Win, and LF for others).
- * 指定新文档(即,不是读取的现有文档)是否应使用 CR/LF 而不是仅使用 LF
- * @param pQuotedLinebreaks specifies whether to allow line breaks in quoted text (default false)
- * 指定是否允许带引号的文本中包含换行符(默认是 false)
- * @param pAutoQuote specifies whether to automatically dequote data during read, and add
- * quotes during write (default true).
- * 指定在读取时是否自动去除引号,并在写入时添加引号(默认是 true)
- * @param pQuoteChar specifies the quote character (default '\"').
- * 指定引号字符(默认是 '\"')
- */
- explicit SeparatorParams(const char pSeparator = ',', const bool pTrim = false,
- const bool pHasCR = sPlatformHasCR, const bool pQuotedLinebreaks = false,
- const bool pAutoQuote = true, const char pQuoteChar = '"')
- : mSeparator(pSeparator)
- , mTrim(pTrim)
- , mHasCR(pHasCR)
- , mQuotedLinebreaks(pQuotedLinebreaks)
- , mAutoQuote(pAutoQuote)
- , mQuoteChar(pQuoteChar)
- {
- }
- /**
- * @brief specifies the column separator.
- * 指定列分隔符
- */
- char mSeparator;
- /**
- * @brief specifies whether to trim leading and trailing spaces from cells read.
- * 指定是否修剪读取到的单元格中的前导和尾随空格
- */
- bool mTrim;
- /**
- * @brief specifies whether new documents should use CR/LF instead of LF.
- * 指定新文档是否应使用`CR/LF`而不是`LF`
- */
- bool mHasCR;
- /**
- * @brief specifies whether to allow line breaks in quoted text.
- * 指定是否允许带引号的文本中包含换行符
- */
- bool mQuotedLinebreaks;
- /**
- * @brief specifies whether to automatically dequote cell data.
- * 指定是否自动去除单元格数据的引号
- */
- bool mAutoQuote;
- /**
- * @brief specifies the quote character.
- * 指定引号字符
- */
- char mQuoteChar;
- };
/**
 * @brief Parameter bundle describing how special line formats (comment lines,
 *        empty lines) are treated.
 */
struct LineReaderParams
{
  /**
   * @brief whether lines prefixed with mCommentPrefix are skipped.
   */
  bool mSkipCommentLines;

  /**
   * @brief prefix character that marks a comment line.
   */
  char mCommentPrefix;

  /**
   * @brief whether empty lines are skipped.
   */
  bool mSkipEmptyLines;

  /**
   * @brief Constructor
   * @param   pSkipCommentLines     whether to skip lines prefixed with mCommentPrefix.
   *                                Default: true
   * @param   pCommentPrefix        prefix character indicating a comment line. Default: #
   * @param   pSkipEmptyLines       whether to skip empty lines. Default: false
   */
  explicit LineReaderParams(const bool pSkipCommentLines = true,
                            const char pCommentPrefix = '#',
                            const bool pSkipEmptyLines = false)
    : mSkipCommentLines{ pSkipCommentLines }
    , mCommentPrefix{ pCommentPrefix }
    , mSkipEmptyLines{ pSkipEmptyLines }
  {
  }
};
- /**
- * @brief Class representing a CSV document.
- * 表示 CSV 文档的类
- */
- class Document
- {
- public:
- /**
- * @brief Constructor
- * @param pPath specifies the path of an existing CSV-file to populate the Document
- * data with.
- * 指定现有 CSV 文件的路径,以填充 Document 的数据
- * @param pLabelParams specifies which row and column should be treated as labels.
- * 指定应作为标签的行和列
- * @param pSeparatorParams specifies which field and row separators should be used.
- * 指定应使用的字段和行分隔符
- * @param pConverterParams specifies how invalid numbers (including empty strings) should be
- * handled.
- * 指定应如何处理无效的数字(包括空字符串)
- * @param pLineReaderParams specifies how special line formats should be treated.
- * 指定应如何处理特殊的行格式
- */
- explicit Document(const std::string& pPath = std::string(),
- const LabelParams& pLabelParams = LabelParams(),
- const SeparatorParams& pSeparatorParams = SeparatorParams(),
- const ConverterParams& pConverterParams = ConverterParams(),
- const LineReaderParams& pLineReaderParams = LineReaderParams())
- : mPath(pPath)
- , mLabelParams(pLabelParams)
- , mSeparatorParams(pSeparatorParams)
- , mConverterParams(pConverterParams)
- , mLineReaderParams(pLineReaderParams)
- , mData()
- , mColumnNames()
- , mRowNames()
- {
- if (!mPath.empty())
- {
- ReadCsv();
- }
- }
- /**
- * @brief Constructor
- * @param pStream specifies a binary input stream to read CSV data from.
- * 指定用于读取 CSV 数据的二进制输入流
- * @param pLabelParams specifies which row and column should be treated as labels.
- * 指定应作为标签的行和列
- * @param pSeparatorParams specifies which field and row separators should be used.
- * 指定应使用的字段和行分隔符
- * @param pConverterParams specifies how invalid numbers (including empty strings) should be
- * handled.
- * 指定应如何处理无效的数字(包括空字符串)
- * @param pLineReaderParams specifies how special line formats should be treated.
- * 指定应如何处理特殊的行格式
- */
- explicit Document(std::istream& pStream,
- const LabelParams& pLabelParams = LabelParams(),
- const SeparatorParams& pSeparatorParams = SeparatorParams(),
- const ConverterParams& pConverterParams = ConverterParams(),
- const LineReaderParams& pLineReaderParams = LineReaderParams())
- : mPath()
- , mLabelParams(pLabelParams)
- , mSeparatorParams(pSeparatorParams)
- , mConverterParams(pConverterParams)
- , mLineReaderParams(pLineReaderParams)
- , mData()
- , mColumnNames()
- , mRowNames()
- {
- ReadCsv(pStream);
- }
- /**
- * @brief Read Document data from file.
- * 从文件中读取`Document`数据
- * @param pPath specifies the path of an existing CSV-file to populate the Document
- * data with.
- * 指定现有 CSV 文件的路径,以填充 Document 的数据
- * @param pLabelParams specifies which row and column should be treated as labels.
- * 指定应作为标签的行和列
- * @param pSeparatorParams specifies which field and row separators should be used.
- * 指定应使用的字段和行分隔符
- * @param pConverterParams specifies how invalid numbers (including empty strings) should be
- * handled.
- * 指定应如何处理无效的数字(包括空字符串)
- * @param pLineReaderParams specifies how special line formats should be treated.
- * 指定应如何处理特殊的行格式
- */
- void Load(const std::string& pPath,
- const LabelParams& pLabelParams = LabelParams(),
- const SeparatorParams& pSeparatorParams = SeparatorParams(),
- const ConverterParams& pConverterParams = ConverterParams(),
- const LineReaderParams& pLineReaderParams = LineReaderParams())
- {
- mPath = pPath;
- mLabelParams = pLabelParams;
- mSeparatorParams = pSeparatorParams;
- mConverterParams = pConverterParams;
- mLineReaderParams = pLineReaderParams;
- ReadCsv();
- }
- /**
- * @brief Read Document data from stream.
- * 从流中读取`Document`数据
- * @param pStream specifies a binary input stream to read CSV data from.
- * 指定用于读取 CSV 数据的二进制输入流
- * @param pLabelParams specifies which row and column should be treated as labels.
- * 指定应作为标签的行和列
- * @param pSeparatorParams specifies which field and row separators should be used.
- * 指定应使用的字段和行分隔符
- * @param pConverterParams specifies how invalid numbers (including empty strings) should be
- * handled.
- * 指定应如何处理无效的数字(包括空字符串)
- * @param pLineReaderParams specifies how special line formats should be treated.
- * 指定应如何处理特殊的行格式
- */
- void Load(std::istream& pStream,
- const LabelParams& pLabelParams = LabelParams(),
- const SeparatorParams& pSeparatorParams = SeparatorParams(),
- const ConverterParams& pConverterParams = ConverterParams(),
- const LineReaderParams& pLineReaderParams = LineReaderParams())
- {
- mPath = "";
- mLabelParams = pLabelParams;
- mSeparatorParams = pSeparatorParams;
- mConverterParams = pConverterParams;
- mLineReaderParams = pLineReaderParams;
- ReadCsv(pStream);
- }
- /**
- * @brief Write Document data to file.
- * 将`Document`数据写入文件
- * @param pPath optionally specifies the path where the CSV-file will be created
- * (if not specified, the original path provided when creating or
- * loading the Document data will be used).
- * 可选参数,指定 CSV 文件将创建的路径
- * (如果未指定,则使用创建或加载 Document 数据时提供的原始路径)
- */
- void Save(const std::string& pPath = std::string())
- {
- if (!pPath.empty())
- {
- mPath = pPath;
- }
- WriteCsv();
- }
- /**
- * @brief Write Document data to stream.
- * 将`Document`数据写入流
- * @param pStream specifies a binary output stream to write the data to.
- * 指定用于写入数据的二进制输出流
- */
- void Save(std::ostream& pStream) const
- {
- WriteCsv(pStream);
- }
- /**
- * @brief Clears loaded Document data.
- * 清除加载的`Document`数据
- *
- */
- void Clear()
- {
- mData.clear();
- mColumnNames.clear();
- mRowNames.clear();
- #ifdef HAS_CODECVT
- mIsUtf16 = false;
- mIsLE = false;
- #endif
- mHasUtf8BOM = false;
- }
- /**
- * @brief Get column index by name.
- * 获取列索引通过列名称
- * @param pColumnName column label name.
- * 列标签名称
- * @returns zero-based column index.
- * 返回零基列索引
- */
- int GetColumnIdx(const std::string& pColumnName) const
- {
- if (mLabelParams.mColumnNameIdx >= 0)
- {
- if (mColumnNames.find(pColumnName) != mColumnNames.end())
- {
- return static_cast<int>(mColumnNames.at(pColumnName)) - (mLabelParams.mRowNameIdx + 1);
- }
- }
- return -1;
- }
- /**
- * @brief Get column by index.
- * 通过索引获取列数据
- * @param pColumnIdx zero-based column index.
- * 零基列索引
- * @returns vector of column data.
- * 返回列数据的向量
- */
- template<typename T>
- std::vector<T> GetColumn(const size_t pColumnIdx) const
- {
- const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
- std::vector<T> column;
- Converter<T> converter(mConverterParams);
- for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
- {
- if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx)
- {
- if (dataColumnIdx < itRow->size())
- {
- T val;
- converter.ToVal(itRow->at(dataColumnIdx), val);
- column.push_back(val);
- }
- else
- {
- const std::string errStr = "requested column index " +
- std::to_string(pColumnIdx) + " >= " +
- std::to_string(itRow->size() - GetDataColumnIndex(0)) +
- " (number of columns on row index " +
- std::to_string(std::distance(mData.begin(), itRow) -
- (mLabelParams.mColumnNameIdx + 1)) + ")";
- throw std::out_of_range(errStr);
- }
- }
- }
- return column;
- }
- /**
- * @brief Get column by index.
- * 通过索引获取列数据
- * @param pColumnIdx zero-based column index.
- * 零基列索引
- * @param pToVal conversion function.
- * 转换函数
- * @returns vector of column data.
- * 返回列数据的向量
- */
- template<typename T>
- std::vector<T> GetColumn(const size_t pColumnIdx, ConvFunc<T> pToVal) const
- {
- const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
- std::vector<T> column;
- for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
- {
- if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx)
- {
- T val;
- pToVal(itRow->at(dataColumnIdx), val);
- column.push_back(val);
- }
- }
- return column;
- }
- /**
- * @brief Get column by name.
- * 通过名称获取列数据
- * @param pColumnName column label name.
- * 列标签名称
- * @returns vector of column data.
- * 返回列数据的向量
- */
- template<typename T>
- std::vector<T> GetColumn(const std::string& pColumnName) const
- {
- const int columnIdx = GetColumnIdx(pColumnName);
- if (columnIdx < 0)
- {
- throw std::out_of_range("column not found: " + pColumnName);
- }
- return GetColumn<T>(static_cast<size_t>(columnIdx));
- }
- /**
- * @brief Get column by name.
- * 通过名称获取列数据
- * @param pColumnName column label name.
- * 列标签名称
- * @param pToVal conversion function.
- * 转换函数
- * @returns vector of column data.
- * 返回列数据的向量
- */
- template<typename T>
- std::vector<T> GetColumn(const std::string& pColumnName, ConvFunc<T> pToVal) const
- {
- const int columnIdx = GetColumnIdx(pColumnName);
- if (columnIdx < 0)
- {
- throw std::out_of_range("column not found: " + pColumnName);
- }
- return GetColumn<T>(static_cast<size_t>(columnIdx), pToVal);
- }
- /**
- * @brief Set column by index.
- * 设置列数据通过索引
- * @param pColumnIdx zero-based column index.
- * 零基列索引
- * @param pColumn vector of column data.
- * 列数据向量
- */
- template<typename T>
- void SetColumn(const size_t pColumnIdx, const std::vector<T>& pColumn)
- {
- const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
- while (GetDataRowIndex(pColumn.size()) > GetDataRowCount())
- {
- std::vector<std::string> row;
- row.resize(GetDataColumnCount());
- mData.push_back(row);
- }
- if ((dataColumnIdx + 1) > GetDataColumnCount())
- {
- for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
- {
- if (std::distance(mData.begin(), itRow) >= mLabelParams.mColumnNameIdx)
- {
- itRow->resize(GetDataColumnIndex(dataColumnIdx + 1));
- }
- }
- }
- Converter<T> converter(mConverterParams);
- for (auto itRow = pColumn.begin(); itRow != pColumn.end(); ++itRow)
- {
- std::string str;
- converter.ToStr(*itRow, str);
- mData.at(static_cast<size_t>(std::distance(pColumn.begin(), itRow) + mLabelParams.mColumnNameIdx + 1)).at(
- dataColumnIdx) = str;
- }
- }
- /**
- * @brief Set column by name.
- * 通过名称设置列数据
- * @param pColumnName column label name.
- * 列标签名称
- * @param pColumn vector of column data.
- * 列数据向量
- */
- template<typename T>
- void SetColumn(const std::string& pColumnName, const std::vector<T>& pColumn)
- {
- const int columnIdx = GetColumnIdx(pColumnName);
- if (columnIdx < 0)
- {
- throw std::out_of_range("column not found: " + pColumnName);
- }
- SetColumn<T>(static_cast<size_t>(columnIdx), pColumn);
- }
- /**
- * @brief Remove column by index.
- * 通过索引移除列
- * @param pColumnIdx zero-based column index.
- * 零基列索引
- */
- void RemoveColumn(const size_t pColumnIdx)
- {
- const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
- for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
- {
- if (std::distance(mData.begin(), itRow) >= mLabelParams.mColumnNameIdx)
- {
- itRow->erase(itRow->begin() + static_cast<int>(dataColumnIdx));
- }
- }
- UpdateColumnNames();
- }
- /**
- * @brief Remove column by name.
- * 通过名称移除列
- * @param pColumnName column label name.
- * 列标签名称
- */
- void RemoveColumn(const std::string& pColumnName)
- {
- int columnIdx = GetColumnIdx(pColumnName);
- if (columnIdx < 0)
- {
- throw std::out_of_range("column not found: " + pColumnName);
- }
- RemoveColumn(static_cast<size_t>(columnIdx));
- }
- /**
- * @brief Insert column at specified index.
- * 在指定索引处插入列
- * @param pColumnIdx zero-based column index.
- * 零基列索引
- * @param pColumn vector of column data (optional argument).
- * 列数据向量(可选参数)
- * @param pColumnName column label name (optional argument).
- * 列标签名称(可选参数)
- */
- template<typename T>
- void InsertColumn(const size_t pColumnIdx, const std::vector<T>& pColumn = std::vector<T>(),
- const std::string& pColumnName = std::string())
- {
- const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
- std::vector<std::string> column;
- if (pColumn.empty())
- {
- column.resize(GetDataRowCount());
- }
- else
- {
- column.resize(GetDataRowIndex(pColumn.size()));
- Converter<T> converter(mConverterParams);
- for (auto itRow = pColumn.begin(); itRow != pColumn.end(); ++itRow)
- {
- std::string str;
- converter.ToStr(*itRow, str);
- const size_t rowIdx =
- static_cast<size_t>(std::distance(pColumn.begin(), itRow) + (mLabelParams.mColumnNameIdx + 1));
- column.at(rowIdx) = str;
- }
- }
- while (column.size() > GetDataRowCount())
- {
- std::vector<std::string> row;
- const size_t columnCount = std::max<size_t>(static_cast<size_t>(mLabelParams.mColumnNameIdx + 1),
- GetDataColumnCount());
- row.resize(columnCount);
- mData.push_back(row);
- }
- for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
- {
- if (std::distance(mData.begin(), itRow) >= mLabelParams.mColumnNameIdx)
- {
- const size_t rowIdx = static_cast<size_t>(std::distance(mData.begin(), itRow));
- itRow->insert(itRow->begin() + static_cast<int>(dataColumnIdx), column.at(rowIdx));
- }
- }
- if (!pColumnName.empty())
- {
- SetColumnName(pColumnIdx, pColumnName);
- }
- UpdateColumnNames();
- }
- /**
- * @brief Get number of data columns (excluding label columns).
- * 获取数据列的数量(不包括标签列)
- * @returns column count.
- * 返回列数
- */
- size_t GetColumnCount() const
- {
- const int count = static_cast<int>((mData.size() > 0) ? mData.at(0).size() : 0) -
- (mLabelParams.mRowNameIdx + 1);
- return (count >= 0) ? static_cast<size_t>(count) : 0;
- }
- /**
- * @brief Get row index by name.
- * 通过名称获取行索引
- * @param pRowName row label name.
- * 行标签名称
- * @returns zero-based row index.
- * 返回零基行索引
- */
- int GetRowIdx(const std::string& pRowName) const
- {
- if (mLabelParams.mRowNameIdx >= 0)
- {
- if (mRowNames.find(pRowName) != mRowNames.end())
- {
- return static_cast<int>(mRowNames.at(pRowName)) - (mLabelParams.mColumnNameIdx + 1);
- }
- }
- return -1;
- }
- /**
- * @brief Get row by index.
- * 通过索引获取行数据
- * @param pRowIdx zero-based row index.
- * 零基行索引
- * @returns vector of row data.
- * 返回行数据的向量
- */
- template<typename T>
- std::vector<T> GetRow(const size_t pRowIdx) const
- {
- const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
- std::vector<T> row;
- Converter<T> converter(mConverterParams);
- for (auto itCol = mData.at(dataRowIdx).begin(); itCol != mData.at(dataRowIdx).end(); ++itCol)
- {
- if (std::distance(mData.at(dataRowIdx).begin(), itCol) > mLabelParams.mRowNameIdx)
- {
- T val;
- converter.ToVal(*itCol, val);
- row.push_back(val);
- }
- }
- return row;
- }
- /**
- * @brief Get row by index.
- * 通过索引获取行数据
- * @param pRowIdx zero-based row index.
- * 零基行索引
- * @param pToVal conversion function.
- * 转换函数
- * @returns vector of row data.
- * 返回行数据的向量
- */
- template<typename T>
- std::vector<T> GetRow(const size_t pRowIdx, ConvFunc<T> pToVal) const
- {
- const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
- std::vector<T> row;
- Converter<T> converter(mConverterParams);
- for (auto itCol = mData.at(dataRowIdx).begin(); itCol != mData.at(dataRowIdx).end(); ++itCol)
- {
- if (std::distance(mData.at(dataRowIdx).begin(), itCol) > mLabelParams.mRowNameIdx)
- {
- T val;
- pToVal(*itCol, val);
- row.push_back(val);
- }
- }
- return row;
- }
- /**
- * @brief Get row by name.
- * 通过名称获取行数据
- * @param pRowName row label name.
- * 行标签名称
- * @returns vector of row data.
- * 返回行数据的向量
- */
- template<typename T>
- std::vector<T> GetRow(const std::string& pRowName) const
- {
- int rowIdx = GetRowIdx(pRowName);
- if (rowIdx < 0)
- {
- throw std::out_of_range("row not found: " + pRowName);
- }
- return GetRow<T>(static_cast<size_t>(rowIdx));
- }
- /**
- * @brief Get row by name.
- * 通过名称获取行数据
- * @param pRowName row label name.
- * 行标签名称
- * @param pToVal conversion function.
- * 转换函数
- * @returns vector of row data.
- * 返回行数据的向量
- */
- template<typename T>
- std::vector<T> GetRow(const std::string& pRowName, ConvFunc<T> pToVal) const
- {
- int rowIdx = GetRowIdx(pRowName);
- if (rowIdx < 0)
- {
- throw std::out_of_range("row not found: " + pRowName);
- }
- return GetRow<T>(static_cast<size_t>(rowIdx), pToVal);
- }
- /**
- * @brief Set row by index.
- * 通过索引设置行数据
- * @param pRowIdx zero-based row index.
- * 零基行索引
- * @param pRow vector of row data.
- * 行数据的向量
- */
- template<typename T>
- void SetRow(const size_t pRowIdx, const std::vector<T>& pRow)
- {
- const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
- while ((dataRowIdx + 1) > GetDataRowCount())
- {
- std::vector<std::string> row;
- row.resize(GetDataColumnCount());
- mData.push_back(row);
- }
- if (pRow.size() > GetDataColumnCount())
- {
- for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
- {
- if (std::distance(mData.begin(), itRow) >= mLabelParams.mColumnNameIdx)
- {
- itRow->resize(GetDataColumnIndex(pRow.size()));
- }
- }
- }
- Converter<T> converter(mConverterParams);
- for (auto itCol = pRow.begin(); itCol != pRow.end(); ++itCol)
- {
- std::string str;
- converter.ToStr(*itCol, str);
- mData.at(dataRowIdx).at(static_cast<size_t>(std::distance(pRow.begin(),
- itCol) + mLabelParams.mRowNameIdx + 1)) = str;
- }
- }
- /**
- * @brief Set row by name.
- * 通过名称设置行数据
- * @param pRowName row label name.
- * 行标签名称
- * @param pRow vector of row data.
- * 行数据的向量
- */
- template<typename T>
- void SetRow(const std::string& pRowName, const std::vector<T>& pRow)
- {
- int rowIdx = GetRowIdx(pRowName);
- if (rowIdx < 0)
- {
- throw std::out_of_range("row not found: " + pRowName);
- }
- return SetRow<T>(static_cast<size_t>(rowIdx), pRow);
- }
- /**
- * @brief Remove row by index.
- * 通过索引移除行
- * @param pRowIdx zero-based row index.
- * 零基行索引
- */
- void RemoveRow(const size_t pRowIdx)
- {
- const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
- mData.erase(mData.begin() + static_cast<int>(dataRowIdx));
- UpdateRowNames();
- }
- /**
- * @brief Remove row by name.
- * 通过名称移除行
- * @param pRowName row label name.
- * 行标签名称
- */
- void RemoveRow(const std::string& pRowName)
- {
- int rowIdx = GetRowIdx(pRowName);
- if (rowIdx < 0)
- {
- throw std::out_of_range("row not found: " + pRowName);
- }
- RemoveRow(static_cast<size_t>(rowIdx));
- }
- /**
- * @brief Insert row at specified index.
- * 在指定索引处插入行
- * @param pRowIdx zero-based row index.
- * 零基行索引
- * @param pRow vector of row data (optional argument).
- * 行数据的向量(可选参数)
- * @param pRowName row label name (optional argument).
- * 行标签名称(可选参数)
- */
- template<typename T>
- void InsertRow(const size_t pRowIdx, const std::vector<T>& pRow = std::vector<T>(),
- const std::string& pRowName = std::string())
- {
- const size_t rowIdx = GetDataRowIndex(pRowIdx);
- std::vector<std::string> row;
- if (pRow.empty())
- {
- row.resize(GetDataColumnCount());
- }
- else
- {
- row.resize(GetDataColumnIndex(pRow.size()));
- Converter<T> converter(mConverterParams);
- for (auto itCol = pRow.begin(); itCol != pRow.end(); ++itCol)
- {
- std::string str;
- converter.ToStr(*itCol, str);
- row.at(static_cast<size_t>(std::distance(pRow.begin(), itCol) + mLabelParams.mRowNameIdx + 1)) = str;
- }
- }
- while (rowIdx > GetDataRowCount())
- {
- std::vector<std::string> tempRow;
- tempRow.resize(GetDataColumnCount());
- mData.push_back(tempRow);
- }
- mData.insert(mData.begin() + static_cast<int>(rowIdx), row);
- if (!pRowName.empty())
- {
- SetRowName(pRowIdx, pRowName);
- }
- UpdateRowNames();
- }
- /**
- * @brief Get number of data rows (excluding label rows).
- * 获取数据行的数量(不包括标签行)
- * @returns row count.
- * 返回行数
- */
- size_t GetRowCount() const
- {
- const int count = static_cast<int>(mData.size()) - (mLabelParams.mColumnNameIdx + 1);
- return (count >= 0) ? static_cast<size_t>(count) : 0;
- }
- /**
- * @brief Get cell by index.
- * 通过索引获取单元格数据
- * @param pColumnIdx zero-based column index.
- * 零基列索引
- * @param pRowIdx zero-based row index.
- * 零基行索引
- * @returns cell data.
- * 单元格数据
- */
- template<typename T>
- T GetCell(const size_t pColumnIdx, const size_t pRowIdx) const
- {
- const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
- const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
- T val;
- Converter<T> converter(mConverterParams);
- converter.ToVal(mData.at(dataRowIdx).at(dataColumnIdx), val);
- return val;
- }
- /**
- * @brief Get cell by index.
- * 通过索引获取单元格数据
- * @param pColumnIdx zero-based column index.
- * 零基列索引
- * @param pRowIdx zero-based row index.
- * 零基行索引
- * @param pToVal conversion function.
- * 转换函数
- * @returns cell data.
- * 单元格数据
- */
- template<typename T>
- T GetCell(const size_t pColumnIdx, const size_t pRowIdx, ConvFunc<T> pToVal) const
- {
- const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
- const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
- T val;
- pToVal(mData.at(dataRowIdx).at(dataColumnIdx), val);
- return val;
- }
- /**
- * @brief Get cell by name.
- * 通过名称获取单元格数据
- * @param pColumnName column label name.
- * 列标签名称
- * @param pRowName row label name.
- * 行标签名称
- * @returns cell data.
- * 单元格数据
- */
- template<typename T>
- T GetCell(const std::string& pColumnName, const std::string& pRowName) const
- {
- const int columnIdx = GetColumnIdx(pColumnName);
- if (columnIdx < 0)
- {
- throw std::out_of_range("column not found: " + pColumnName);
- }
- const int rowIdx = GetRowIdx(pRowName);
- if (rowIdx < 0)
- {
- throw std::out_of_range("row not found: " + pRowName);
- }
- return GetCell<T>(static_cast<size_t>(columnIdx), static_cast<size_t>(rowIdx));
- }
- /**
- * @brief Get cell by name.
- * 通过名称获取单元格数据
- * @param pColumnName column label name.
- * 列标签名称
- * @param pRowName row label name.
- * 行标签名称
- * @param pToVal conversion function.
- * 转换函数
- * @returns cell data.
- * 单元格数据
- */
- template<typename T>
- T GetCell(const std::string& pColumnName, const std::string& pRowName, ConvFunc<T> pToVal) const
- {
- const int columnIdx = GetColumnIdx(pColumnName);
- if (columnIdx < 0)
- {
- throw std::out_of_range("column not found: " + pColumnName);
- }
- const int rowIdx = GetRowIdx(pRowName);
- if (rowIdx < 0)
- {
- throw std::out_of_range("row not found: " + pRowName);
- }
- return GetCell<T>(static_cast<size_t>(columnIdx), static_cast<size_t>(rowIdx), pToVal);
- }
- /**
- * @brief Get cell by column name and row index.
- * 通过列名称和行索引获取单元格数据
- * @param pColumnName column label name.
- * 列标签名称
- * @param pRowIdx zero-based row index.
- * 零基行索引
- * @returns cell data.
- * 单元格数据
- */
- template<typename T>
- T GetCell(const std::string& pColumnName, const size_t pRowIdx) const
- {
- const int columnIdx = GetColumnIdx(pColumnName);
- if (columnIdx < 0)
- {
- throw std::out_of_range("column not found: " + pColumnName);
- }
- return GetCell<T>(static_cast<size_t>(columnIdx), pRowIdx);
- }
- /**
- * @brief Get cell by column name and row index.
- * 通过列名称和行索引获取单元格数据
- * @param pColumnName column label name.
- * 列标签名称
- * @param pRowIdx zero-based row index.
- * 零基行索引
- * @param pToVal conversion function.
- * 转换函数
- * @returns cell data.
- * 单元格数据
- */
- template<typename T>
- T GetCell(const std::string& pColumnName, const size_t pRowIdx, ConvFunc<T> pToVal) const
- {
- const int columnIdx = GetColumnIdx(pColumnName);
- if (columnIdx < 0)
- {
- throw std::out_of_range("column not found: " + pColumnName);
- }
- return GetCell<T>(static_cast<size_t>(columnIdx), pRowIdx, pToVal);
- }
- /**
- * @brief Get cell by column index and row name.
- * 通过列索引和行名称获取单元格数据
- * @param pColumnIdx zero-based column index.
- * 零基列索引
- * @param pRowName row label name.
- * 行标签名称
- * @returns cell data.
- * 单元格数据
- */
- template<typename T>
- T GetCell(const size_t pColumnIdx, const std::string& pRowName) const
- {
- const int rowIdx = GetRowIdx(pRowName);
- if (rowIdx < 0)
- {
- throw std::out_of_range("row not found: " + pRowName);
- }
- return GetCell<T>(pColumnIdx, static_cast<size_t>(rowIdx));
- }
- /**
- * @brief Get cell by column index and row name.
- * 通过列索引和行名称获取单元格数据
- * @param pColumnIdx zero-based column index.
- * 零基列索引
- * @param pRowName row label name.
- * 行标签名称
- * @param pToVal conversion function.
- * 转换函数
- * @returns cell data.
- * 单元格数据
- */
- template<typename T>
- T GetCell(const size_t pColumnIdx, const std::string& pRowName, ConvFunc<T> pToVal) const
- {
- const int rowIdx = GetRowIdx(pRowName);
- if (rowIdx < 0)
- {
- throw std::out_of_range("row not found: " + pRowName);
- }
- return GetCell<T>(pColumnIdx, static_cast<size_t>(rowIdx), pToVal);
- }
- /**
- * @brief Set cell by index.
- * 通过索引设置单元格数据
- * @param pRowIdx zero-based row index.
- * 零基行索引
- * @param pColumnIdx zero-based column index.
- * 零基列索引
- * @param pCell cell data.
- * 单元格数据
- */
- template<typename T>
- void SetCell(const size_t pColumnIdx, const size_t pRowIdx, const T& pCell)
- {
- const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
- const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
- while ((dataRowIdx + 1) > GetDataRowCount())
- {
- std::vector<std::string> row;
- row.resize(GetDataColumnCount());
- mData.push_back(row);
- }
- if ((dataColumnIdx + 1) > GetDataColumnCount())
- {
- for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
- {
- if (std::distance(mData.begin(), itRow) >= mLabelParams.mColumnNameIdx)
- {
- itRow->resize(dataColumnIdx + 1);
- }
- }
- }
- std::string str;
- Converter<T> converter(mConverterParams);
- converter.ToStr(pCell, str);
- mData.at(dataRowIdx).at(dataColumnIdx) = str;
- }
- /**
- * @brief Set cell by name.
- * 通过列名称和行名称设置单元格数据
- * @param pColumnName column label name.
- * 列标签名称
- * @param pRowName row label name.
- * 行标签名称
- * @param pCell cell data.
- * 单元格数据
- */
- template<typename T>
- void SetCell(const std::string& pColumnName, const std::string& pRowName, const T& pCell)
- {
- const int columnIdx = GetColumnIdx(pColumnName);
- if (columnIdx < 0)
- {
- throw std::out_of_range("column not found: " + pColumnName);
- }
- const int rowIdx = GetRowIdx(pRowName);
- if (rowIdx < 0)
- {
- throw std::out_of_range("row not found: " + pRowName);
- }
- SetCell<T>(static_cast<size_t>(columnIdx), static_cast<size_t>(rowIdx), pCell);
- }
- /**
- * @brief Set cell by column index and row name.
- * 通过列索引和行名称设置单元格数据
- * @param pColumnIdx zero-based column index.
- * 零基列索引
- * @param pRowName row label name.
- * 行标签名称
- * @param pCell cell data.
- * 单元格数据
- */
- template<typename T>
- void SetCell(const size_t pColumnIdx, const std::string& pRowName, const T& pCell)
- {
- const int rowIdx = GetRowIdx(pRowName);
- if (rowIdx < 0)
- {
- throw std::out_of_range("row not found: " + pRowName);
- }
- SetCell<T>(pColumnIdx, static_cast<size_t>(rowIdx), pCell);
- }
- /**
- * @brief Set cell by column name and row index.
- * 通过列名称和行索引设置单元格数据
- * @param pColumnName column label name.
- * 列标签名称
- * @param pRowIdx zero-based row index.
- * 零基行索引
- * @param pCell cell data.
- * 单元格数据
- */
- template<typename T>
- void SetCell(const std::string& pColumnName, const size_t pRowIdx, const T& pCell)
- {
- const int columnIdx = GetColumnIdx(pColumnName);
- if (columnIdx < 0)
- {
- throw std::out_of_range("column not found: " + pColumnName);
- }
- SetCell<T>(static_cast<size_t>(columnIdx), pRowIdx, pCell);
- }
- /**
- * @brief Get column name
- * 获取列名称
- * @param pColumnIdx zero-based column index.
- * 零基列索引
- * @returns column name.
- * 返回列名称
- */
- std::string GetColumnName(const size_t pColumnIdx) const
- {
- const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
- if (mLabelParams.mColumnNameIdx < 0)
- {
- throw std::out_of_range("column name row index < 0: " + std::to_string(mLabelParams.mColumnNameIdx));
- }
- return mData.at(static_cast<size_t>(mLabelParams.mColumnNameIdx)).at(dataColumnIdx);
- }
- /**
- * @brief Set column name
- * 设置列名称
- * @param pColumnIdx zero-based column index.
- * 零基列索引
- * @param pColumnName column name.
- * 列名称
- */
- void SetColumnName(size_t pColumnIdx, const std::string& pColumnName)
- {
- if (mLabelParams.mColumnNameIdx < 0)
- {
- throw std::out_of_range("column name row index < 0: " + std::to_string(mLabelParams.mColumnNameIdx));
- }
- const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
- mColumnNames[pColumnName] = dataColumnIdx;
- // increase table size if necessary:
- const size_t rowIdx = static_cast<size_t>(mLabelParams.mColumnNameIdx);
- if (rowIdx >= mData.size())
- {
- mData.resize(rowIdx + 1);
- }
- auto& row = mData[rowIdx];
- if (dataColumnIdx >= row.size())
- {
- row.resize(dataColumnIdx + 1);
- }
- mData.at(static_cast<size_t>(mLabelParams.mColumnNameIdx)).at(dataColumnIdx) = pColumnName;
- }
- /**
- * @brief Get column names
- * 获取列名称
- * @returns vector of column names.
- * 返回列名称的向量
- */
- std::vector<std::string> GetColumnNames() const
- {
- if (mLabelParams.mColumnNameIdx >= 0)
- {
- return std::vector<std::string>(mData.at(static_cast<size_t>(mLabelParams.mColumnNameIdx)).begin() +
- (mLabelParams.mRowNameIdx + 1),
- mData.at(static_cast<size_t>(mLabelParams.mColumnNameIdx)).end());
- }
- return std::vector<std::string>();
- }
- /**
- * @brief Get row name
- * 获取行名称
- * @param pRowIdx zero-based column index.
- * 零基列索引
- * @returns row name.
- * 返回行名称
- */
- std::string GetRowName(const size_t pRowIdx) const
- {
- const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
- if (mLabelParams.mRowNameIdx < 0)
- {
- throw std::out_of_range("row name column index < 0: " + std::to_string(mLabelParams.mRowNameIdx));
- }
- return mData.at(dataRowIdx).at(static_cast<size_t>(mLabelParams.mRowNameIdx));
- }
- /**
- * @brief Set row name
- * 设置行名称
- * @param pRowIdx zero-based row index.
- * 零基行索引
- * @param pRowName row name.
- * 行名称
- */
- void SetRowName(size_t pRowIdx, const std::string& pRowName)
- {
- const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
- mRowNames[pRowName] = dataRowIdx;
- if (mLabelParams.mRowNameIdx < 0)
- {
- throw std::out_of_range("row name column index < 0: " + std::to_string(mLabelParams.mRowNameIdx));
- }
- // increase table size if necessary:
- if (dataRowIdx >= mData.size())
- {
- mData.resize(dataRowIdx + 1);
- }
- auto& row = mData[dataRowIdx];
- if (mLabelParams.mRowNameIdx >= static_cast<int>(row.size()))
- {
- row.resize(static_cast<size_t>(mLabelParams.mRowNameIdx) + 1);
- }
- mData.at(dataRowIdx).at(static_cast<size_t>(mLabelParams.mRowNameIdx)) = pRowName;
- }
- /**
- * @brief Get row names
- * 获取行名称
- * @returns vector of row names.
- * 返回行名称的向量
- */
- std::vector<std::string> GetRowNames() const
- {
- std::vector<std::string> rownames;
- if (mLabelParams.mRowNameIdx >= 0)
- {
- for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
- {
- if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx)
- {
- rownames.push_back(itRow->at(static_cast<size_t>(mLabelParams.mRowNameIdx)));
- }
- }
- }
- return rownames;
- }
- private:
- void ReadCsv()
- {
- std::ifstream stream;
- stream.exceptions(std::ifstream::failbit | std::ifstream::badbit);
- stream.open(mPath, std::ios::binary);
- ReadCsv(stream);
- }
- void ReadCsv(std::istream& pStream)
- {
- Clear();
- pStream.seekg(0, std::ios::end);
- std::streamsize length = pStream.tellg();
- pStream.seekg(0, std::ios::beg);
- #ifdef HAS_CODECVT
- std::vector<char> bom2b(2, '\0');
- if (length >= 2)
- {
- pStream.read(bom2b.data(), 2);
- pStream.seekg(0, std::ios::beg);
- }
- static const std::vector<char> bomU16le = { '\xff', '\xfe' };
- static const std::vector<char> bomU16be = { '\xfe', '\xff' };
- if ((bom2b == bomU16le) || (bom2b == bomU16be))
- {
- mIsUtf16 = true;
- mIsLE = (bom2b == bomU16le);
- std::wifstream wstream;
- wstream.exceptions(std::wifstream::failbit | std::wifstream::badbit);
- wstream.open(mPath, std::ios::binary);
- if (mIsLE)
- {
- wstream.imbue(std::locale(wstream.getloc(),
- new std::codecvt_utf16<wchar_t, 0x10ffff,
- static_cast<std::codecvt_mode>(std::consume_header |
- std::little_endian)>));
- }
- else
- {
- wstream.imbue(std::locale(wstream.getloc(),
- new std::codecvt_utf16<wchar_t, 0x10ffff,
- std::consume_header>));
- }
- std::wstringstream wss;
- wss << wstream.rdbuf();
- std::string utf8 = ToString(wss.str());
- std::stringstream ss(utf8);
- ParseCsv(ss, static_cast<std::streamsize>(utf8.size()));
- }
- else
- #endif
- {
- // check for UTF-8 Byte order mark and skip it when found
- if (length >= 3)
- {
- std::vector<char> bom3b(3, '\0');
- pStream.read(bom3b.data(), 3);
- if (bom3b != s_Utf8BOM)
- {
- // file does not start with a UTF-8 Byte order mark
- pStream.seekg(0, std::ios::beg);
- }
- else
- {
- // file did start with a UTF-8 Byte order mark, simply skip it
- length -= 3;
- mHasUtf8BOM = true;
- }
- }
- ParseCsv(pStream, length);
- }
- }
- void ParseCsv(std::istream& pStream, std::streamsize p_FileLength)
- {
- const std::streamsize bufLength = 64 * 1024;
- char* buffer = new char[bufLength];
- std::vector<std::string> row;
- std::string cell;
- bool quoted = false;
- int cr = 0;
- int lf = 0;
- while (p_FileLength > 0)
- {
- const std::streamsize toReadLength = std::min<std::streamsize>(p_FileLength, bufLength);
- pStream.read(buffer, toReadLength);
- // With user-specified istream opened in non-binary mode on windows, we may have a
- // data length mismatch, so ensure we don't parse outside actual data length read.
- const std::streamsize readLength = pStream.gcount();
- if (readLength <= 0)
- {
- break;
- }
- for (size_t i = 0; i < static_cast<size_t>(readLength); ++i)
- {
- if (buffer[i] == mSeparatorParams.mQuoteChar)
- {
- if (cell.empty() || (cell[0] == mSeparatorParams.mQuoteChar))
- {
- quoted = !quoted;
- }
- else if (mSeparatorParams.mTrim)
- {
- // allow whitespace before first mQuoteChar
- const auto firstQuote = std::find(cell.begin(), cell.end(), mSeparatorParams.mQuoteChar);
- if (std::all_of(cell.begin(), firstQuote, [](int ch) { return isspace(ch); }))
- {
- quoted = !quoted;
- }
- }
- cell += buffer[i];
- }
- else if (buffer[i] == mSeparatorParams.mSeparator)
- {
- if (!quoted)
- {
- row.push_back(Unquote(Trim(cell)));
- cell.clear();
- }
- else
- {
- cell += buffer[i];
- }
- }
- else if (buffer[i] == '\r')
- {
- if (mSeparatorParams.mQuotedLinebreaks && quoted)
- {
- cell += buffer[i];
- }
- else
- {
- ++cr;
- }
- }
- else if (buffer[i] == '\n')
- {
- if (mSeparatorParams.mQuotedLinebreaks && quoted)
- {
- cell += buffer[i];
- }
- else
- {
- ++lf;
- if (mLineReaderParams.mSkipEmptyLines && row.empty() && cell.empty())
- {
- // skip empty line
- }
- else
- {
- row.push_back(Unquote(Trim(cell)));
- if (mLineReaderParams.mSkipCommentLines && !row.at(0).empty() &&
- (row.at(0)[0] == mLineReaderParams.mCommentPrefix))
- {
- // skip comment line
- }
- else
- {
- mData.push_back(row);
- }
- cell.clear();
- row.clear();
- quoted = false;
- }
- }
- }
- else
- {
- cell += buffer[i];
- }
- }
- p_FileLength -= readLength;
- }
- // Handle last row / cell without linebreak
- if (row.empty() && cell.empty())
- {
- // skip empty trailing line
- }
- else
- {
- row.push_back(Unquote(Trim(cell)));
- if (mLineReaderParams.mSkipCommentLines && !row.at(0).empty() &&
- (row.at(0)[0] == mLineReaderParams.mCommentPrefix))
- {
- // skip comment line
- }
- else
- {
- mData.push_back(row);
- }
- cell.clear();
- row.clear();
- quoted = false;
- }
-
- delete [] buffer;
- // Assume CR/LF if at least half the linebreaks have CR
- mSeparatorParams.mHasCR = (cr > (lf / 2));
- // Set up column labels
- UpdateColumnNames();
- // Set up row labels
- UpdateRowNames();
- }
- void WriteCsv() const
- {
- #ifdef HAS_CODECVT
- if (mIsUtf16)
- {
- std::stringstream ss;
- WriteCsv(ss);
- std::string utf8 = ss.str();
- std::wstring wstr = ToWString(utf8);
- std::wofstream wstream;
- wstream.exceptions(std::wofstream::failbit | std::wofstream::badbit);
- wstream.open(mPath, std::ios::binary | std::ios::trunc);
- if (mIsLE)
- {
- wstream.imbue(std::locale(wstream.getloc(),
- new std::codecvt_utf16<wchar_t, 0x10ffff,
- static_cast<std::codecvt_mode>(std::little_endian)>));
- }
- else
- {
- wstream.imbue(std::locale(wstream.getloc(),
- new std::codecvt_utf16<wchar_t, 0x10ffff>));
- }
- wstream << static_cast<wchar_t>(0xfeff);
- wstream << wstr;
- }
- else
- #endif
- {
- std::ofstream stream;
- stream.exceptions(std::ofstream::failbit | std::ofstream::badbit);
- stream.open(mPath, std::ios::binary | std::ios::trunc);
- if (mHasUtf8BOM)
- {
- stream.write(s_Utf8BOM.data(), 3);
- }
- WriteCsv(stream);
- }
- }
- void WriteCsv(std::ostream& pStream) const
- {
- for (auto itr = mData.begin(); itr != mData.end(); ++itr)
- {
- for (auto itc = itr->begin(); itc != itr->end(); ++itc)
- {
- if (mSeparatorParams.mAutoQuote &&
- ((itc->find(mSeparatorParams.mSeparator) != std::string::npos) ||
- (itc->find(' ') != std::string::npos) ||
- (itc->find('\n') != std::string::npos)))
- {
- // escape quotes in string
- std::string str = *itc;
- const std::string quoteCharStr = std::string(1, mSeparatorParams.mQuoteChar);
- ReplaceString(str, quoteCharStr, quoteCharStr + quoteCharStr);
- pStream << quoteCharStr << str << quoteCharStr;
- }
- else
- {
- pStream << *itc;
- }
- if (std::distance(itc, itr->end()) > 1)
- {
- pStream << mSeparatorParams.mSeparator;
- }
- }
- pStream << (mSeparatorParams.mHasCR ? "\r\n" : "\n");
- }
- }
- size_t GetDataRowCount() const
- {
- return mData.size();
- }
- size_t GetDataColumnCount() const
- {
- const size_t firstDataRow = static_cast<size_t>((mLabelParams.mColumnNameIdx >= 0) ? mLabelParams.mColumnNameIdx : 0);
- return (mData.size() > firstDataRow) ? mData.at(firstDataRow).size() : 0;
- }
- inline size_t GetDataRowIndex(const size_t pRowIdx) const
- {
- const size_t firstDataRow = static_cast<size_t>((mLabelParams.mColumnNameIdx + 1 >= 0) ? mLabelParams.mColumnNameIdx + 1 : 0);
- return pRowIdx + firstDataRow;
- }
- inline size_t GetDataColumnIndex(const size_t pColumnIdx) const
- {
- const size_t firstDataColumn = static_cast<size_t>((mLabelParams.mRowNameIdx + 1 >= 0) ? mLabelParams.mRowNameIdx + 1 : 0);
- return pColumnIdx + firstDataColumn;
- }
- std::string Trim(const std::string& pStr) const
- {
- if (mSeparatorParams.mTrim)
- {
- std::string str = pStr;
- // ltrim
- str.erase(str.begin(), std::find_if(str.begin(), str.end(), [](int ch) { return !isspace(ch); }));
- // rtrim
- str.erase(std::find_if(str.rbegin(), str.rend(), [](int ch) { return !isspace(ch); }).base(), str.end());
- return str;
- }
- else
- {
- return pStr;
- }
- }
- std::string Unquote(const std::string& pStr) const
- {
- if (mSeparatorParams.mAutoQuote && (pStr.size() >= 2) &&
- (pStr.front() == mSeparatorParams.mQuoteChar) &&
- (pStr.back() == mSeparatorParams.mQuoteChar))
- {
- // remove start/end quotes
- std::string str = pStr.substr(1, pStr.size() - 2);
- // unescape quotes in string
- const std::string quoteCharStr = std::string(1, mSeparatorParams.mQuoteChar);
- ReplaceString(str, quoteCharStr + quoteCharStr, quoteCharStr);
- return str;
- }
- else
- {
- return pStr;
- }
- }
- void UpdateColumnNames()
- {
- mColumnNames.clear();
- if ((mLabelParams.mColumnNameIdx >= 0) &&
- (static_cast<int>(mData.size()) > mLabelParams.mColumnNameIdx))
- {
- size_t i = 0;
- for (auto& columnName : mData[static_cast<size_t>(mLabelParams.mColumnNameIdx)])
- {
- mColumnNames[columnName] = i++;
- }
- }
- }
- void UpdateRowNames()
- {
- mRowNames.clear();
- if ((mLabelParams.mRowNameIdx >= 0) &&
- (static_cast<int>(mData.size()) >
- (mLabelParams.mColumnNameIdx + 1)))
- {
- size_t i = 0;
- for (auto& dataRow : mData)
- {
- if (static_cast<int>(dataRow.size()) > mLabelParams.mRowNameIdx)
- {
- mRowNames[dataRow[static_cast<size_t>(mLabelParams.mRowNameIdx)]] = i++;
- }
- }
- }
- }
#ifdef HAS_CODECVT
#if defined(_MSC_VER)
#pragma warning (push)
#pragma warning (disable: 4996)
#endif
/**
 * @brief   Convert a wide string to its UTF-8 encoded byte string.
 */
static std::string ToString(const std::wstring& pWStr)
{
  std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> utf8Codec;
  return utf8Codec.to_bytes(pWStr);
}

/**
 * @brief   Convert a UTF-8 encoded byte string to a wide string.
 */
static std::wstring ToWString(const std::string& pStr)
{
  std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> utf8Codec;
  return utf8Codec.from_bytes(pStr);
}
#if defined(_MSC_VER)
#pragma warning (pop)
#endif
#endif
/**
 * @brief   Replace every occurrence of a substring, in place.
 *
 * Text inserted by a replacement is not scanned again. An empty search
 * string matches at every position and could never be consumed, so it is
 * treated as "nothing to replace" and the text is left unchanged.
 *
 * @param   pStr      text to modify.
 * @param   pSearch   substring to look for.
 * @param   pReplace  text substituted for each occurrence.
 */
static void ReplaceString(std::string& pStr, const std::string& pSearch, const std::string& pReplace)
{
  if (pSearch.empty())
  {
    return;
  }

  size_t pos = 0;
  while ((pos = pStr.find(pSearch, pos)) != std::string::npos)
  {
    pStr.replace(pos, pSearch.size(), pReplace);
    // Continue after the inserted text so it is never matched again.
    pos += pReplace.size();
  }
}
- private:
- std::string mPath;
- LabelParams mLabelParams;
- SeparatorParams mSeparatorParams;
- ConverterParams mConverterParams;
- LineReaderParams mLineReaderParams;
- std::vector<std::vector<std::string>> mData;
- std::map<std::string, size_t> mColumnNames;
- std::map<std::string, size_t> mRowNames;
- #ifdef HAS_CODECVT
- bool mIsUtf16 = false;
- bool mIsLE = false;
- #endif
- bool mHasUtf8BOM = false;
- };
- }
|