rapidcsv.h 70 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132
  1. /*
  2. * rapidcsv.h
  3. *
  4. * URL: https://github.com/d99kris/rapidcsv
  5. * Version: 8.84
  6. *
  7. * Copyright (C) 2017-2024 Kristofer Berggren
  8. * All rights reserved.
  9. *
  10. * rapidcsv is distributed under the BSD 3-Clause license, see LICENSE for details.
  11. *
  12. */
  13. #pragma once
  14. #include <algorithm>
  15. #include <cassert>
  16. #include <cmath>
  17. #ifdef HAS_CODECVT
  18. #include <codecvt>
  19. #include <locale>
  20. #endif
  21. #include <fstream>
  22. #include <functional>
  23. #include <iomanip>
  24. #include <iostream>
  25. #include <limits>
  26. #include <map>
  27. #include <sstream>
  28. #include <string>
  29. #include <typeinfo>
  30. #include <vector>
  31. namespace rapidcsv
  32. {
  // Platform default for CR/LF line endings: true when compiling with MSVC,
  // false otherwise. Used as the default value of SeparatorParams' pHasCR.
#ifdef _MSC_VER
  static const bool sPlatformHasCR = true;
#else
  static const bool sPlatformHasCR = false;
#endif

  // The three bytes of the UTF-8 byte order mark (EF BB BF).
  static const std::vector<char> s_Utf8BOM{ '\xef', '\xbb', '\xbf' };
  /**
   * @brief Datastructure holding parameters controlling how invalid numbers (including
   *        empty strings) should be handled.
   */
  struct ConverterParams
  {
    /**
     * @brief Constructor
     * @param   pHasDefaultConverter  when true, non-numerical strings are converted to a
     *                                default numerical value; when false (default) the
     *                                conversion throws instead.
     * @param   pDefaultFloat         floating-point value used to represent invalid numbers
     *                                (default: signaling NaN).
     * @param   pDefaultInteger       integer value used to represent invalid numbers
     *                                (default: 0).
     * @param   pNumericLocale        whether to honor the LC_NUMERIC locale (default true).
     */
    explicit ConverterParams(const bool pHasDefaultConverter = false,
                             const long double pDefaultFloat = std::numeric_limits<long double>::signaling_NaN(),
                             const long long pDefaultInteger = 0,
                             const bool pNumericLocale = true)
      : mHasDefaultConverter{ pHasDefaultConverter }
      , mDefaultFloat{ pDefaultFloat }
      , mDefaultInteger{ pDefaultInteger }
      , mNumericLocale{ pNumericLocale }
    {
    }

    /// Convert non-numerical strings to a default value instead of throwing.
    bool mHasDefaultConverter;

    /// Floating-point default value representing invalid numbers.
    long double mDefaultFloat;

    /// Integer default value representing invalid numbers.
    long long mDefaultInteger;

    /// Whether to honor the LC_NUMERIC locale (the decimal point symbol may
    /// differ between locales).
    bool mNumericLocale;
  };
  /**
   * @brief Exception thrown when attempting to access Document data in a datatype which
   *        is not supported by the Converter class.
   */
  class no_converter : public std::exception
  {
  public:
    /**
     * @brief Provides details about the exception
     * @returns an explanatory string
     */
    // `noexcept` replaces the dynamic exception specification `throw()`, which is
    // deprecated and no longer valid as of C++20; the guarantee is the same.
    const char* what() const noexcept override
    {
      return "unsupported conversion datatype";
    }
  };
  113. /**
  114. * @brief Class providing conversion to/from numerical datatypes and strings. Only
  115. * intended for rapidcsv internal usage, but exposed externally to allow
  116. * specialization for custom datatype conversions.
  117. * 提供数值数据类型和字符串之间转换的类
  118. * 仅供`rapidcsv`内部使用,但公开以允许对自定义数据类型进行特化。
  119. */
  120. template<typename T>
  121. class Converter
  122. {
  123. public:
  124. /**
  125. * @brief Constructor
  126. * @param pConverterParams specifies how conversion of non-numerical values to
  127. * numerical datatype shall be handled.
  128. * 指定如何处理非数值值转换为数值数据类型的参数。
  129. */
  130. Converter(const ConverterParams& pConverterParams)
  131. : mConverterParams(pConverterParams)
  132. {
  133. }
  134. /**
  135. * @brief Converts numerical value to string representation.
  136. * 将数值转换为字符串表示
  137. * @param pVal numerical value
  138. * @param pStr output string
  139. */
  140. void ToStr(const T& pVal, std::string& pStr) const
  141. {
  142. if (typeid(T) == typeid(int) ||
  143. typeid(T) == typeid(long) ||
  144. typeid(T) == typeid(long long) ||
  145. typeid(T) == typeid(unsigned) ||
  146. typeid(T) == typeid(unsigned long) ||
  147. typeid(T) == typeid(unsigned long long) ||
  148. typeid(T) == typeid(long double) ||
  149. typeid(T) == typeid(char))
  150. {
  151. std::ostringstream out;
  152. out << pVal;
  153. pStr = out.str();
  154. }
  155. else if (typeid(T) == typeid(float))
  156. {
  157. std::ostringstream out;
  158. out << std::setprecision(9) << pVal;
  159. pStr = out.str();
  160. }
  161. else if (typeid(T) == typeid(double))
  162. {
  163. std::ostringstream out;
  164. out << std::setprecision(17) << pVal;
  165. pStr = out.str();
  166. }
  167. else
  168. {
  169. throw no_converter();
  170. }
  171. }
  172. /**
  173. * @brief Converts string holding a numerical value to numerical datatype representation.
  174. * 将包含数值的字符串转换为数值数据类型表示
  175. * @param pVal numerical value
  176. * @param pStr output string
  177. */
  178. void ToVal(const std::string& pStr, T& pVal) const
  179. {
  180. try
  181. {
  182. if (typeid(T) == typeid(int))
  183. {
  184. pVal = static_cast<T>(std::stoi(pStr));
  185. return;
  186. }
  187. else if (typeid(T) == typeid(long))
  188. {
  189. pVal = static_cast<T>(std::stol(pStr));
  190. return;
  191. }
  192. else if (typeid(T) == typeid(long long))
  193. {
  194. pVal = static_cast<T>(std::stoll(pStr));
  195. return;
  196. }
  197. else if (typeid(T) == typeid(unsigned))
  198. {
  199. pVal = static_cast<T>(std::stoul(pStr));
  200. return;
  201. }
  202. else if (typeid(T) == typeid(unsigned long))
  203. {
  204. pVal = static_cast<T>(std::stoul(pStr));
  205. return;
  206. }
  207. else if (typeid(T) == typeid(unsigned long long))
  208. {
  209. pVal = static_cast<T>(std::stoull(pStr));
  210. return;
  211. }
  212. }
  213. catch (...)
  214. {
  215. if (!mConverterParams.mHasDefaultConverter)
  216. {
  217. throw;
  218. }
  219. else
  220. {
  221. pVal = static_cast<T>(mConverterParams.mDefaultInteger);
  222. return;
  223. }
  224. }
  225. try
  226. {
  227. if (mConverterParams.mNumericLocale)
  228. {
  229. if (typeid(T) == typeid(float))
  230. {
  231. pVal = static_cast<T>(std::stof(pStr));
  232. return;
  233. }
  234. else if (typeid(T) == typeid(double))
  235. {
  236. pVal = static_cast<T>(std::stod(pStr));
  237. return;
  238. }
  239. else if (typeid(T) == typeid(long double))
  240. {
  241. pVal = static_cast<T>(std::stold(pStr));
  242. return;
  243. }
  244. }
  245. else
  246. {
  247. if ((typeid(T) == typeid(float)) ||
  248. (typeid(T) == typeid(double)) ||
  249. (typeid(T) == typeid(long double)))
  250. {
  251. std::istringstream iss(pStr);
  252. iss.imbue(std::locale::classic());
  253. iss >> pVal;
  254. if (iss.fail() || iss.bad() || !iss.eof())
  255. {
  256. throw std::invalid_argument("istringstream: no conversion");
  257. }
  258. return;
  259. }
  260. }
  261. }
  262. catch (...)
  263. {
  264. if (!mConverterParams.mHasDefaultConverter)
  265. {
  266. throw;
  267. }
  268. else
  269. {
  270. pVal = static_cast<T>(mConverterParams.mDefaultFloat);
  271. return;
  272. }
  273. }
  274. if (typeid(T) == typeid(char))
  275. {
  276. pVal = static_cast<T>(pStr[0]);
  277. return;
  278. }
  279. else
  280. {
  281. throw no_converter();
  282. }
  283. }
  284. private:
  285. const ConverterParams& mConverterParams;
  286. };
  287. /**
  288. * @brief Specialized implementation handling string to string conversion.
  289. * 专门用于处理字符串到字符串转换的特化实现
  290. * @param pVal string
  291. * @param pStr string
  292. */
  293. template<>
  294. inline void Converter<std::string>::ToStr(const std::string& pVal, std::string& pStr) const
  295. {
  296. pStr = pVal;
  297. }
  298. /**
  299. * @brief Specialized implementation handling string to string conversion.
  300. * 专门用于处理字符串到字符串转换的特化实现
  301. * @param pVal string
  302. * @param pStr string
  303. */
  304. template<>
  305. inline void Converter<std::string>::ToVal(const std::string& pStr, std::string& pVal) const
  306. {
  307. pVal = pStr;
  308. }
  /**
   * @brief Callable type for custom conversions: takes the source string as first
   *        argument and writes the converted value through the second argument.
   */
  template<typename T>
  using ConvFunc = std::function<void (const std::string&, T&)>;
  /**
   * @brief Datastructure holding parameters controlling which row and column should be
   *        treated as labels.
   */
  struct LabelParams
  {
    /**
     * @brief Constructor
     * @param   pColumnNameIdx        specifies the zero-based row index of the column labels, setting
     *                                it to -1 prevents column lookup by label name, and gives access
     *                                to all rows as document data. Default: 0
     * @param   pRowNameIdx           specifies the zero-based column index of the row labels, setting
     *                                it to -1 prevents row lookup by label name, and gives access
     *                                to all columns as document data. Default: -1
     * @throws  std::out_of_range     if either index is smaller than -1.
     */
    explicit LabelParams(const int pColumnNameIdx = 0, const int pRowNameIdx = -1)
      : mColumnNameIdx{ pColumnNameIdx }
      , mRowNameIdx{ pRowNameIdx }
    {
      // The column label index is validated first, then the row label index.
      if (mColumnNameIdx < -1)
      {
        throw std::out_of_range(std::string("invalid column name index ") +
                                std::to_string(mColumnNameIdx) + " < -1");
      }

      if (mRowNameIdx < -1)
      {
        throw std::out_of_range(std::string("invalid row name index ") +
                                std::to_string(mRowNameIdx) + " < -1");
      }
    }

    /// Zero-based row index of the column labels.
    int mColumnNameIdx;

    /// Zero-based column index of the row labels.
    int mRowNameIdx;
  };
  361. /**
  362. * @brief Datastructure holding parameters controlling how the CSV data fields are separated.
  363. * 保存控制如何分隔`CSV`数据字段的参数的数据结构
  364. */
  365. struct SeparatorParams
  366. {
  367. /**
  368. * @brief Constructor
  369. * @param pSeparator specifies the column separator (default ',').
  370. * 指定列分隔符(默认是',')
  371. * @param pTrim specifies whether to trim leading and trailing spaces from
  372. * cells read (default false).
  373. * 指定是否修剪读取到的单元格中的前导和尾随空格(默认是 false)
  374. * @param pHasCR specifies whether a new document (i.e. not an existing document read)
  375. * should use CR/LF instead of only LF (default is to use standard
  376. * behavior of underlying platforms - CR/LF for Win, and LF for others).
  377. * 指定新文档(即,不是读取的现有文档)是否应使用 CR/LF 而不是仅使用 LF
  378. * @param pQuotedLinebreaks specifies whether to allow line breaks in quoted text (default false)
  379. * 指定是否允许带引号的文本中包含换行符(默认是 false)
  380. * @param pAutoQuote specifies whether to automatically dequote data during read, and add
  381. * quotes during write (default true).
  382. * 指定在读取时是否自动去除引号,并在写入时添加引号(默认是 true)
  383. * @param pQuoteChar specifies the quote character (default '\"').
  384. * 指定引号字符(默认是 '\"')
  385. */
  386. explicit SeparatorParams(const char pSeparator = ',', const bool pTrim = false,
  387. const bool pHasCR = sPlatformHasCR, const bool pQuotedLinebreaks = false,
  388. const bool pAutoQuote = true, const char pQuoteChar = '"')
  389. : mSeparator(pSeparator)
  390. , mTrim(pTrim)
  391. , mHasCR(pHasCR)
  392. , mQuotedLinebreaks(pQuotedLinebreaks)
  393. , mAutoQuote(pAutoQuote)
  394. , mQuoteChar(pQuoteChar)
  395. {
  396. }
  397. /**
  398. * @brief specifies the column separator.
  399. * 指定列分隔符
  400. */
  401. char mSeparator;
  402. /**
  403. * @brief specifies whether to trim leading and trailing spaces from cells read.
  404. * 指定是否修剪读取到的单元格中的前导和尾随空格
  405. */
  406. bool mTrim;
  407. /**
  408. * @brief specifies whether new documents should use CR/LF instead of LF.
  409. * 指定新文档是否应使用`CR/LF`而不是`LF`
  410. */
  411. bool mHasCR;
  412. /**
  413. * @brief specifies whether to allow line breaks in quoted text.
  414. * 指定是否允许带引号的文本中包含换行符
  415. */
  416. bool mQuotedLinebreaks;
  417. /**
  418. * @brief specifies whether to automatically dequote cell data.
  419. * 指定是否自动去除单元格数据的引号
  420. */
  421. bool mAutoQuote;
  422. /**
  423. * @brief specifies the quote character.
  424. * 指定引号字符
  425. */
  426. char mQuoteChar;
  427. };
  /**
   * @brief Datastructure holding parameters controlling how special line formats should be
   *        treated.
   */
  struct LineReaderParams
  {
    /**
     * @brief Constructor
     * @param   pSkipCommentLines     specifies whether to skip lines prefixed with
     *                                mCommentPrefix. Default: true
     * @param   pCommentPrefix        specifies which prefix character to indicate a comment
     *                                line. Default: #
     * @param   pSkipEmptyLines       specifies whether to skip empty lines. Default: false
     */
    explicit LineReaderParams(const bool pSkipCommentLines = true,
                              const char pCommentPrefix = '#',
                              const bool pSkipEmptyLines = false)
      : mSkipCommentLines{ pSkipCommentLines }
      , mCommentPrefix{ pCommentPrefix }
      , mSkipEmptyLines{ pSkipEmptyLines }
    {
    }

    /// Whether to skip lines prefixed with mCommentPrefix.
    bool mSkipCommentLines;

    /// Prefix character indicating a comment line.
    char mCommentPrefix;

    /// Whether to skip empty lines.
    bool mSkipEmptyLines;
  };
  470. /**
  471. * @brief Class representing a CSV document.
  472. * 表示 CSV 文档的类
  473. */
  474. class Document
  475. {
  476. public:
  477. /**
  478. * @brief Constructor
  479. * @param pPath specifies the path of an existing CSV-file to populate the Document
  480. * data with.
  481. * 指定现有 CSV 文件的路径,以填充 Document 的数据
  482. * @param pLabelParams specifies which row and column should be treated as labels.
  483. * 指定应作为标签的行和列
  484. * @param pSeparatorParams specifies which field and row separators should be used.
  485. * 指定应使用的字段和行分隔符
  486. * @param pConverterParams specifies how invalid numbers (including empty strings) should be
  487. * handled.
  488. * 指定应如何处理无效的数字(包括空字符串)
  489. * @param pLineReaderParams specifies how special line formats should be treated.
  490. * 指定应如何处理特殊的行格式
  491. */
  492. explicit Document(const std::string& pPath = std::string(),
  493. const LabelParams& pLabelParams = LabelParams(),
  494. const SeparatorParams& pSeparatorParams = SeparatorParams(),
  495. const ConverterParams& pConverterParams = ConverterParams(),
  496. const LineReaderParams& pLineReaderParams = LineReaderParams())
  497. : mPath(pPath)
  498. , mLabelParams(pLabelParams)
  499. , mSeparatorParams(pSeparatorParams)
  500. , mConverterParams(pConverterParams)
  501. , mLineReaderParams(pLineReaderParams)
  502. , mData()
  503. , mColumnNames()
  504. , mRowNames()
  505. {
  506. if (!mPath.empty())
  507. {
  508. ReadCsv();
  509. }
  510. }
  511. /**
  512. * @brief Constructor
  513. * @param pStream specifies a binary input stream to read CSV data from.
  514. * 指定用于读取 CSV 数据的二进制输入流
  515. * @param pLabelParams specifies which row and column should be treated as labels.
  516. * 指定应作为标签的行和列
  517. * @param pSeparatorParams specifies which field and row separators should be used.
  518. * 指定应使用的字段和行分隔符
  519. * @param pConverterParams specifies how invalid numbers (including empty strings) should be
  520. * handled.
  521. * 指定应如何处理无效的数字(包括空字符串)
  522. * @param pLineReaderParams specifies how special line formats should be treated.
  523. * 指定应如何处理特殊的行格式
  524. */
  525. explicit Document(std::istream& pStream,
  526. const LabelParams& pLabelParams = LabelParams(),
  527. const SeparatorParams& pSeparatorParams = SeparatorParams(),
  528. const ConverterParams& pConverterParams = ConverterParams(),
  529. const LineReaderParams& pLineReaderParams = LineReaderParams())
  530. : mPath()
  531. , mLabelParams(pLabelParams)
  532. , mSeparatorParams(pSeparatorParams)
  533. , mConverterParams(pConverterParams)
  534. , mLineReaderParams(pLineReaderParams)
  535. , mData()
  536. , mColumnNames()
  537. , mRowNames()
  538. {
  539. ReadCsv(pStream);
  540. }
  541. /**
  542. * @brief Read Document data from file.
  543. * 从文件中读取`Document`数据
  544. * @param pPath specifies the path of an existing CSV-file to populate the Document
  545. * data with.
  546. * 指定现有 CSV 文件的路径,以填充 Document 的数据
  547. * @param pLabelParams specifies which row and column should be treated as labels.
  548. * 指定应作为标签的行和列
  549. * @param pSeparatorParams specifies which field and row separators should be used.
  550. * 指定应使用的字段和行分隔符
  551. * @param pConverterParams specifies how invalid numbers (including empty strings) should be
  552. * handled.
  553. * 指定应如何处理无效的数字(包括空字符串)
  554. * @param pLineReaderParams specifies how special line formats should be treated.
  555. * 指定应如何处理特殊的行格式
  556. */
  557. void Load(const std::string& pPath,
  558. const LabelParams& pLabelParams = LabelParams(),
  559. const SeparatorParams& pSeparatorParams = SeparatorParams(),
  560. const ConverterParams& pConverterParams = ConverterParams(),
  561. const LineReaderParams& pLineReaderParams = LineReaderParams())
  562. {
  563. mPath = pPath;
  564. mLabelParams = pLabelParams;
  565. mSeparatorParams = pSeparatorParams;
  566. mConverterParams = pConverterParams;
  567. mLineReaderParams = pLineReaderParams;
  568. ReadCsv();
  569. }
  570. /**
  571. * @brief Read Document data from stream.
  572. * 从流中读取`Document`数据
  573. * @param pStream specifies a binary input stream to read CSV data from.
  574. * 指定用于读取 CSV 数据的二进制输入流
  575. * @param pLabelParams specifies which row and column should be treated as labels.
  576. * 指定应作为标签的行和列
  577. * @param pSeparatorParams specifies which field and row separators should be used.
  578. * 指定应使用的字段和行分隔符
  579. * @param pConverterParams specifies how invalid numbers (including empty strings) should be
  580. * handled.
  581. * 指定应如何处理无效的数字(包括空字符串)
  582. * @param pLineReaderParams specifies how special line formats should be treated.
  583. * 指定应如何处理特殊的行格式
  584. */
  585. void Load(std::istream& pStream,
  586. const LabelParams& pLabelParams = LabelParams(),
  587. const SeparatorParams& pSeparatorParams = SeparatorParams(),
  588. const ConverterParams& pConverterParams = ConverterParams(),
  589. const LineReaderParams& pLineReaderParams = LineReaderParams())
  590. {
  591. mPath = "";
  592. mLabelParams = pLabelParams;
  593. mSeparatorParams = pSeparatorParams;
  594. mConverterParams = pConverterParams;
  595. mLineReaderParams = pLineReaderParams;
  596. ReadCsv(pStream);
  597. }
  598. /**
  599. * @brief Write Document data to file.
  600. * 将`Document`数据写入文件
  601. * @param pPath optionally specifies the path where the CSV-file will be created
  602. * (if not specified, the original path provided when creating or
  603. * loading the Document data will be used).
  604. * 可选参数,指定 CSV 文件将创建的路径
  605. * (如果未指定,则使用创建或加载 Document 数据时提供的原始路径)
  606. */
  607. void Save(const std::string& pPath = std::string())
  608. {
  609. if (!pPath.empty())
  610. {
  611. mPath = pPath;
  612. }
  613. WriteCsv();
  614. }
  615. /**
  616. * @brief Write Document data to stream.
  617. * 将`Document`数据写入流
  618. * @param pStream specifies a binary output stream to write the data to.
  619. * 指定用于写入数据的二进制输出流
  620. */
  621. void Save(std::ostream& pStream) const
  622. {
  623. WriteCsv(pStream);
  624. }
  625. /**
  626. * @brief Clears loaded Document data.
  627. * 清除加载的`Document`数据
  628. *
  629. */
  630. void Clear()
  631. {
  632. mData.clear();
  633. mColumnNames.clear();
  634. mRowNames.clear();
  635. #ifdef HAS_CODECVT
  636. mIsUtf16 = false;
  637. mIsLE = false;
  638. #endif
  639. mHasUtf8BOM = false;
  640. }
  641. /**
  642. * @brief Get column index by name.
  643. * 获取列索引通过列名称
  644. * @param pColumnName column label name.
  645. * 列标签名称
  646. * @returns zero-based column index.
  647. * 返回零基列索引
  648. */
  649. int GetColumnIdx(const std::string& pColumnName) const
  650. {
  651. if (mLabelParams.mColumnNameIdx >= 0)
  652. {
  653. if (mColumnNames.find(pColumnName) != mColumnNames.end())
  654. {
  655. return static_cast<int>(mColumnNames.at(pColumnName)) - (mLabelParams.mRowNameIdx + 1);
  656. }
  657. }
  658. return -1;
  659. }
  660. /**
  661. * @brief Get column by index.
  662. * 通过索引获取列数据
  663. * @param pColumnIdx zero-based column index.
  664. * 零基列索引
  665. * @returns vector of column data.
  666. * 返回列数据的向量
  667. */
  668. template<typename T>
  669. std::vector<T> GetColumn(const size_t pColumnIdx) const
  670. {
  671. const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
  672. std::vector<T> column;
  673. Converter<T> converter(mConverterParams);
  674. for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
  675. {
  676. if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx)
  677. {
  678. if (dataColumnIdx < itRow->size())
  679. {
  680. T val;
  681. converter.ToVal(itRow->at(dataColumnIdx), val);
  682. column.push_back(val);
  683. }
  684. else
  685. {
  686. const std::string errStr = "requested column index " +
  687. std::to_string(pColumnIdx) + " >= " +
  688. std::to_string(itRow->size() - GetDataColumnIndex(0)) +
  689. " (number of columns on row index " +
  690. std::to_string(std::distance(mData.begin(), itRow) -
  691. (mLabelParams.mColumnNameIdx + 1)) + ")";
  692. throw std::out_of_range(errStr);
  693. }
  694. }
  695. }
  696. return column;
  697. }
  698. /**
  699. * @brief Get column by index.
  700. * 通过索引获取列数据
  701. * @param pColumnIdx zero-based column index.
  702. * 零基列索引
  703. * @param pToVal conversion function.
  704. * 转换函数
  705. * @returns vector of column data.
  706. * 返回列数据的向量
  707. */
  708. template<typename T>
  709. std::vector<T> GetColumn(const size_t pColumnIdx, ConvFunc<T> pToVal) const
  710. {
  711. const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
  712. std::vector<T> column;
  713. for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
  714. {
  715. if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx)
  716. {
  717. T val;
  718. pToVal(itRow->at(dataColumnIdx), val);
  719. column.push_back(val);
  720. }
  721. }
  722. return column;
  723. }
  724. /**
  725. * @brief Get column by name.
  726. * 通过名称获取列数据
  727. * @param pColumnName column label name.
  728. * 列标签名称
  729. * @returns vector of column data.
  730. * 返回列数据的向量
  731. */
  732. template<typename T>
  733. std::vector<T> GetColumn(const std::string& pColumnName) const
  734. {
  735. const int columnIdx = GetColumnIdx(pColumnName);
  736. if (columnIdx < 0)
  737. {
  738. throw std::out_of_range("column not found: " + pColumnName);
  739. }
  740. return GetColumn<T>(static_cast<size_t>(columnIdx));
  741. }
  742. /**
  743. * @brief Get column by name.
  744. * 通过名称获取列数据
  745. * @param pColumnName column label name.
  746. * 列标签名称
  747. * @param pToVal conversion function.
  748. * 转换函数
  749. * @returns vector of column data.
  750. * 返回列数据的向量
  751. */
  752. template<typename T>
  753. std::vector<T> GetColumn(const std::string& pColumnName, ConvFunc<T> pToVal) const
  754. {
  755. const int columnIdx = GetColumnIdx(pColumnName);
  756. if (columnIdx < 0)
  757. {
  758. throw std::out_of_range("column not found: " + pColumnName);
  759. }
  760. return GetColumn<T>(static_cast<size_t>(columnIdx), pToVal);
  761. }
  762. /**
  763. * @brief Set column by index.
  764. * 设置列数据通过索引
  765. * @param pColumnIdx zero-based column index.
  766. * 零基列索引
  767. * @param pColumn vector of column data.
  768. * 列数据向量
  769. */
  770. template<typename T>
  771. void SetColumn(const size_t pColumnIdx, const std::vector<T>& pColumn)
  772. {
  773. const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
  774. while (GetDataRowIndex(pColumn.size()) > GetDataRowCount())
  775. {
  776. std::vector<std::string> row;
  777. row.resize(GetDataColumnCount());
  778. mData.push_back(row);
  779. }
  780. if ((dataColumnIdx + 1) > GetDataColumnCount())
  781. {
  782. for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
  783. {
  784. if (std::distance(mData.begin(), itRow) >= mLabelParams.mColumnNameIdx)
  785. {
  786. itRow->resize(GetDataColumnIndex(dataColumnIdx + 1));
  787. }
  788. }
  789. }
  790. Converter<T> converter(mConverterParams);
  791. for (auto itRow = pColumn.begin(); itRow != pColumn.end(); ++itRow)
  792. {
  793. std::string str;
  794. converter.ToStr(*itRow, str);
  795. mData.at(static_cast<size_t>(std::distance(pColumn.begin(), itRow) + mLabelParams.mColumnNameIdx + 1)).at(
  796. dataColumnIdx) = str;
  797. }
  798. }
  799. /**
  800. * @brief Set column by name.
  801. * 通过名称设置列数据
  802. * @param pColumnName column label name.
  803. * 列标签名称
  804. * @param pColumn vector of column data.
  805. * 列数据向量
  806. */
  807. template<typename T>
  808. void SetColumn(const std::string& pColumnName, const std::vector<T>& pColumn)
  809. {
  810. const int columnIdx = GetColumnIdx(pColumnName);
  811. if (columnIdx < 0)
  812. {
  813. throw std::out_of_range("column not found: " + pColumnName);
  814. }
  815. SetColumn<T>(static_cast<size_t>(columnIdx), pColumn);
  816. }
  817. /**
  818. * @brief Remove column by index.
  819. * 通过索引移除列
  820. * @param pColumnIdx zero-based column index.
  821. * 零基列索引
  822. */
  823. void RemoveColumn(const size_t pColumnIdx)
  824. {
  825. const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
  826. for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
  827. {
  828. if (std::distance(mData.begin(), itRow) >= mLabelParams.mColumnNameIdx)
  829. {
  830. itRow->erase(itRow->begin() + static_cast<int>(dataColumnIdx));
  831. }
  832. }
  833. UpdateColumnNames();
  834. }
  835. /**
  836. * @brief Remove column by name.
  837. * 通过名称移除列
  838. * @param pColumnName column label name.
  839. * 列标签名称
  840. */
  841. void RemoveColumn(const std::string& pColumnName)
  842. {
  843. int columnIdx = GetColumnIdx(pColumnName);
  844. if (columnIdx < 0)
  845. {
  846. throw std::out_of_range("column not found: " + pColumnName);
  847. }
  848. RemoveColumn(static_cast<size_t>(columnIdx));
  849. }
  850. /**
  851. * @brief Insert column at specified index.
  852. * 在指定索引处插入列
  853. * @param pColumnIdx zero-based column index.
  854. * 零基列索引
  855. * @param pColumn vector of column data (optional argument).
  856. * 列数据向量(可选参数)
  857. * @param pColumnName column label name (optional argument).
  858. * 列标签名称(可选参数)
  859. */
  860. template<typename T>
  861. void InsertColumn(const size_t pColumnIdx, const std::vector<T>& pColumn = std::vector<T>(),
  862. const std::string& pColumnName = std::string())
  863. {
  864. const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
  865. std::vector<std::string> column;
  866. if (pColumn.empty())
  867. {
  868. column.resize(GetDataRowCount());
  869. }
  870. else
  871. {
  872. column.resize(GetDataRowIndex(pColumn.size()));
  873. Converter<T> converter(mConverterParams);
  874. for (auto itRow = pColumn.begin(); itRow != pColumn.end(); ++itRow)
  875. {
  876. std::string str;
  877. converter.ToStr(*itRow, str);
  878. const size_t rowIdx =
  879. static_cast<size_t>(std::distance(pColumn.begin(), itRow) + (mLabelParams.mColumnNameIdx + 1));
  880. column.at(rowIdx) = str;
  881. }
  882. }
  883. while (column.size() > GetDataRowCount())
  884. {
  885. std::vector<std::string> row;
  886. const size_t columnCount = std::max<size_t>(static_cast<size_t>(mLabelParams.mColumnNameIdx + 1),
  887. GetDataColumnCount());
  888. row.resize(columnCount);
  889. mData.push_back(row);
  890. }
  891. for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
  892. {
  893. if (std::distance(mData.begin(), itRow) >= mLabelParams.mColumnNameIdx)
  894. {
  895. const size_t rowIdx = static_cast<size_t>(std::distance(mData.begin(), itRow));
  896. itRow->insert(itRow->begin() + static_cast<int>(dataColumnIdx), column.at(rowIdx));
  897. }
  898. }
  899. if (!pColumnName.empty())
  900. {
  901. SetColumnName(pColumnIdx, pColumnName);
  902. }
  903. UpdateColumnNames();
  904. }
  905. /**
  906. * @brief Get number of data columns (excluding label columns).
  907. * 获取数据列的数量(不包括标签列)
  908. * @returns column count.
  909. * 返回列数
  910. */
  911. size_t GetColumnCount() const
  912. {
  913. const int count = static_cast<int>((mData.size() > 0) ? mData.at(0).size() : 0) -
  914. (mLabelParams.mRowNameIdx + 1);
  915. return (count >= 0) ? static_cast<size_t>(count) : 0;
  916. }
  917. /**
  918. * @brief Get row index by name.
  919. * 通过名称获取行索引
  920. * @param pRowName row label name.
  921. * 行标签名称
  922. * @returns zero-based row index.
  923. * 返回零基行索引
  924. */
  925. int GetRowIdx(const std::string& pRowName) const
  926. {
  927. if (mLabelParams.mRowNameIdx >= 0)
  928. {
  929. if (mRowNames.find(pRowName) != mRowNames.end())
  930. {
  931. return static_cast<int>(mRowNames.at(pRowName)) - (mLabelParams.mColumnNameIdx + 1);
  932. }
  933. }
  934. return -1;
  935. }
  936. /**
  937. * @brief Get row by index.
  938. * 通过索引获取行数据
  939. * @param pRowIdx zero-based row index.
  940. * 零基行索引
  941. * @returns vector of row data.
  942. * 返回行数据的向量
  943. */
  944. template<typename T>
  945. std::vector<T> GetRow(const size_t pRowIdx) const
  946. {
  947. const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
  948. std::vector<T> row;
  949. Converter<T> converter(mConverterParams);
  950. for (auto itCol = mData.at(dataRowIdx).begin(); itCol != mData.at(dataRowIdx).end(); ++itCol)
  951. {
  952. if (std::distance(mData.at(dataRowIdx).begin(), itCol) > mLabelParams.mRowNameIdx)
  953. {
  954. T val;
  955. converter.ToVal(*itCol, val);
  956. row.push_back(val);
  957. }
  958. }
  959. return row;
  960. }
  961. /**
  962. * @brief Get row by index.
  963. * 通过索引获取行数据
  964. * @param pRowIdx zero-based row index.
  965. * 零基行索引
  966. * @param pToVal conversion function.
  967. * 转换函数
  968. * @returns vector of row data.
  969. * 返回行数据的向量
  970. */
  971. template<typename T>
  972. std::vector<T> GetRow(const size_t pRowIdx, ConvFunc<T> pToVal) const
  973. {
  974. const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
  975. std::vector<T> row;
  976. Converter<T> converter(mConverterParams);
  977. for (auto itCol = mData.at(dataRowIdx).begin(); itCol != mData.at(dataRowIdx).end(); ++itCol)
  978. {
  979. if (std::distance(mData.at(dataRowIdx).begin(), itCol) > mLabelParams.mRowNameIdx)
  980. {
  981. T val;
  982. pToVal(*itCol, val);
  983. row.push_back(val);
  984. }
  985. }
  986. return row;
  987. }
  988. /**
  989. * @brief Get row by name.
  990. * 通过名称获取行数据
  991. * @param pRowName row label name.
  992. * 行标签名称
  993. * @returns vector of row data.
  994. * 返回行数据的向量
  995. */
  996. template<typename T>
  997. std::vector<T> GetRow(const std::string& pRowName) const
  998. {
  999. int rowIdx = GetRowIdx(pRowName);
  1000. if (rowIdx < 0)
  1001. {
  1002. throw std::out_of_range("row not found: " + pRowName);
  1003. }
  1004. return GetRow<T>(static_cast<size_t>(rowIdx));
  1005. }
  1006. /**
  1007. * @brief Get row by name.
  1008. * 通过名称获取行数据
  1009. * @param pRowName row label name.
  1010. * 行标签名称
  1011. * @param pToVal conversion function.
  1012. * 转换函数
  1013. * @returns vector of row data.
  1014. * 返回行数据的向量
  1015. */
  1016. template<typename T>
  1017. std::vector<T> GetRow(const std::string& pRowName, ConvFunc<T> pToVal) const
  1018. {
  1019. int rowIdx = GetRowIdx(pRowName);
  1020. if (rowIdx < 0)
  1021. {
  1022. throw std::out_of_range("row not found: " + pRowName);
  1023. }
  1024. return GetRow<T>(static_cast<size_t>(rowIdx), pToVal);
  1025. }
  1026. /**
  1027. * @brief Set row by index.
  1028. * 通过索引设置行数据
  1029. * @param pRowIdx zero-based row index.
  1030. * 零基行索引
  1031. * @param pRow vector of row data.
  1032. * 行数据的向量
  1033. */
  1034. template<typename T>
  1035. void SetRow(const size_t pRowIdx, const std::vector<T>& pRow)
  1036. {
  1037. const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
  1038. while ((dataRowIdx + 1) > GetDataRowCount())
  1039. {
  1040. std::vector<std::string> row;
  1041. row.resize(GetDataColumnCount());
  1042. mData.push_back(row);
  1043. }
  1044. if (pRow.size() > GetDataColumnCount())
  1045. {
  1046. for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
  1047. {
  1048. if (std::distance(mData.begin(), itRow) >= mLabelParams.mColumnNameIdx)
  1049. {
  1050. itRow->resize(GetDataColumnIndex(pRow.size()));
  1051. }
  1052. }
  1053. }
  1054. Converter<T> converter(mConverterParams);
  1055. for (auto itCol = pRow.begin(); itCol != pRow.end(); ++itCol)
  1056. {
  1057. std::string str;
  1058. converter.ToStr(*itCol, str);
  1059. mData.at(dataRowIdx).at(static_cast<size_t>(std::distance(pRow.begin(),
  1060. itCol) + mLabelParams.mRowNameIdx + 1)) = str;
  1061. }
  1062. }
  1063. /**
  1064. * @brief Set row by name.
  1065. * 通过名称设置行数据
  1066. * @param pRowName row label name.
  1067. * 行标签名称
  1068. * @param pRow vector of row data.
  1069. * 行数据的向量
  1070. */
  1071. template<typename T>
  1072. void SetRow(const std::string& pRowName, const std::vector<T>& pRow)
  1073. {
  1074. int rowIdx = GetRowIdx(pRowName);
  1075. if (rowIdx < 0)
  1076. {
  1077. throw std::out_of_range("row not found: " + pRowName);
  1078. }
  1079. return SetRow<T>(static_cast<size_t>(rowIdx), pRow);
  1080. }
  1081. /**
  1082. * @brief Remove row by index.
  1083. * 通过索引移除行
  1084. * @param pRowIdx zero-based row index.
  1085. * 零基行索引
  1086. */
  1087. void RemoveRow(const size_t pRowIdx)
  1088. {
  1089. const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
  1090. mData.erase(mData.begin() + static_cast<int>(dataRowIdx));
  1091. UpdateRowNames();
  1092. }
  1093. /**
  1094. * @brief Remove row by name.
  1095. * 通过名称移除行
  1096. * @param pRowName row label name.
  1097. * 行标签名称
  1098. */
  1099. void RemoveRow(const std::string& pRowName)
  1100. {
  1101. int rowIdx = GetRowIdx(pRowName);
  1102. if (rowIdx < 0)
  1103. {
  1104. throw std::out_of_range("row not found: " + pRowName);
  1105. }
  1106. RemoveRow(static_cast<size_t>(rowIdx));
  1107. }
  1108. /**
  1109. * @brief Insert row at specified index.
  1110. * 在指定索引处插入行
  1111. * @param pRowIdx zero-based row index.
  1112. * 零基行索引
  1113. * @param pRow vector of row data (optional argument).
  1114. * 行数据的向量(可选参数)
  1115. * @param pRowName row label name (optional argument).
  1116. * 行标签名称(可选参数)
  1117. */
  1118. template<typename T>
  1119. void InsertRow(const size_t pRowIdx, const std::vector<T>& pRow = std::vector<T>(),
  1120. const std::string& pRowName = std::string())
  1121. {
  1122. const size_t rowIdx = GetDataRowIndex(pRowIdx);
  1123. std::vector<std::string> row;
  1124. if (pRow.empty())
  1125. {
  1126. row.resize(GetDataColumnCount());
  1127. }
  1128. else
  1129. {
  1130. row.resize(GetDataColumnIndex(pRow.size()));
  1131. Converter<T> converter(mConverterParams);
  1132. for (auto itCol = pRow.begin(); itCol != pRow.end(); ++itCol)
  1133. {
  1134. std::string str;
  1135. converter.ToStr(*itCol, str);
  1136. row.at(static_cast<size_t>(std::distance(pRow.begin(), itCol) + mLabelParams.mRowNameIdx + 1)) = str;
  1137. }
  1138. }
  1139. while (rowIdx > GetDataRowCount())
  1140. {
  1141. std::vector<std::string> tempRow;
  1142. tempRow.resize(GetDataColumnCount());
  1143. mData.push_back(tempRow);
  1144. }
  1145. mData.insert(mData.begin() + static_cast<int>(rowIdx), row);
  1146. if (!pRowName.empty())
  1147. {
  1148. SetRowName(pRowIdx, pRowName);
  1149. }
  1150. UpdateRowNames();
  1151. }
  1152. /**
  1153. * @brief Get number of data rows (excluding label rows).
  1154. * 获取数据行的数量(不包括标签行)
  1155. * @returns row count.
  1156. * 返回行数
  1157. */
  1158. size_t GetRowCount() const
  1159. {
  1160. const int count = static_cast<int>(mData.size()) - (mLabelParams.mColumnNameIdx + 1);
  1161. return (count >= 0) ? static_cast<size_t>(count) : 0;
  1162. }
  1163. /**
  1164. * @brief Get cell by index.
  1165. * 通过索引获取单元格数据
  1166. * @param pColumnIdx zero-based column index.
  1167. * 零基列索引
  1168. * @param pRowIdx zero-based row index.
  1169. * 零基行索引
  1170. * @returns cell data.
  1171. * 单元格数据
  1172. */
  1173. template<typename T>
  1174. T GetCell(const size_t pColumnIdx, const size_t pRowIdx) const
  1175. {
  1176. const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
  1177. const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
  1178. T val;
  1179. Converter<T> converter(mConverterParams);
  1180. converter.ToVal(mData.at(dataRowIdx).at(dataColumnIdx), val);
  1181. return val;
  1182. }
  1183. /**
  1184. * @brief Get cell by index.
  1185. * 通过索引获取单元格数据
  1186. * @param pColumnIdx zero-based column index.
  1187. * 零基列索引
  1188. * @param pRowIdx zero-based row index.
  1189. * 零基行索引
  1190. * @param pToVal conversion function.
  1191. * 转换函数
  1192. * @returns cell data.
  1193. * 单元格数据
  1194. */
  1195. template<typename T>
  1196. T GetCell(const size_t pColumnIdx, const size_t pRowIdx, ConvFunc<T> pToVal) const
  1197. {
  1198. const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
  1199. const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
  1200. T val;
  1201. pToVal(mData.at(dataRowIdx).at(dataColumnIdx), val);
  1202. return val;
  1203. }
  1204. /**
  1205. * @brief Get cell by name.
  1206. * 通过名称获取单元格数据
  1207. * @param pColumnName column label name.
  1208. * 列标签名称
  1209. * @param pRowName row label name.
  1210. * 行标签名称
  1211. * @returns cell data.
  1212. * 单元格数据
  1213. */
  1214. template<typename T>
  1215. T GetCell(const std::string& pColumnName, const std::string& pRowName) const
  1216. {
  1217. const int columnIdx = GetColumnIdx(pColumnName);
  1218. if (columnIdx < 0)
  1219. {
  1220. throw std::out_of_range("column not found: " + pColumnName);
  1221. }
  1222. const int rowIdx = GetRowIdx(pRowName);
  1223. if (rowIdx < 0)
  1224. {
  1225. throw std::out_of_range("row not found: " + pRowName);
  1226. }
  1227. return GetCell<T>(static_cast<size_t>(columnIdx), static_cast<size_t>(rowIdx));
  1228. }
  1229. /**
  1230. * @brief Get cell by name.
  1231. * 通过名称获取单元格数据
  1232. * @param pColumnName column label name.
  1233. * 列标签名称
  1234. * @param pRowName row label name.
  1235. * 行标签名称
  1236. * @param pToVal conversion function.
  1237. * 转换函数
  1238. * @returns cell data.
  1239. * 单元格数据
  1240. */
  1241. template<typename T>
  1242. T GetCell(const std::string& pColumnName, const std::string& pRowName, ConvFunc<T> pToVal) const
  1243. {
  1244. const int columnIdx = GetColumnIdx(pColumnName);
  1245. if (columnIdx < 0)
  1246. {
  1247. throw std::out_of_range("column not found: " + pColumnName);
  1248. }
  1249. const int rowIdx = GetRowIdx(pRowName);
  1250. if (rowIdx < 0)
  1251. {
  1252. throw std::out_of_range("row not found: " + pRowName);
  1253. }
  1254. return GetCell<T>(static_cast<size_t>(columnIdx), static_cast<size_t>(rowIdx), pToVal);
  1255. }
  1256. /**
  1257. * @brief Get cell by column name and row index.
  1258. * 通过列名称和行索引获取单元格数据
  1259. * @param pColumnName column label name.
  1260. * 列标签名称
  1261. * @param pRowIdx zero-based row index.
  1262. * 零基行索引
  1263. * @returns cell data.
  1264. * 单元格数据
  1265. */
  1266. template<typename T>
  1267. T GetCell(const std::string& pColumnName, const size_t pRowIdx) const
  1268. {
  1269. const int columnIdx = GetColumnIdx(pColumnName);
  1270. if (columnIdx < 0)
  1271. {
  1272. throw std::out_of_range("column not found: " + pColumnName);
  1273. }
  1274. return GetCell<T>(static_cast<size_t>(columnIdx), pRowIdx);
  1275. }
  1276. /**
  1277. * @brief Get cell by column name and row index.
  1278. * 通过列名称和行索引获取单元格数据
  1279. * @param pColumnName column label name.
  1280. * 列标签名称
  1281. * @param pRowIdx zero-based row index.
  1282. * 零基行索引
  1283. * @param pToVal conversion function.
  1284. * 转换函数
  1285. * @returns cell data.
  1286. * 单元格数据
  1287. */
  1288. template<typename T>
  1289. T GetCell(const std::string& pColumnName, const size_t pRowIdx, ConvFunc<T> pToVal) const
  1290. {
  1291. const int columnIdx = GetColumnIdx(pColumnName);
  1292. if (columnIdx < 0)
  1293. {
  1294. throw std::out_of_range("column not found: " + pColumnName);
  1295. }
  1296. return GetCell<T>(static_cast<size_t>(columnIdx), pRowIdx, pToVal);
  1297. }
  1298. /**
  1299. * @brief Get cell by column index and row name.
  1300. * 通过列索引和行名称获取单元格数据
  1301. * @param pColumnIdx zero-based column index.
  1302. * 零基列索引
  1303. * @param pRowName row label name.
  1304. * 行标签名称
  1305. * @returns cell data.
  1306. * 单元格数据
  1307. */
  1308. template<typename T>
  1309. T GetCell(const size_t pColumnIdx, const std::string& pRowName) const
  1310. {
  1311. const int rowIdx = GetRowIdx(pRowName);
  1312. if (rowIdx < 0)
  1313. {
  1314. throw std::out_of_range("row not found: " + pRowName);
  1315. }
  1316. return GetCell<T>(pColumnIdx, static_cast<size_t>(rowIdx));
  1317. }
  1318. /**
  1319. * @brief Get cell by column index and row name.
  1320. * 通过列索引和行名称获取单元格数据
  1321. * @param pColumnIdx zero-based column index.
  1322. * 零基列索引
  1323. * @param pRowName row label name.
  1324. * 行标签名称
  1325. * @param pToVal conversion function.
  1326. * 转换函数
  1327. * @returns cell data.
  1328. * 单元格数据
  1329. */
  1330. template<typename T>
  1331. T GetCell(const size_t pColumnIdx, const std::string& pRowName, ConvFunc<T> pToVal) const
  1332. {
  1333. const int rowIdx = GetRowIdx(pRowName);
  1334. if (rowIdx < 0)
  1335. {
  1336. throw std::out_of_range("row not found: " + pRowName);
  1337. }
  1338. return GetCell<T>(pColumnIdx, static_cast<size_t>(rowIdx), pToVal);
  1339. }
  1340. /**
  1341. * @brief Set cell by index.
  1342. * 通过索引设置单元格数据
  1343. * @param pRowIdx zero-based row index.
  1344. * 零基行索引
  1345. * @param pColumnIdx zero-based column index.
  1346. * 零基列索引
  1347. * @param pCell cell data.
  1348. * 单元格数据
  1349. */
  1350. template<typename T>
  1351. void SetCell(const size_t pColumnIdx, const size_t pRowIdx, const T& pCell)
  1352. {
  1353. const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
  1354. const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
  1355. while ((dataRowIdx + 1) > GetDataRowCount())
  1356. {
  1357. std::vector<std::string> row;
  1358. row.resize(GetDataColumnCount());
  1359. mData.push_back(row);
  1360. }
  1361. if ((dataColumnIdx + 1) > GetDataColumnCount())
  1362. {
  1363. for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
  1364. {
  1365. if (std::distance(mData.begin(), itRow) >= mLabelParams.mColumnNameIdx)
  1366. {
  1367. itRow->resize(dataColumnIdx + 1);
  1368. }
  1369. }
  1370. }
  1371. std::string str;
  1372. Converter<T> converter(mConverterParams);
  1373. converter.ToStr(pCell, str);
  1374. mData.at(dataRowIdx).at(dataColumnIdx) = str;
  1375. }
  1376. /**
  1377. * @brief Set cell by name.
  1378. * 通过列名称和行名称设置单元格数据
  1379. * @param pColumnName column label name.
  1380. * 列标签名称
  1381. * @param pRowName row label name.
  1382. * 行标签名称
  1383. * @param pCell cell data.
  1384. * 单元格数据
  1385. */
  1386. template<typename T>
  1387. void SetCell(const std::string& pColumnName, const std::string& pRowName, const T& pCell)
  1388. {
  1389. const int columnIdx = GetColumnIdx(pColumnName);
  1390. if (columnIdx < 0)
  1391. {
  1392. throw std::out_of_range("column not found: " + pColumnName);
  1393. }
  1394. const int rowIdx = GetRowIdx(pRowName);
  1395. if (rowIdx < 0)
  1396. {
  1397. throw std::out_of_range("row not found: " + pRowName);
  1398. }
  1399. SetCell<T>(static_cast<size_t>(columnIdx), static_cast<size_t>(rowIdx), pCell);
  1400. }
  1401. /**
  1402. * @brief Set cell by column index and row name.
  1403. * 通过列索引和行名称设置单元格数据
  1404. * @param pColumnIdx zero-based column index.
  1405. * 零基列索引
  1406. * @param pRowName row label name.
  1407. * 行标签名称
  1408. * @param pCell cell data.
  1409. * 单元格数据
  1410. */
  1411. template<typename T>
  1412. void SetCell(const size_t pColumnIdx, const std::string& pRowName, const T& pCell)
  1413. {
  1414. const int rowIdx = GetRowIdx(pRowName);
  1415. if (rowIdx < 0)
  1416. {
  1417. throw std::out_of_range("row not found: " + pRowName);
  1418. }
  1419. SetCell<T>(pColumnIdx, static_cast<size_t>(rowIdx), pCell);
  1420. }
  1421. /**
  1422. * @brief Set cell by column name and row index.
  1423. * 通过列名称和行索引设置单元格数据
  1424. * @param pColumnName column label name.
  1425. * 列标签名称
  1426. * @param pRowIdx zero-based row index.
  1427. * 零基行索引
  1428. * @param pCell cell data.
  1429. * 单元格数据
  1430. */
  1431. template<typename T>
  1432. void SetCell(const std::string& pColumnName, const size_t pRowIdx, const T& pCell)
  1433. {
  1434. const int columnIdx = GetColumnIdx(pColumnName);
  1435. if (columnIdx < 0)
  1436. {
  1437. throw std::out_of_range("column not found: " + pColumnName);
  1438. }
  1439. SetCell<T>(static_cast<size_t>(columnIdx), pRowIdx, pCell);
  1440. }
  1441. /**
  1442. * @brief Get column name
  1443. * 获取列名称
  1444. * @param pColumnIdx zero-based column index.
  1445. * 零基列索引
  1446. * @returns column name.
  1447. * 返回列名称
  1448. */
  1449. std::string GetColumnName(const size_t pColumnIdx) const
  1450. {
  1451. const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
  1452. if (mLabelParams.mColumnNameIdx < 0)
  1453. {
  1454. throw std::out_of_range("column name row index < 0: " + std::to_string(mLabelParams.mColumnNameIdx));
  1455. }
  1456. return mData.at(static_cast<size_t>(mLabelParams.mColumnNameIdx)).at(dataColumnIdx);
  1457. }
  1458. /**
  1459. * @brief Set column name
  1460. * 设置列名称
  1461. * @param pColumnIdx zero-based column index.
  1462. * 零基列索引
  1463. * @param pColumnName column name.
  1464. * 列名称
  1465. */
  1466. void SetColumnName(size_t pColumnIdx, const std::string& pColumnName)
  1467. {
  1468. if (mLabelParams.mColumnNameIdx < 0)
  1469. {
  1470. throw std::out_of_range("column name row index < 0: " + std::to_string(mLabelParams.mColumnNameIdx));
  1471. }
  1472. const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx);
  1473. mColumnNames[pColumnName] = dataColumnIdx;
  1474. // increase table size if necessary:
  1475. const size_t rowIdx = static_cast<size_t>(mLabelParams.mColumnNameIdx);
  1476. if (rowIdx >= mData.size())
  1477. {
  1478. mData.resize(rowIdx + 1);
  1479. }
  1480. auto& row = mData[rowIdx];
  1481. if (dataColumnIdx >= row.size())
  1482. {
  1483. row.resize(dataColumnIdx + 1);
  1484. }
  1485. mData.at(static_cast<size_t>(mLabelParams.mColumnNameIdx)).at(dataColumnIdx) = pColumnName;
  1486. }
  1487. /**
  1488. * @brief Get column names
  1489. * 获取列名称
  1490. * @returns vector of column names.
  1491. * 返回列名称的向量
  1492. */
  1493. std::vector<std::string> GetColumnNames() const
  1494. {
  1495. if (mLabelParams.mColumnNameIdx >= 0)
  1496. {
  1497. return std::vector<std::string>(mData.at(static_cast<size_t>(mLabelParams.mColumnNameIdx)).begin() +
  1498. (mLabelParams.mRowNameIdx + 1),
  1499. mData.at(static_cast<size_t>(mLabelParams.mColumnNameIdx)).end());
  1500. }
  1501. return std::vector<std::string>();
  1502. }
  1503. /**
  1504. * @brief Get row name
  1505. * 获取行名称
  1506. * @param pRowIdx zero-based column index.
  1507. * 零基列索引
  1508. * @returns row name.
  1509. * 返回行名称
  1510. */
  1511. std::string GetRowName(const size_t pRowIdx) const
  1512. {
  1513. const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
  1514. if (mLabelParams.mRowNameIdx < 0)
  1515. {
  1516. throw std::out_of_range("row name column index < 0: " + std::to_string(mLabelParams.mRowNameIdx));
  1517. }
  1518. return mData.at(dataRowIdx).at(static_cast<size_t>(mLabelParams.mRowNameIdx));
  1519. }
  1520. /**
  1521. * @brief Set row name
  1522. * 设置行名称
  1523. * @param pRowIdx zero-based row index.
  1524. * 零基行索引
  1525. * @param pRowName row name.
  1526. * 行名称
  1527. */
  1528. void SetRowName(size_t pRowIdx, const std::string& pRowName)
  1529. {
  1530. const size_t dataRowIdx = GetDataRowIndex(pRowIdx);
  1531. mRowNames[pRowName] = dataRowIdx;
  1532. if (mLabelParams.mRowNameIdx < 0)
  1533. {
  1534. throw std::out_of_range("row name column index < 0: " + std::to_string(mLabelParams.mRowNameIdx));
  1535. }
  1536. // increase table size if necessary:
  1537. if (dataRowIdx >= mData.size())
  1538. {
  1539. mData.resize(dataRowIdx + 1);
  1540. }
  1541. auto& row = mData[dataRowIdx];
  1542. if (mLabelParams.mRowNameIdx >= static_cast<int>(row.size()))
  1543. {
  1544. row.resize(static_cast<size_t>(mLabelParams.mRowNameIdx) + 1);
  1545. }
  1546. mData.at(dataRowIdx).at(static_cast<size_t>(mLabelParams.mRowNameIdx)) = pRowName;
  1547. }
  1548. /**
  1549. * @brief Get row names
  1550. * 获取行名称
  1551. * @returns vector of row names.
  1552. * 返回行名称的向量
  1553. */
  1554. std::vector<std::string> GetRowNames() const
  1555. {
  1556. std::vector<std::string> rownames;
  1557. if (mLabelParams.mRowNameIdx >= 0)
  1558. {
  1559. for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow)
  1560. {
  1561. if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx)
  1562. {
  1563. rownames.push_back(itRow->at(static_cast<size_t>(mLabelParams.mRowNameIdx)));
  1564. }
  1565. }
  1566. }
  1567. return rownames;
  1568. }
  1569. private:
  1570. void ReadCsv()
  1571. {
  1572. std::ifstream stream;
  1573. stream.exceptions(std::ifstream::failbit | std::ifstream::badbit);
  1574. stream.open(mPath, std::ios::binary);
  1575. ReadCsv(stream);
  1576. }
  1577. void ReadCsv(std::istream& pStream)
  1578. {
  1579. Clear();
  1580. pStream.seekg(0, std::ios::end);
  1581. std::streamsize length = pStream.tellg();
  1582. pStream.seekg(0, std::ios::beg);
  1583. #ifdef HAS_CODECVT
  1584. std::vector<char> bom2b(2, '\0');
  1585. if (length >= 2)
  1586. {
  1587. pStream.read(bom2b.data(), 2);
  1588. pStream.seekg(0, std::ios::beg);
  1589. }
  1590. static const std::vector<char> bomU16le = { '\xff', '\xfe' };
  1591. static const std::vector<char> bomU16be = { '\xfe', '\xff' };
  1592. if ((bom2b == bomU16le) || (bom2b == bomU16be))
  1593. {
  1594. mIsUtf16 = true;
  1595. mIsLE = (bom2b == bomU16le);
  1596. std::wifstream wstream;
  1597. wstream.exceptions(std::wifstream::failbit | std::wifstream::badbit);
  1598. wstream.open(mPath, std::ios::binary);
  1599. if (mIsLE)
  1600. {
  1601. wstream.imbue(std::locale(wstream.getloc(),
  1602. new std::codecvt_utf16<wchar_t, 0x10ffff,
  1603. static_cast<std::codecvt_mode>(std::consume_header |
  1604. std::little_endian)>));
  1605. }
  1606. else
  1607. {
  1608. wstream.imbue(std::locale(wstream.getloc(),
  1609. new std::codecvt_utf16<wchar_t, 0x10ffff,
  1610. std::consume_header>));
  1611. }
  1612. std::wstringstream wss;
  1613. wss << wstream.rdbuf();
  1614. std::string utf8 = ToString(wss.str());
  1615. std::stringstream ss(utf8);
  1616. ParseCsv(ss, static_cast<std::streamsize>(utf8.size()));
  1617. }
  1618. else
  1619. #endif
  1620. {
  1621. // check for UTF-8 Byte order mark and skip it when found
  1622. if (length >= 3)
  1623. {
  1624. std::vector<char> bom3b(3, '\0');
  1625. pStream.read(bom3b.data(), 3);
  1626. if (bom3b != s_Utf8BOM)
  1627. {
  1628. // file does not start with a UTF-8 Byte order mark
  1629. pStream.seekg(0, std::ios::beg);
  1630. }
  1631. else
  1632. {
  1633. // file did start with a UTF-8 Byte order mark, simply skip it
  1634. length -= 3;
  1635. mHasUtf8BOM = true;
  1636. }
  1637. }
  1638. ParseCsv(pStream, length);
  1639. }
  1640. }
  1641. void ParseCsv(std::istream& pStream, std::streamsize p_FileLength)
  1642. {
  1643. const std::streamsize bufLength = 64 * 1024;
  1644. char* buffer = new char[bufLength];
  1645. std::vector<std::string> row;
  1646. std::string cell;
  1647. bool quoted = false;
  1648. int cr = 0;
  1649. int lf = 0;
  1650. while (p_FileLength > 0)
  1651. {
  1652. const std::streamsize toReadLength = std::min<std::streamsize>(p_FileLength, bufLength);
  1653. pStream.read(buffer, toReadLength);
  1654. // With user-specified istream opened in non-binary mode on windows, we may have a
  1655. // data length mismatch, so ensure we don't parse outside actual data length read.
  1656. const std::streamsize readLength = pStream.gcount();
  1657. if (readLength <= 0)
  1658. {
  1659. break;
  1660. }
  1661. for (size_t i = 0; i < static_cast<size_t>(readLength); ++i)
  1662. {
  1663. if (buffer[i] == mSeparatorParams.mQuoteChar)
  1664. {
  1665. if (cell.empty() || (cell[0] == mSeparatorParams.mQuoteChar))
  1666. {
  1667. quoted = !quoted;
  1668. }
  1669. else if (mSeparatorParams.mTrim)
  1670. {
  1671. // allow whitespace before first mQuoteChar
  1672. const auto firstQuote = std::find(cell.begin(), cell.end(), mSeparatorParams.mQuoteChar);
  1673. if (std::all_of(cell.begin(), firstQuote, [](int ch) { return isspace(ch); }))
  1674. {
  1675. quoted = !quoted;
  1676. }
  1677. }
  1678. cell += buffer[i];
  1679. }
  1680. else if (buffer[i] == mSeparatorParams.mSeparator)
  1681. {
  1682. if (!quoted)
  1683. {
  1684. row.push_back(Unquote(Trim(cell)));
  1685. cell.clear();
  1686. }
  1687. else
  1688. {
  1689. cell += buffer[i];
  1690. }
  1691. }
  1692. else if (buffer[i] == '\r')
  1693. {
  1694. if (mSeparatorParams.mQuotedLinebreaks && quoted)
  1695. {
  1696. cell += buffer[i];
  1697. }
  1698. else
  1699. {
  1700. ++cr;
  1701. }
  1702. }
  1703. else if (buffer[i] == '\n')
  1704. {
  1705. if (mSeparatorParams.mQuotedLinebreaks && quoted)
  1706. {
  1707. cell += buffer[i];
  1708. }
  1709. else
  1710. {
  1711. ++lf;
  1712. if (mLineReaderParams.mSkipEmptyLines && row.empty() && cell.empty())
  1713. {
  1714. // skip empty line
  1715. }
  1716. else
  1717. {
  1718. row.push_back(Unquote(Trim(cell)));
  1719. if (mLineReaderParams.mSkipCommentLines && !row.at(0).empty() &&
  1720. (row.at(0)[0] == mLineReaderParams.mCommentPrefix))
  1721. {
  1722. // skip comment line
  1723. }
  1724. else
  1725. {
  1726. mData.push_back(row);
  1727. }
  1728. cell.clear();
  1729. row.clear();
  1730. quoted = false;
  1731. }
  1732. }
  1733. }
  1734. else
  1735. {
  1736. cell += buffer[i];
  1737. }
  1738. }
  1739. p_FileLength -= readLength;
  1740. }
  1741. // Handle last row / cell without linebreak
  1742. if (row.empty() && cell.empty())
  1743. {
  1744. // skip empty trailing line
  1745. }
  1746. else
  1747. {
  1748. row.push_back(Unquote(Trim(cell)));
  1749. if (mLineReaderParams.mSkipCommentLines && !row.at(0).empty() &&
  1750. (row.at(0)[0] == mLineReaderParams.mCommentPrefix))
  1751. {
  1752. // skip comment line
  1753. }
  1754. else
  1755. {
  1756. mData.push_back(row);
  1757. }
  1758. cell.clear();
  1759. row.clear();
  1760. quoted = false;
  1761. }
  1762. delete [] buffer;
  1763. // Assume CR/LF if at least half the linebreaks have CR
  1764. mSeparatorParams.mHasCR = (cr > (lf / 2));
  1765. // Set up column labels
  1766. UpdateColumnNames();
  1767. // Set up row labels
  1768. UpdateRowNames();
  1769. }
  1770. void WriteCsv() const
  1771. {
  1772. #ifdef HAS_CODECVT
  1773. if (mIsUtf16)
  1774. {
  1775. std::stringstream ss;
  1776. WriteCsv(ss);
  1777. std::string utf8 = ss.str();
  1778. std::wstring wstr = ToWString(utf8);
  1779. std::wofstream wstream;
  1780. wstream.exceptions(std::wofstream::failbit | std::wofstream::badbit);
  1781. wstream.open(mPath, std::ios::binary | std::ios::trunc);
  1782. if (mIsLE)
  1783. {
  1784. wstream.imbue(std::locale(wstream.getloc(),
  1785. new std::codecvt_utf16<wchar_t, 0x10ffff,
  1786. static_cast<std::codecvt_mode>(std::little_endian)>));
  1787. }
  1788. else
  1789. {
  1790. wstream.imbue(std::locale(wstream.getloc(),
  1791. new std::codecvt_utf16<wchar_t, 0x10ffff>));
  1792. }
  1793. wstream << static_cast<wchar_t>(0xfeff);
  1794. wstream << wstr;
  1795. }
  1796. else
  1797. #endif
  1798. {
  1799. std::ofstream stream;
  1800. stream.exceptions(std::ofstream::failbit | std::ofstream::badbit);
  1801. stream.open(mPath, std::ios::binary | std::ios::trunc);
  1802. if (mHasUtf8BOM)
  1803. {
  1804. stream.write(s_Utf8BOM.data(), 3);
  1805. }
  1806. WriteCsv(stream);
  1807. }
  1808. }
  1809. void WriteCsv(std::ostream& pStream) const
  1810. {
  1811. for (auto itr = mData.begin(); itr != mData.end(); ++itr)
  1812. {
  1813. for (auto itc = itr->begin(); itc != itr->end(); ++itc)
  1814. {
  1815. if (mSeparatorParams.mAutoQuote &&
  1816. ((itc->find(mSeparatorParams.mSeparator) != std::string::npos) ||
  1817. (itc->find(' ') != std::string::npos) ||
  1818. (itc->find('\n') != std::string::npos)))
  1819. {
  1820. // escape quotes in string
  1821. std::string str = *itc;
  1822. const std::string quoteCharStr = std::string(1, mSeparatorParams.mQuoteChar);
  1823. ReplaceString(str, quoteCharStr, quoteCharStr + quoteCharStr);
  1824. pStream << quoteCharStr << str << quoteCharStr;
  1825. }
  1826. else
  1827. {
  1828. pStream << *itc;
  1829. }
  1830. if (std::distance(itc, itr->end()) > 1)
  1831. {
  1832. pStream << mSeparatorParams.mSeparator;
  1833. }
  1834. }
  1835. pStream << (mSeparatorParams.mHasCR ? "\r\n" : "\n");
  1836. }
  1837. }
  1838. size_t GetDataRowCount() const
  1839. {
  1840. return mData.size();
  1841. }
  1842. size_t GetDataColumnCount() const
  1843. {
  1844. const size_t firstDataRow = static_cast<size_t>((mLabelParams.mColumnNameIdx >= 0) ? mLabelParams.mColumnNameIdx : 0);
  1845. return (mData.size() > firstDataRow) ? mData.at(firstDataRow).size() : 0;
  1846. }
  1847. inline size_t GetDataRowIndex(const size_t pRowIdx) const
  1848. {
  1849. const size_t firstDataRow = static_cast<size_t>((mLabelParams.mColumnNameIdx + 1 >= 0) ? mLabelParams.mColumnNameIdx + 1 : 0);
  1850. return pRowIdx + firstDataRow;
  1851. }
  1852. inline size_t GetDataColumnIndex(const size_t pColumnIdx) const
  1853. {
  1854. const size_t firstDataColumn = static_cast<size_t>((mLabelParams.mRowNameIdx + 1 >= 0) ? mLabelParams.mRowNameIdx + 1 : 0);
  1855. return pColumnIdx + firstDataColumn;
  1856. }
  1857. std::string Trim(const std::string& pStr) const
  1858. {
  1859. if (mSeparatorParams.mTrim)
  1860. {
  1861. std::string str = pStr;
  1862. // ltrim
  1863. str.erase(str.begin(), std::find_if(str.begin(), str.end(), [](int ch) { return !isspace(ch); }));
  1864. // rtrim
  1865. str.erase(std::find_if(str.rbegin(), str.rend(), [](int ch) { return !isspace(ch); }).base(), str.end());
  1866. return str;
  1867. }
  1868. else
  1869. {
  1870. return pStr;
  1871. }
  1872. }
  1873. std::string Unquote(const std::string& pStr) const
  1874. {
  1875. if (mSeparatorParams.mAutoQuote && (pStr.size() >= 2) &&
  1876. (pStr.front() == mSeparatorParams.mQuoteChar) &&
  1877. (pStr.back() == mSeparatorParams.mQuoteChar))
  1878. {
  1879. // remove start/end quotes
  1880. std::string str = pStr.substr(1, pStr.size() - 2);
  1881. // unescape quotes in string
  1882. const std::string quoteCharStr = std::string(1, mSeparatorParams.mQuoteChar);
  1883. ReplaceString(str, quoteCharStr + quoteCharStr, quoteCharStr);
  1884. return str;
  1885. }
  1886. else
  1887. {
  1888. return pStr;
  1889. }
  1890. }
  1891. void UpdateColumnNames()
  1892. {
  1893. mColumnNames.clear();
  1894. if ((mLabelParams.mColumnNameIdx >= 0) &&
  1895. (static_cast<int>(mData.size()) > mLabelParams.mColumnNameIdx))
  1896. {
  1897. size_t i = 0;
  1898. for (auto& columnName : mData[static_cast<size_t>(mLabelParams.mColumnNameIdx)])
  1899. {
  1900. mColumnNames[columnName] = i++;
  1901. }
  1902. }
  1903. }
  1904. void UpdateRowNames()
  1905. {
  1906. mRowNames.clear();
  1907. if ((mLabelParams.mRowNameIdx >= 0) &&
  1908. (static_cast<int>(mData.size()) >
  1909. (mLabelParams.mColumnNameIdx + 1)))
  1910. {
  1911. size_t i = 0;
  1912. for (auto& dataRow : mData)
  1913. {
  1914. if (static_cast<int>(dataRow.size()) > mLabelParams.mRowNameIdx)
  1915. {
  1916. mRowNames[dataRow[static_cast<size_t>(mLabelParams.mRowNameIdx)]] = i++;
  1917. }
  1918. }
  1919. }
  1920. }
  1921. #ifdef HAS_CODECVT
  1922. #if defined(_MSC_VER)
  1923. #pragma warning (push)
  1924. #pragma warning (disable: 4996)
  1925. #endif
  // Converts a wide string to a UTF-8 encoded byte string using
  // std::wstring_convert with std::codecvt_utf8<wchar_t>.
  static std::string ToString(const std::wstring& pWStr)
  {
    std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> utf8Converter;
    return utf8Converter.to_bytes(pWStr);
  }
  // Converts a UTF-8 encoded byte string to a wide string using
  // std::wstring_convert with std::codecvt_utf8<wchar_t>.
  static std::wstring ToWString(const std::string& pStr)
  {
    std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> utf8Converter;
    return utf8Converter.from_bytes(pStr);
  }
  1934. #if defined(_MSC_VER)
  1935. #pragma warning (pop)
  1936. #endif
  1937. #endif
  // Replaces every non-overlapping occurrence of pSearch in pStr with
  // pReplace, in place, scanning left to right. Text inserted by a
  // replacement is never rescanned, so pReplace may itself contain pSearch.
  //
  // pStr:     string to modify.
  // pSearch:  text to look for; an empty search string leaves pStr unchanged.
  // pReplace: text substituted for each match (may be empty).
  static void ReplaceString(std::string& pStr, const std::string& pSearch, const std::string& pReplace)
  {
    // An empty pattern matches at every position, which would make the loop
    // below spin forever (and grow pStr without bound).
    if (pSearch.empty())
    {
      return;
    }

    size_t pos = 0;
    while ((pos = pStr.find(pSearch, pos)) != std::string::npos)
    {
      pStr.replace(pos, pSearch.size(), pReplace);
      // Continue after the inserted text so it is not matched again.
      pos += pReplace.size();
    }
  }
  1947. private:
  1948. std::string mPath;
  1949. LabelParams mLabelParams;
  1950. SeparatorParams mSeparatorParams;
  1951. ConverterParams mConverterParams;
  1952. LineReaderParams mLineReaderParams;
  1953. std::vector<std::vector<std::string>> mData;
  1954. std::map<std::string, size_t> mColumnNames;
  1955. std::map<std::string, size_t> mRowNames;
  1956. #ifdef HAS_CODECVT
  1957. bool mIsUtf16 = false;
  1958. bool mIsLE = false;
  1959. #endif
  1960. bool mHasUtf8BOM = false;
  1961. };
  1962. }