index.cjs 55 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422
  1. 'use strict';
  2. var stream = require('stream');
  /**
   * Tell whether a value is a plain "object-like" value.
   *
   * @param {*} obj - Value to inspect.
   * @returns {boolean} `true` for non-null objects which are not arrays.
   */
  const is_object = function(obj){
    if(typeof obj !== 'object'){
      return false;
    }
    if(obj === null){
      // `typeof null` is "object", rule it out explicitly
      return false;
    }
    return !Array.isArray(obj);
  };
  /**
   * Error type carrying a machine readable `code` plus arbitrary context
   * properties copied onto the error instance.
   *
   * @param {string} code - Error identifier exposed as `error.code`.
   * @param {string|string[]} message - Message, or message fragments which
   *   are joined with a space and trimmed.
   * @param {object} [options] - Only `options.encoding` is read; it is used
   *   to decode Buffer values found in the contexts. May be omitted.
   * @param {...object} contexts - Objects whose enumerable properties are
   *   copied onto the error. Buffers are converted to strings, `null` and
   *   `undefined` are kept as-is, and everything else is deep-copied through
   *   a JSON round trip.
   */
  class CsvError extends Error {
    constructor(code, message, options, ...contexts) {
      super(Array.isArray(message) ? message.join(' ').trim() : message);
      if(Error.captureStackTrace !== undefined){
        Error.captureStackTrace(this, CsvError);
      }
      this.code = code;
      // `options` is optional: some callers build an error before any
      // options object exists. Buffers then fall back to the default
      // encoding of `Buffer.prototype.toString`.
      const encoding = options == null ? undefined : options.encoding;
      for(const context of contexts){
        for(const key in context){
          const value = context[key];
          if(Buffer.isBuffer(value)){
            this[key] = value.toString(encoding);
          }else if(value == null){
            this[key] = value;
          }else {
            // Values without a JSON representation (functions, symbols)
            // serialize to `undefined`, which `JSON.parse` cannot read.
            const serialized = JSON.stringify(value);
            this[key] = serialized === undefined ? undefined : JSON.parse(serialized);
          }
        }
      }
    }
  }
  /**
   * Convert a user supplied `columns` array into a list of column
   * descriptors.
   *
   * - `undefined`, `null` and `false` entries become `{ disabled: true }`
   * - strings become `{ name: <string> }`
   * - object literals are kept as-is, provided they expose a string `name`
   *
   * @param {Array} columns - Column definitions.
   * @returns {object[]} One descriptor per entry of `columns`.
   * @throws {CsvError} `CSV_OPTION_COLUMNS_MISSING_NAME` when an object has
   *   no string `name`, `CSV_INVALID_COLUMN_DEFINITION` for any other type.
   */
  const normalize_columns_array = function(columns){
    return Array.from(columns, (definition, position) => {
      if(definition === undefined || definition === null || definition === false){
        return { disabled: true };
      }
      if(typeof definition === 'string'){
        return { name: definition };
      }
      if(!is_object(definition)){
        throw new CsvError('CSV_INVALID_COLUMN_DEFINITION', [
          'Invalid column definition:',
          'expect a string or a literal object,',
          `got ${JSON.stringify(definition)} at position ${position}`
        ]);
      }
      if(typeof definition.name !== 'string'){
        throw new CsvError('CSV_OPTION_COLUMNS_MISSING_NAME', [
          'Option columns missing name:',
          `property "name" is required at position ${position}`,
          'when column is an object literal'
        ]);
      }
      return definition;
    });
  };
  /**
   * Growable byte buffer.
   *
   * Bytes are stored in `buf`, a Buffer of capacity `size`, of which only the
   * first `length` bytes are meaningful. The capacity doubles whenever more
   * room is required.
   */
  class ResizeableBuffer{
    /**
     * @param {number} [size=100] - Initial capacity in bytes.
     */
    constructor(size=100){
      this.size = size;
      this.length = 0;
      this.buf = Buffer.allocUnsafe(size);
    }
    /**
     * Insert content in front of the bytes already stored.
     *
     * @param {Buffer|number} val - A buffer, or a single byte value.
     */
    prepend(val){
      if(Buffer.isBuffer(val)){
        const length = this.length + val.length;
        // Grow as many times as needed: a single doubling is not enough
        // when `val` is larger than the current capacity.
        while(length >= this.size){
          this.resize();
        }
        // Shift the existing bytes to the right, `copyWithin` handles the
        // overlapping ranges, then write `val` in the freed space.
        this.buf.copyWithin(val.length, 0, this.length);
        val.copy(this.buf, 0);
        this.length = length;
      }else {
        if(this.length === this.size){
          this.resize();
        }
        this.buf.copyWithin(1, 0, this.length);
        this.buf[0] = val;
        this.length++;
      }
    }
    /**
     * Add one byte after the bytes already stored.
     *
     * @param {number} val - Byte value.
     */
    append(val){
      if(this.length === this.size){
        this.resize();
      }
      this.buf[this.length++] = val;
    }
    /**
     * @returns {Buffer} A copy of the meaningful bytes.
     */
    clone(){
      return Buffer.from(this.buf.slice(0, this.length));
    }
    /**
     * Double the capacity while preserving the stored bytes.
     */
    resize(){
      const length = this.length;
      // A zero capacity would stay zero once doubled, force at least 1 byte
      this.size = Math.max(1, this.size * 2);
      const buf = Buffer.allocUnsafe(this.size);
      this.buf.copy(buf, 0, 0, length);
      this.buf = buf;
    }
    /**
     * @param {string} [encoding] - When provided, decode the content with it.
     * @returns {string|Uint8Array} The decoded string, or a copy of the raw
     *   bytes when no encoding is given.
     */
    toString(encoding){
      if(encoding){
        return this.buf.slice(0, this.length).toString(encoding);
      }else {
        return Uint8Array.prototype.slice.call(this.buf.slice(0, this.length));
      }
    }
    /**
     * @returns {string} The content decoded as UTF-8.
     */
    toJSON(){
      return this.toString('utf8');
    }
    /**
     * Discard the content, the allocated capacity is kept.
     */
    reset(){
      this.length = 0;
    }
  }
  // Byte values of ASCII white space characters, used by `init_state` to
  // build its `timchars` list.
  // References:
  // https://en.wikipedia.org/wiki/Whitespace_character
  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Character_Classes#Types
  // Complete set of white space characters, for reference:
  // \f\n\r\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff
  const tab = 0x09; // `\t`, horizontal tab, 9 in decimal
  const nl$1 = 0x0a; // `\n`, newline, 10 in decimal
  const np = 0x0c; // `\f`, form feed (new page), 12 in decimal
  const cr$1 = 0x0d; // `\r`, carriage return, 13 in decimal
  const space = 0x20; // ` `, space, 32 in decimal
  /**
   * Build the initial parser state from normalized options.
   *
   * @param {object} options - Options as returned by `normalize_options`;
   *   `comment`, `quote` and `escape` are Buffers or `null`, `delimiter` and
   *   `record_delimiter` are arrays of Buffers.
   * @returns {object} A fresh state object.
   */
  const init_state = function(options){
    // Encode one ASCII byte the way it appears in the configured encoding
    const encode_byte = (byte) => Buffer.from(String.fromCharCode(byte), options.encoding);
    // Lengths of the sequences which may be split across two chunks
    const comment_length = options.comment !== null ? options.comment.length : 0;
    const delimiter_lengths = options.delimiter.map((delimiter) => delimiter.length);
    const quote_length = options.quote !== null ? options.quote.length : 0;
    const record_delimiter_lengths = options.record_delimiter.map((v) => v.length);
    const escape_is_quote = Buffer.isBuffer(options.escape)
      && Buffer.isBuffer(options.quote)
      && Buffer.compare(options.escape, options.quote) === 0;
    return {
      bomSkipped: false,
      bufBytesStart: 0,
      castField: options.cast_function,
      commenting: false,
      // Current error encountered by a record
      error: undefined,
      enabled: options.from_line === 1,
      escaping: false,
      escapeIsQuote: escape_is_quote,
      // columns can be `false`, `true`, `Array`
      expectedRecordLength: Array.isArray(options.columns) ? options.columns.length : undefined,
      field: new ResizeableBuffer(20),
      firstLineToHeaders: options.cast_first_line_to_header,
      // Longest of the comment, delimiter and quote sequences
      needMoreDataSize: Math.max(comment_length, ...delimiter_lengths, quote_length),
      previousBuf: undefined,
      quoting: false,
      stop: false,
      rawBuffer: new ResizeableBuffer(100),
      record: [],
      recordHasError: false,
      record_length: 0,
      // `Math.max()` without argument is `-Infinity`, hence the guard
      recordDelimiterMaxLength: record_delimiter_lengths.length === 0 ? 0 : Math.max(...record_delimiter_lengths),
      // First byte of the encoded space and tab characters
      trimChars: [Buffer.from(' ', options.encoding)[0], Buffer.from('\t', options.encoding)[0]],
      wasQuoting: false,
      wasRowDelimiter: false,
      // Encoded forms of `\r`, `\n`, `\f`, space and tab, in that order
      timchars: [cr$1, nl$1, np, space, tab].map(encode_byte)
    };
  };
  /**
   * Convert a camelCase identifier to snake_case: every ASCII uppercase
   * letter is replaced by an underscore followed by its lowercase form.
   *
   * @param {string} str - Identifier to convert.
   * @returns {string} The converted identifier.
   */
  const underscore = function(str){
    const to_snake = (_, upper) => `_${upper.toLowerCase()}`;
    return str.replace(/([A-Z])/g, to_snake);
  };
  /**
   * Normalize an option restricted to boolean values, in place.
   * `undefined`, `null` and `false` become `false`, `true` is kept and any
   * other value is rejected.
   *
   * @param {object} options - Options being normalized.
   * @param {string} name - Option name, in snake case.
   * @param {string} [expectation='a boolean'] - Wording used in the error.
   * @throws {Error} When the value is not a boolean.
   */
  const normalize_boolean_option = function(options, name, expectation = 'a boolean'){
    const value = options[name];
    if(value === undefined || value === null || value === false){
      options[name] = false;
    }else if(value !== true){
      throw new Error(`Invalid Option: ${name} must be ${expectation}, got ${JSON.stringify(value)}`);
    }
  };
  /**
   * Normalize an option holding an integer, in place. Strings containing
   * digits are parsed as decimal integers.
   *
   * @param {object} options - Options being normalized.
   * @param {string} name - Option name, in snake case.
   * @param {number} default_value - Value used for `undefined` and `null`.
   * @param {number} minimum - Smallest accepted value, `0` or `1`.
   * @throws {Error} When the value is not an integer or is below `minimum`.
   *   The message reports the value as provided by the user.
   */
  const normalize_integer_option = function(options, name, default_value, minimum){
    const input = options[name];
    if(input === undefined || input === null){
      options[name] = default_value;
      return;
    }
    let value = input;
    if(typeof value === 'string' && /\d+/.test(value)){
      value = parseInt(value, 10);
    }
    if(!Number.isInteger(value)){
      throw new Error(`Invalid Option: ${name} must be an integer, got ${JSON.stringify(input)}`);
    }
    if(value < minimum){
      const expectation = minimum === 0 ? 'a positive integer' : 'a positive integer greater than 0';
      throw new Error(`Invalid Option: ${name} must be ${expectation}, got ${JSON.stringify(input)}`);
    }
    options[name] = value;
  };
  /**
   * Validate user options and convert them to their internal representation.
   *
   * Keys are accepted in camel case or snake case and are all exposed in
   * snake case. Characters and strings used for matching (`comment`,
   * `delimiter`, `escape`, `quote`, `record_delimiter`) are converted to
   * Buffers using the `encoding` option. The input object is not modified.
   *
   * @param {object} [opts] - User options.
   * @returns {object} A new object holding the normalized options.
   * @throws {CsvError|Error} When an option has an invalid value or when
   *   options are combined in an unsupported way.
   */
  const normalize_options = function(opts){
    const options = {};
    // Merge with user options
    for(const opt in opts){
      options[underscore(opt)] = opts[opt];
    }
    // Normalize option `encoding`
    // Note: defined first because other options depends on it
    // to convert chars/strings into buffers.
    if(options.encoding === undefined || options.encoding === true){
      options.encoding = 'utf8';
    }else if(options.encoding === null || options.encoding === false){
      options.encoding = null;
    }else if(typeof options.encoding !== 'string'){
      throw new CsvError('CSV_INVALID_OPTION_ENCODING', [
        'Invalid option encoding:',
        'encoding must be a string or null to return a buffer,',
        `got ${JSON.stringify(options.encoding)}`
      ], options);
    }
    // Normalize option `bom`
    if(options.bom === undefined || options.bom === null || options.bom === false){
      options.bom = false;
    }else if(options.bom !== true){
      throw new CsvError('CSV_INVALID_OPTION_BOM', [
        'Invalid option bom:', 'bom must be true,',
        `got ${JSON.stringify(options.bom)}`
      ], options);
    }
    // Normalize option `cast`
    // A user function is moved to `cast_function` and `cast` becomes a flag
    options.cast_function = null;
    if(options.cast === undefined || options.cast === null || options.cast === false || options.cast === ''){
      options.cast = undefined;
    }else if(typeof options.cast === 'function'){
      options.cast_function = options.cast;
      options.cast = true;
    }else if(options.cast !== true){
      throw new CsvError('CSV_INVALID_OPTION_CAST', [
        'Invalid option cast:', 'cast must be true or a function,',
        `got ${JSON.stringify(options.cast)}`
      ], options);
    }
    // Normalize option `cast_date`
    if(options.cast_date === undefined || options.cast_date === null || options.cast_date === false || options.cast_date === ''){
      options.cast_date = false;
    }else if(options.cast_date === true){
      // Default conversion: values which `Date.parse` rejects are left as-is
      options.cast_date = function(value){
        const date = Date.parse(value);
        return !Number.isNaN(date) ? new Date(date) : value;
      };
    }else if(typeof options.cast_date !== 'function'){
      throw new CsvError('CSV_INVALID_OPTION_CAST_DATE', [
        'Invalid option cast_date:', 'cast_date must be true or a function,',
        `got ${JSON.stringify(options.cast_date)}`
      ], options);
    }
    // Normalize option `columns`
    options.cast_first_line_to_header = null;
    if(options.columns === true){
      // Fields in the first line are converted as-is to columns
      options.cast_first_line_to_header = undefined;
    }else if(typeof options.columns === 'function'){
      options.cast_first_line_to_header = options.columns;
      options.columns = true;
    }else if(Array.isArray(options.columns)){
      options.columns = normalize_columns_array(options.columns);
    }else if(options.columns === undefined || options.columns === null || options.columns === false){
      options.columns = false;
    }else {
      throw new CsvError('CSV_INVALID_OPTION_COLUMNS', [
        'Invalid option columns:',
        'expect an array, a function or true,',
        `got ${JSON.stringify(options.columns)}`
      ], options);
    }
    // Normalize option `group_columns_by_name`
    if(options.group_columns_by_name === undefined || options.group_columns_by_name === null || options.group_columns_by_name === false){
      options.group_columns_by_name = false;
    }else if(options.group_columns_by_name !== true){
      throw new CsvError('CSV_INVALID_OPTION_GROUP_COLUMNS_BY_NAME', [
        'Invalid option group_columns_by_name:',
        'expect an boolean,',
        `got ${JSON.stringify(options.group_columns_by_name)}`
      ], options);
    }else if(options.columns === false){
      throw new CsvError('CSV_INVALID_OPTION_GROUP_COLUMNS_BY_NAME', [
        'Invalid option group_columns_by_name:',
        'the `columns` mode must be activated.'
      ], options);
    }
    // Normalize option `comment`
    if(options.comment === undefined || options.comment === null || options.comment === false || options.comment === ''){
      options.comment = null;
    }else {
      if(typeof options.comment === 'string'){
        options.comment = Buffer.from(options.comment, options.encoding);
      }
      if(!Buffer.isBuffer(options.comment)){
        throw new CsvError('CSV_INVALID_OPTION_COMMENT', [
          'Invalid option comment:',
          'comment must be a buffer or a string,',
          `got ${JSON.stringify(options.comment)}`
        ], options);
      }
    }
    // Normalize option `comment_no_infix`
    if(options.comment_no_infix === undefined || options.comment_no_infix === null || options.comment_no_infix === false){
      options.comment_no_infix = false;
    }else if(options.comment_no_infix !== true){
      throw new CsvError('CSV_INVALID_OPTION_COMMENT', [
        'Invalid option comment_no_infix:',
        'value must be a boolean,',
        `got ${JSON.stringify(options.comment_no_infix)}`
      ], options);
    }
    // Normalize option `delimiter`
    // Serialized before conversion to report the value as provided
    const delimiter_json = JSON.stringify(options.delimiter);
    if(!Array.isArray(options.delimiter)) options.delimiter = [options.delimiter];
    if(options.delimiter.length === 0){
      throw new CsvError('CSV_INVALID_OPTION_DELIMITER', [
        'Invalid option delimiter:',
        'delimiter must be a non empty string or buffer or array of string|buffer,',
        `got ${delimiter_json}`
      ], options);
    }
    options.delimiter = options.delimiter.map(function(delimiter){
      if(delimiter === undefined || delimiter === null || delimiter === false){
        return Buffer.from(',', options.encoding);
      }
      if(typeof delimiter === 'string'){
        delimiter = Buffer.from(delimiter, options.encoding);
      }
      if(!Buffer.isBuffer(delimiter) || delimiter.length === 0){
        throw new CsvError('CSV_INVALID_OPTION_DELIMITER', [
          'Invalid option delimiter:',
          'delimiter must be a non empty string or buffer or array of string|buffer,',
          `got ${delimiter_json}`
        ], options);
      }
      return delimiter;
    });
    // Normalize option `escape`
    if(options.escape === undefined || options.escape === true){
      options.escape = Buffer.from('"', options.encoding);
    }else if(typeof options.escape === 'string'){
      options.escape = Buffer.from(options.escape, options.encoding);
    }else if(options.escape === null || options.escape === false){
      options.escape = null;
    }
    if(options.escape !== null && !Buffer.isBuffer(options.escape)){
      throw new Error(`Invalid Option: escape must be a buffer, a string or a boolean, got ${JSON.stringify(options.escape)}`);
    }
    // Normalize option `from`, zero is accepted
    normalize_integer_option(options, 'from', 1, 0);
    // Normalize option `from_line`
    normalize_integer_option(options, 'from_line', 1, 1);
    // Normalize options `ignore_last_delimiters`
    if(options.ignore_last_delimiters === undefined || options.ignore_last_delimiters === null){
      options.ignore_last_delimiters = false;
    }else if(typeof options.ignore_last_delimiters === 'number'){
      options.ignore_last_delimiters = Math.floor(options.ignore_last_delimiters);
      if(options.ignore_last_delimiters === 0){
        options.ignore_last_delimiters = false;
      }
    }else if(typeof options.ignore_last_delimiters !== 'boolean'){
      throw new CsvError('CSV_INVALID_OPTION_IGNORE_LAST_DELIMITERS', [
        'Invalid option `ignore_last_delimiters`:',
        'the value must be a boolean value or an integer,',
        `got ${JSON.stringify(options.ignore_last_delimiters)}`
      ], options);
    }
    if(options.ignore_last_delimiters === true && options.columns === false){
      throw new CsvError('CSV_IGNORE_LAST_DELIMITERS_REQUIRES_COLUMNS', [
        'The option `ignore_last_delimiters`',
        'requires the activation of the `columns` option'
      ], options);
    }
    // Normalize option `info`
    normalize_boolean_option(options, 'info', 'true');
    // Normalize option `max_record_size`
    if(options.max_record_size === undefined || options.max_record_size === null || options.max_record_size === false){
      options.max_record_size = 0;
    }else if(typeof options.max_record_size === 'string' && /\d+/.test(options.max_record_size)){
      options.max_record_size = parseInt(options.max_record_size, 10);
    }else if(!(Number.isInteger(options.max_record_size) && options.max_record_size >= 0)){
      throw new Error(`Invalid Option: max_record_size must be a positive integer, got ${JSON.stringify(options.max_record_size)}`);
    }
    // Normalize option `objname`
    if(options.objname === undefined || options.objname === null || options.objname === false){
      options.objname = undefined;
    }else if(Buffer.isBuffer(options.objname)){
      if(options.objname.length === 0){
        throw new Error(`Invalid Option: objname must be a non empty buffer`);
      }
      // The buffer is kept untouched when no encoding is defined
      if(options.encoding !== null){
        options.objname = options.objname.toString(options.encoding);
      }
    }else if(typeof options.objname === 'string'){
      if(options.objname.length === 0){
        throw new Error(`Invalid Option: objname must be a non empty string`);
      }
    }else if(typeof options.objname !== 'number'){
      throw new Error(`Invalid Option: objname must be a string or a buffer, got ${options.objname}`);
    }
    if(options.objname !== undefined){
      if(typeof options.objname === 'number'){
        if(options.columns !== false){
          throw new Error('Invalid Option: objname index cannot be combined with columns or be defined as a field');
        }
      }else if(options.columns === false){
        // A string or a buffer
        throw new Error('Invalid Option: objname field must be combined with columns or be defined as an index');
      }
    }
    // Normalize option `on_record`
    if(options.on_record === undefined || options.on_record === null){
      options.on_record = undefined;
    }else if(typeof options.on_record !== 'function'){
      throw new CsvError('CSV_INVALID_OPTION_ON_RECORD', [
        'Invalid option `on_record`:',
        'expect a function,',
        `got ${JSON.stringify(options.on_record)}`
      ], options);
    }
    // Normalize option `quote`
    if(options.quote === null || options.quote === false || options.quote === ''){
      options.quote = null;
    }else {
      if(options.quote === undefined || options.quote === true){
        options.quote = Buffer.from('"', options.encoding);
      }else if(typeof options.quote === 'string'){
        options.quote = Buffer.from(options.quote, options.encoding);
      }
      if(!Buffer.isBuffer(options.quote)){
        throw new Error(`Invalid Option: quote must be a buffer or a string, got ${JSON.stringify(options.quote)}`);
      }
    }
    // Normalize option `raw`
    normalize_boolean_option(options, 'raw', 'true');
    // Normalize option `record_delimiter`
    // An empty array means the delimiter is not defined by the user
    if(options.record_delimiter === undefined){
      options.record_delimiter = [];
    }else if(typeof options.record_delimiter === 'string' || Buffer.isBuffer(options.record_delimiter)){
      if(options.record_delimiter.length === 0){
        throw new CsvError('CSV_INVALID_OPTION_RECORD_DELIMITER', [
          'Invalid option `record_delimiter`:',
          'value must be a non empty string or buffer,',
          `got ${JSON.stringify(options.record_delimiter)}`
        ], options);
      }
      options.record_delimiter = [options.record_delimiter];
    }else if(!Array.isArray(options.record_delimiter)){
      throw new CsvError('CSV_INVALID_OPTION_RECORD_DELIMITER', [
        'Invalid option `record_delimiter`:',
        'value must be a string, a buffer or array of string|buffer,',
        `got ${JSON.stringify(options.record_delimiter)}`
      ], options);
    }
    options.record_delimiter = options.record_delimiter.map(function(rd, i){
      if(typeof rd !== 'string' && !Buffer.isBuffer(rd)){
        throw new CsvError('CSV_INVALID_OPTION_RECORD_DELIMITER', [
          'Invalid option `record_delimiter`:',
          'value must be a string, a buffer or array of string|buffer',
          `at index ${i},`,
          `got ${JSON.stringify(rd)}`
        ], options);
      }else if(rd.length === 0){
        throw new CsvError('CSV_INVALID_OPTION_RECORD_DELIMITER', [
          'Invalid option `record_delimiter`:',
          'value must be a non empty string or buffer',
          `at index ${i},`,
          `got ${JSON.stringify(rd)}`
        ], options);
      }
      if(typeof rd === 'string'){
        rd = Buffer.from(rd, options.encoding);
      }
      return rd;
    });
    // Normalize the boolean options, validated in this order
    normalize_boolean_option(options, 'relax_column_count');
    normalize_boolean_option(options, 'relax_column_count_less');
    normalize_boolean_option(options, 'relax_column_count_more');
    normalize_boolean_option(options, 'relax_quotes');
    normalize_boolean_option(options, 'skip_empty_lines');
    normalize_boolean_option(options, 'skip_records_with_empty_values');
    normalize_boolean_option(options, 'skip_records_with_error');
    normalize_boolean_option(options, 'rtrim');
    normalize_boolean_option(options, 'ltrim');
    normalize_boolean_option(options, 'trim');
    // Normalize options `trim`, `ltrim` and `rtrim`
    // `trim` activates both sides unless the user explicitly disabled one,
    // which is only visible in the original options at this point.
    if(options.trim === true){
      if(opts.ltrim !== false) options.ltrim = true;
      if(opts.rtrim !== false) options.rtrim = true;
    }
    // Normalize option `to`, -1 means no limit was provided
    normalize_integer_option(options, 'to', -1, 1);
    // Normalize option `to_line`, -1 means no limit was provided
    normalize_integer_option(options, 'to_line', -1, 1);
    return options;
  };
  /**
   * Tell whether a record only holds empty fields.
   *
   * A field is empty when it is `null`, `undefined`, or when its string
   * representation only contains white space. A field without a `toString`
   * method is considered non-empty.
   *
   * @param {Array} record - Fields of the record.
   * @returns {boolean} `true` when every field is empty.
   */
  const isRecordEmpty = function(record){
    const hasContent = (field) => {
      if(field == null){
        return false;
      }
      if(!field.toString){
        return true;
      }
      return field.toString().trim() !== '';
    };
    return !record.some(hasContent);
  };
  const cr = 0x0d; // `\r`, carriage return, 13 in decimal
  const nl = 0x0a; // `\n`, newline, 10 in decimal
  // Byte order marks, indexed by encoding name
  const boms = {
    // Note, the following are equal:
    // Buffer.from("\ufeff")
    // Buffer.from([239, 187, 191])
    // Buffer.from('EFBBBF', 'hex')
    utf8: Buffer.from('EFBBBF', 'hex'),
    // Note, the following are equal:
    // Buffer.from("\ufeff", 'utf16le')
    // Buffer.from([255, 254])
    // Buffer.from('FFFE', 'hex')
    utf16le: Buffer.from('FFFE', 'hex')
  };
  601. const transform = function(original_options = {}) {
  602. const info = {
  603. bytes: 0,
  604. comment_lines: 0,
  605. empty_lines: 0,
  606. invalid_field_length: 0,
  607. lines: 1,
  608. records: 0
  609. };
  610. const options = normalize_options(original_options);
  611. return {
  612. info: info,
  613. original_options: original_options,
  614. options: options,
  615. state: init_state(options),
  616. __needMoreData: function(i, bufLen, end){
  617. if(end) return false;
  618. const {encoding, escape, quote} = this.options;
  619. const {quoting, needMoreDataSize, recordDelimiterMaxLength} = this.state;
  620. const numOfCharLeft = bufLen - i - 1;
  621. const requiredLength = Math.max(
  622. needMoreDataSize,
  623. // Skip if the remaining buffer smaller than record delimiter
  624. // If "record_delimiter" is yet to be discovered:
  625. // 1. It is equals to `[]` and "recordDelimiterMaxLength" equals `0`
  626. // 2. We set the length to windows line ending in the current encoding
  627. // Note, that encoding is known from user or bom discovery at that point
  628. // recordDelimiterMaxLength,
  629. recordDelimiterMaxLength === 0 ? Buffer.from('\r\n', encoding).length : recordDelimiterMaxLength,
  630. // Skip if remaining buffer can be an escaped quote
  631. quoting ? ((escape === null ? 0 : escape.length) + quote.length) : 0,
  632. // Skip if remaining buffer can be record delimiter following the closing quote
  633. quoting ? (quote.length + recordDelimiterMaxLength) : 0,
  634. );
  635. return numOfCharLeft < requiredLength;
  636. },
    // Central parser implementation
    //
    // Consume a chunk of bytes and emit the records found in it.
    //
    // Bytes left over from the previous call (`state.previousBuf`) are
    // prepended to `nextBuf`. Unless `end` is true, the loop stops as soon as
    // `__needMoreData` reports a too short look-ahead and the unread tail is
    // stored in `state.previousBuf` for the next call.
    //
    // Parameters:
    // - `nextBuf`: new bytes to parse, or `undefined` when there is nothing to add
    // - `end`: `true` when no more data will follow
    // - `push`: forwarded to `__onRecord` to deliver the records
    // - `close`: called when parsing stops early (`to`, `to_line`) or when no
    //   data was ever received
    //
    // Returns the error to report when there is one, otherwise `undefined`.
    parse: function(nextBuf, end, push, close){
      const {bom, comment_no_infix, encoding, from_line, ltrim, max_record_size,raw, relax_quotes, rtrim, skip_empty_lines, to, to_line} = this.options;
      // Declared with `let` because they are re-read after a BOM discovery or
      // after the record delimiter auto discovery
      let {comment, escape, quote, record_delimiter} = this.options;
      const {bomSkipped, previousBuf, rawBuffer, escapeIsQuote} = this.state;
      // Assemble the working buffer from the leftover and the new chunk
      let buf;
      if(previousBuf === undefined){
        if(nextBuf === undefined){
          // Handle empty string
          close();
          return;
        }else {
          buf = nextBuf;
        }
      }else if(previousBuf !== undefined && nextBuf === undefined){
        buf = previousBuf;
      }else {
        buf = Buffer.concat([previousBuf, nextBuf]);
      }
      // Handle UTF BOM
      if(bomSkipped === false){
        if(bom === false){
          this.state.bomSkipped = true;
        }else if(buf.length < 3){
          // Not enough data, 3 bytes is the length of the utf8 BOM
          if(end === false){
            // Wait for more data
            this.state.previousBuf = buf;
            return;
          }
        }else {
          for(const encoding in boms){
            if(boms[encoding].compare(buf, 0, boms[encoding].length) === 0){
              // Skip BOM
              const bomLength = boms[encoding].length;
              this.state.bufBytesStart += bomLength;
              buf = buf.slice(bomLength);
              // Renormalize original options with the new encoding
              this.options = normalize_options({...this.original_options, encoding: encoding});
              // Options will re-evaluate the Buffer with the new encoding
              ({comment, escape, quote } = this.options);
              break;
            }
          }
          this.state.bomSkipped = true;
        }
      }
      const bufLen = buf.length;
      let pos;
      // Note, multi-byte tokens advance `pos` by their length minus one, the
      // loop increment accounts for the last byte
      for(pos = 0; pos < bufLen; pos++){
        // Ensure we get enough space to look ahead
        // There should be a way to move this out of the loop
        if(this.__needMoreData(pos, bufLen, end)){
          break;
        }
        // The line counter is incremented on the byte following a `\r` or `\n`
        if(this.state.wasRowDelimiter === true){
          this.info.lines++;
          this.state.wasRowDelimiter = false;
        }
        // Stop once the line number exceeds the `to_line` option
        if(to_line !== -1 && this.info.lines > to_line){
          this.state.stop = true;
          close();
          return;
        }
        // Auto discovery of record_delimiter, unix, mac and windows supported
        if(this.state.quoting === false && record_delimiter.length === 0){
          const record_delimiterCount = this.__autoDiscoverRecordDelimiter(buf, pos);
          if(record_delimiterCount){
            record_delimiter = this.options.record_delimiter;
          }
        }
        const chr = buf[pos];
        if(raw === true){
          rawBuffer.append(chr);
        }
        if((chr === cr || chr === nl) && this.state.wasRowDelimiter === false){
          this.state.wasRowDelimiter = true;
        }
        // Previous char was a valid escape char
        // treat the current char as a regular char
        if(this.state.escaping === true){
          this.state.escaping = false;
        }else {
          // Escape is only active inside quoted fields
          // We are quoting, the char is an escape chr and there is a chr to escape
          // if(escape !== null && this.state.quoting === true && chr === escape && pos + 1 < bufLen){
          if(escape !== null && this.state.quoting === true && this.__isEscape(buf, pos, chr) && pos + escape.length < bufLen){
            if(escapeIsQuote){
              // When escape and quote are the same, only a following quote is escaped
              if(this.__isQuote(buf, pos+escape.length)){
                this.state.escaping = true;
                pos += escape.length - 1;
                continue;
              }
            }else {
              this.state.escaping = true;
              pos += escape.length - 1;
              continue;
            }
          }
          // Not currently escaping and chr is a quote
          // TODO: need to compare bytes instead of single char
          if(this.state.commenting === false && this.__isQuote(buf, pos)){
            if(this.state.quoting === true){
              // Inspect what follows the quote to decide if it closes the field
              const nextChr = buf[pos+quote.length];
              const isNextChrTrimable = rtrim && this.__isCharTrimable(buf, pos+quote.length);
              const isNextChrComment = comment !== null && this.__compareBytes(comment, buf, pos+quote.length, nextChr);
              const isNextChrDelimiter = this.__isDelimiter(buf, pos+quote.length, nextChr);
              const isNextChrRecordDelimiter = record_delimiter.length === 0 ? this.__autoDiscoverRecordDelimiter(buf, pos+quote.length) : this.__isRecordDelimiter(nextChr, buf, pos+quote.length);
              // Escape a quote
              // Treat next char as a regular character
              if(escape !== null && this.__isEscape(buf, pos, chr) && this.__isQuote(buf, pos + escape.length)){
                pos += escape.length - 1;
              }else if(!nextChr || isNextChrDelimiter || isNextChrRecordDelimiter || isNextChrComment || isNextChrTrimable){
                // Valid closing quote
                this.state.quoting = false;
                this.state.wasQuoting = true;
                pos += quote.length - 1;
                continue;
              }else if(relax_quotes === false){
                const err = this.__error(
                  new CsvError('CSV_INVALID_CLOSING_QUOTE', [
                    'Invalid Closing Quote:',
                    `got "${String.fromCharCode(nextChr)}"`,
                    `at line ${this.info.lines}`,
                    'instead of delimiter, record delimiter, trimable character',
                    '(if activated) or comment',
                  ], this.options, this.__infoField())
                );
                if(err !== undefined) return err;
              }else {
                // In relax_quotes mode, leave the quoting state and restore the
                // opening quote in front of the field
                this.state.quoting = false;
                this.state.wasQuoting = true;
                this.state.field.prepend(quote);
                pos += quote.length - 1;
              }
            }else {
              if(this.state.field.length !== 0){
                // In relax_quotes mode, treat opening quote preceded by chrs as regular
                if(relax_quotes === false){
                  const info = this.__infoField();
                  // Name of the BOM matching the field content, if any, to enrich the message
                  const bom = Object.keys(boms).map(b => boms[b].equals(this.state.field.toString()) ? b : false).filter(Boolean)[0];
                  const err = this.__error(
                    new CsvError('INVALID_OPENING_QUOTE', [
                      'Invalid Opening Quote:',
                      `a quote is found on field ${JSON.stringify(info.column)} at line ${info.lines}, value is ${JSON.stringify(this.state.field.toString(encoding))}`,
                      bom ? `(${bom} bom)` : undefined
                    ], this.options, info, {
                      field: this.state.field,
                    })
                  );
                  if(err !== undefined) return err;
                }
              }else {
                // Opening quote at the start of a field
                this.state.quoting = true;
                pos += quote.length - 1;
                continue;
              }
            }
          }
          if(this.state.quoting === false){
            const recordDelimiterLength = this.__isRecordDelimiter(chr, buf, pos);
            if(recordDelimiterLength !== 0){
              // Do not emit comments which take a full line
              const skipCommentLine = this.state.commenting && (this.state.wasQuoting === false && this.state.record.length === 0 && this.state.field.length === 0);
              if(skipCommentLine){
                this.info.comment_lines++;
                // Skip full comment line
              }else {
                // Activate records emission if above from_line
                if(this.state.enabled === false && this.info.lines + (this.state.wasRowDelimiter === true ? 1: 0) >= from_line){
                  this.state.enabled = true;
                  this.__resetField();
                  this.__resetRecord();
                  pos += recordDelimiterLength - 1;
                  continue;
                }
                // Skip if line is empty and skip_empty_lines activated
                if(skip_empty_lines === true && this.state.wasQuoting === false && this.state.record.length === 0 && this.state.field.length === 0){
                  this.info.empty_lines++;
                  pos += recordDelimiterLength - 1;
                  continue;
                }
                // Byte offset before the record delimiter, visible while the field is built
                this.info.bytes = this.state.bufBytesStart + pos;
                const errField = this.__onField();
                if(errField !== undefined) return errField;
                // Byte offset after the record delimiter, visible while the record is built
                this.info.bytes = this.state.bufBytesStart + pos + recordDelimiterLength;
                const errRecord = this.__onRecord(push);
                if(errRecord !== undefined) return errRecord;
                // Stop once the number of records reaches the `to` option
                if(to !== -1 && this.info.records >= to){
                  this.state.stop = true;
                  close();
                  return;
                }
              }
              // A comment ends with the line
              this.state.commenting = false;
              pos += recordDelimiterLength - 1;
              continue;
            }
            // Ignore every byte of a comment
            if(this.state.commenting){
              continue;
            }
            const commentCount = comment === null ? 0 : this.__compareBytes(comment, buf, pos, chr);
            // With comment_no_infix, a comment is only honored when the current field is empty
            if(commentCount !== 0 && (comment_no_infix === false || this.state.field.length === 0)){
              this.state.commenting = true;
              continue;
            }
            const delimiterLength = this.__isDelimiter(buf, pos, chr);
            if(delimiterLength !== 0){
              this.info.bytes = this.state.bufBytesStart + pos;
              const errField = this.__onField();
              if(errField !== undefined) return errField;
              pos += delimiterLength - 1;
              continue;
            }
          }
        }
        // From here, the byte is a candidate to be appended to the current field
        if(this.state.commenting === false){
          if(max_record_size !== 0 && this.state.record_length + this.state.field.length > max_record_size){
            return this.__error(
              new CsvError('CSV_MAX_RECORD_SIZE', [
                'Max Record Size:',
                'record exceed the maximum number of tolerated bytes',
                `of ${max_record_size}`,
                `at line ${this.info.lines}`,
              ], this.options, this.__infoField())
            );
          }
        }
        // With ltrim, leading trimable chars of an unquoted field are not appended
        const lappend = ltrim === false || this.state.quoting === true || this.state.field.length !== 0 || !this.__isCharTrimable(buf, pos);
        // rtrim in non quoting is handled in __onField
        const rappend = rtrim === false || this.state.wasQuoting === false;
        if(lappend === true && rappend === true){
          this.state.field.append(chr);
        }else if(rtrim === true && !this.__isCharTrimable(buf, pos)){
          return this.__error(
            new CsvError('CSV_NON_TRIMABLE_CHAR_AFTER_CLOSING_QUOTE', [
              'Invalid Closing Quote:',
              'found non trimable byte after quote',
              `at line ${this.info.lines}`,
            ], this.options, this.__infoField())
          );
        }else {
          if(lappend === false){
            // Jump over the whole trimable sequence
            pos += this.__isCharTrimable(buf, pos) - 1;
          }
          continue;
        }
      }
      if(end === true){
        // Ensure we are not ending in a quoting state
        if(this.state.quoting === true){
          const err = this.__error(
            new CsvError('CSV_QUOTE_NOT_CLOSED', [
              'Quote Not Closed:',
              `the parsing is finished with an opening quote at line ${this.info.lines}`,
            ], this.options, this.__infoField())
          );
          if(err !== undefined) return err;
        }else {
          // Skip last line if it has no characters
          if(this.state.wasQuoting === true || this.state.record.length !== 0 || this.state.field.length !== 0){
            this.info.bytes = this.state.bufBytesStart + pos;
            const errField = this.__onField();
            if(errField !== undefined) return errField;
            const errRecord = this.__onRecord(push);
            if(errRecord !== undefined) return errRecord;
          }else if(this.state.wasRowDelimiter === true){
            this.info.empty_lines++;
          }else if(this.state.commenting === true){
            this.info.comment_lines++;
          }
        }
      }else {
        // Keep the unread bytes for the next call
        this.state.bufBytesStart += pos;
        this.state.previousBuf = buf.slice(pos);
      }
      if(this.state.wasRowDelimiter === true){
        this.info.lines++;
        this.state.wasRowDelimiter = false;
      }
    },
  917. __onRecord: function(push){
  918. const {columns, group_columns_by_name, encoding, info, from, relax_column_count, relax_column_count_less, relax_column_count_more, raw, skip_records_with_empty_values} = this.options;
  919. const {enabled, record} = this.state;
  920. if(enabled === false){
  921. return this.__resetRecord();
  922. }
  923. // Convert the first line into column names
  924. const recordLength = record.length;
  925. if(columns === true){
  926. if(skip_records_with_empty_values === true && isRecordEmpty(record)){
  927. this.__resetRecord();
  928. return;
  929. }
  930. return this.__firstLineToColumns(record);
  931. }
  932. if(columns === false && this.info.records === 0){
  933. this.state.expectedRecordLength = recordLength;
  934. }
  935. if(recordLength !== this.state.expectedRecordLength){
  936. const err = columns === false ?
  937. new CsvError('CSV_RECORD_INCONSISTENT_FIELDS_LENGTH', [
  938. 'Invalid Record Length:',
  939. `expect ${this.state.expectedRecordLength},`,
  940. `got ${recordLength} on line ${this.info.lines}`,
  941. ], this.options, this.__infoField(), {
  942. record: record,
  943. })
  944. :
  945. new CsvError('CSV_RECORD_INCONSISTENT_COLUMNS', [
  946. 'Invalid Record Length:',
  947. `columns length is ${columns.length},`, // rename columns
  948. `got ${recordLength} on line ${this.info.lines}`,
  949. ], this.options, this.__infoField(), {
  950. record: record,
  951. });
  952. if(relax_column_count === true ||
  953. (relax_column_count_less === true && recordLength < this.state.expectedRecordLength) ||
  954. (relax_column_count_more === true && recordLength > this.state.expectedRecordLength)){
  955. this.info.invalid_field_length++;
  956. this.state.error = err;
  957. // Error is undefined with skip_records_with_error
  958. }else {
  959. const finalErr = this.__error(err);
  960. if(finalErr) return finalErr;
  961. }
  962. }
  963. if(skip_records_with_empty_values === true && isRecordEmpty(record)){
  964. this.__resetRecord();
  965. return;
  966. }
  967. if(this.state.recordHasError === true){
  968. this.__resetRecord();
  969. this.state.recordHasError = false;
  970. return;
  971. }
  972. this.info.records++;
  973. if(from === 1 || this.info.records >= from){
  974. const {objname} = this.options;
  975. // With columns, records are object
  976. if(columns !== false){
  977. const obj = {};
  978. // Transform record array to an object
  979. for(let i = 0, l = record.length; i < l; i++){
  980. if(columns[i] === undefined || columns[i].disabled) continue;
  981. // Turn duplicate columns into an array
  982. if (group_columns_by_name === true && obj[columns[i].name] !== undefined) {
  983. if (Array.isArray(obj[columns[i].name])) {
  984. obj[columns[i].name] = obj[columns[i].name].concat(record[i]);
  985. } else {
  986. obj[columns[i].name] = [obj[columns[i].name], record[i]];
  987. }
  988. } else {
  989. obj[columns[i].name] = record[i];
  990. }
  991. }
  992. // Without objname (default)
  993. if(raw === true || info === true){
  994. const extRecord = Object.assign(
  995. {record: obj},
  996. (raw === true ? {raw: this.state.rawBuffer.toString(encoding)}: {}),
  997. (info === true ? {info: this.__infoRecord()}: {})
  998. );
  999. const err = this.__push(
  1000. objname === undefined ? extRecord : [obj[objname], extRecord]
  1001. , push);
  1002. if(err){
  1003. return err;
  1004. }
  1005. }else {
  1006. const err = this.__push(
  1007. objname === undefined ? obj : [obj[objname], obj]
  1008. , push);
  1009. if(err){
  1010. return err;
  1011. }
  1012. }
  1013. // Without columns, records are array
  1014. }else {
  1015. if(raw === true || info === true){
  1016. const extRecord = Object.assign(
  1017. {record: record},
  1018. raw === true ? {raw: this.state.rawBuffer.toString(encoding)}: {},
  1019. info === true ? {info: this.__infoRecord()}: {}
  1020. );
  1021. const err = this.__push(
  1022. objname === undefined ? extRecord : [record[objname], extRecord]
  1023. , push);
  1024. if(err){
  1025. return err;
  1026. }
  1027. }else {
  1028. const err = this.__push(
  1029. objname === undefined ? record : [record[objname], record]
  1030. , push);
  1031. if(err){
  1032. return err;
  1033. }
  1034. }
  1035. }
  1036. }
  1037. this.__resetRecord();
  1038. },
  1039. __firstLineToColumns: function(record){
  1040. const {firstLineToHeaders} = this.state;
  1041. try{
  1042. const headers = firstLineToHeaders === undefined ? record : firstLineToHeaders.call(null, record);
  1043. if(!Array.isArray(headers)){
  1044. return this.__error(
  1045. new CsvError('CSV_INVALID_COLUMN_MAPPING', [
  1046. 'Invalid Column Mapping:',
  1047. 'expect an array from column function,',
  1048. `got ${JSON.stringify(headers)}`
  1049. ], this.options, this.__infoField(), {
  1050. headers: headers,
  1051. })
  1052. );
  1053. }
  1054. const normalizedHeaders = normalize_columns_array(headers);
  1055. this.state.expectedRecordLength = normalizedHeaders.length;
  1056. this.options.columns = normalizedHeaders;
  1057. this.__resetRecord();
  1058. return;
  1059. }catch(err){
  1060. return err;
  1061. }
  1062. },
  1063. __resetRecord: function(){
  1064. if(this.options.raw === true){
  1065. this.state.rawBuffer.reset();
  1066. }
  1067. this.state.error = undefined;
  1068. this.state.record = [];
  1069. this.state.record_length = 0;
  1070. },
  1071. __onField: function(){
  1072. const {cast, encoding, rtrim, max_record_size} = this.options;
  1073. const {enabled, wasQuoting} = this.state;
  1074. // Short circuit for the from_line options
  1075. if(enabled === false){
  1076. return this.__resetField();
  1077. }
  1078. let field = this.state.field.toString(encoding);
  1079. if(rtrim === true && wasQuoting === false){
  1080. field = field.trimRight();
  1081. }
  1082. if(cast === true){
  1083. const [err, f] = this.__cast(field);
  1084. if(err !== undefined) return err;
  1085. field = f;
  1086. }
  1087. this.state.record.push(field);
  1088. // Increment record length if record size must not exceed a limit
  1089. if(max_record_size !== 0 && typeof field === 'string'){
  1090. this.state.record_length += field.length;
  1091. }
  1092. this.__resetField();
  1093. },
  1094. __resetField: function(){
  1095. this.state.field.reset();
  1096. this.state.wasQuoting = false;
  1097. },
  1098. __push: function(record, push){
  1099. const {on_record} = this.options;
  1100. if(on_record !== undefined){
  1101. const info = this.__infoRecord();
  1102. try{
  1103. record = on_record.call(null, record, info);
  1104. }catch(err){
  1105. return err;
  1106. }
  1107. if(record === undefined || record === null){ return; }
  1108. }
  1109. push(record);
  1110. },
  1111. // Return a tuple with the error and the casted value
  1112. __cast: function(field){
  1113. const {columns, relax_column_count} = this.options;
  1114. const isColumns = Array.isArray(columns);
  1115. // Dont loose time calling cast
  1116. // because the final record is an object
  1117. // and this field can't be associated to a key present in columns
  1118. if(isColumns === true && relax_column_count && this.options.columns.length <= this.state.record.length){
  1119. return [undefined, undefined];
  1120. }
  1121. if(this.state.castField !== null){
  1122. try{
  1123. const info = this.__infoField();
  1124. return [undefined, this.state.castField.call(null, field, info)];
  1125. }catch(err){
  1126. return [err];
  1127. }
  1128. }
  1129. if(this.__isFloat(field)){
  1130. return [undefined, parseFloat(field)];
  1131. }else if(this.options.cast_date !== false){
  1132. const info = this.__infoField();
  1133. return [undefined, this.options.cast_date.call(null, field, info)];
  1134. }
  1135. return [undefined, field];
  1136. },
  1137. // Helper to test if a character is a space or a line delimiter
  1138. __isCharTrimable: function(buf, pos){
  1139. const isTrim = (buf, pos) => {
  1140. const {timchars} = this.state;
  1141. loop1: for(let i = 0; i < timchars.length; i++){
  1142. const timchar = timchars[i];
  1143. for(let j = 0; j < timchar.length; j++){
  1144. if(timchar[j] !== buf[pos+j]) continue loop1;
  1145. }
  1146. return timchar.length;
  1147. }
  1148. return 0;
  1149. };
  1150. return isTrim(buf, pos);
  1151. },
  1152. // Keep it in case we implement the `cast_int` option
  1153. // __isInt(value){
  1154. // // return Number.isInteger(parseInt(value))
  1155. // // return !isNaN( parseInt( obj ) );
  1156. // return /^(\-|\+)?[1-9][0-9]*$/.test(value)
  1157. // }
  1158. __isFloat: function(value){
  1159. return (value - parseFloat(value) + 1) >= 0; // Borrowed from jquery
  1160. },
  1161. __compareBytes: function(sourceBuf, targetBuf, targetPos, firstByte){
  1162. if(sourceBuf[0] !== firstByte) return 0;
  1163. const sourceLength = sourceBuf.length;
  1164. for(let i = 1; i < sourceLength; i++){
  1165. if(sourceBuf[i] !== targetBuf[targetPos+i]) return 0;
  1166. }
  1167. return sourceLength;
  1168. },
  1169. __isDelimiter: function(buf, pos, chr){
  1170. const {delimiter, ignore_last_delimiters} = this.options;
  1171. if(ignore_last_delimiters === true && this.state.record.length === this.options.columns.length - 1){
  1172. return 0;
  1173. }else if(ignore_last_delimiters !== false && typeof ignore_last_delimiters === 'number' && this.state.record.length === ignore_last_delimiters - 1){
  1174. return 0;
  1175. }
  1176. loop1: for(let i = 0; i < delimiter.length; i++){
  1177. const del = delimiter[i];
  1178. if(del[0] === chr){
  1179. for(let j = 1; j < del.length; j++){
  1180. if(del[j] !== buf[pos+j]) continue loop1;
  1181. }
  1182. return del.length;
  1183. }
  1184. }
  1185. return 0;
  1186. },
  1187. __isRecordDelimiter: function(chr, buf, pos){
  1188. const {record_delimiter} = this.options;
  1189. const recordDelimiterLength = record_delimiter.length;
  1190. loop1: for(let i = 0; i < recordDelimiterLength; i++){
  1191. const rd = record_delimiter[i];
  1192. const rdLength = rd.length;
  1193. if(rd[0] !== chr){
  1194. continue;
  1195. }
  1196. for(let j = 1; j < rdLength; j++){
  1197. if(rd[j] !== buf[pos+j]){
  1198. continue loop1;
  1199. }
  1200. }
  1201. return rd.length;
  1202. }
  1203. return 0;
  1204. },
  1205. __isEscape: function(buf, pos, chr){
  1206. const {escape} = this.options;
  1207. if(escape === null) return false;
  1208. const l = escape.length;
  1209. if(escape[0] === chr){
  1210. for(let i = 0; i < l; i++){
  1211. if(escape[i] !== buf[pos+i]){
  1212. return false;
  1213. }
  1214. }
  1215. return true;
  1216. }
  1217. return false;
  1218. },
  1219. __isQuote: function(buf, pos){
  1220. const {quote} = this.options;
  1221. if(quote === null) return false;
  1222. const l = quote.length;
  1223. for(let i = 0; i < l; i++){
  1224. if(quote[i] !== buf[pos+i]){
  1225. return false;
  1226. }
  1227. }
  1228. return true;
  1229. },
  1230. __autoDiscoverRecordDelimiter: function(buf, pos){
  1231. const { encoding } = this.options;
  1232. // Note, we don't need to cache this information in state,
  1233. // It is only called on the first line until we find out a suitable
  1234. // record delimiter.
  1235. const rds = [
  1236. // Important, the windows line ending must be before mac os 9
  1237. Buffer.from('\r\n', encoding),
  1238. Buffer.from('\n', encoding),
  1239. Buffer.from('\r', encoding),
  1240. ];
  1241. loop: for(let i = 0; i < rds.length; i++){
  1242. const l = rds[i].length;
  1243. for(let j = 0; j < l; j++){
  1244. if(rds[i][j] !== buf[pos + j]){
  1245. continue loop;
  1246. }
  1247. }
  1248. this.options.record_delimiter.push(rds[i]);
  1249. this.state.recordDelimiterMaxLength = rds[i].length;
  1250. return rds[i].length;
  1251. }
  1252. return 0;
  1253. },
  1254. __error: function(msg){
  1255. const {encoding, raw, skip_records_with_error} = this.options;
  1256. const err = typeof msg === 'string' ? new Error(msg) : msg;
  1257. if(skip_records_with_error){
  1258. this.state.recordHasError = true;
  1259. if(this.options.on_skip !== undefined){
  1260. this.options.on_skip(err, raw ? this.state.rawBuffer.toString(encoding) : undefined);
  1261. }
  1262. // this.emit('skip', err, raw ? this.state.rawBuffer.toString(encoding) : undefined);
  1263. return undefined;
  1264. }else {
  1265. return err;
  1266. }
  1267. },
  1268. __infoDataSet: function(){
  1269. return {
  1270. ...this.info,
  1271. columns: this.options.columns
  1272. };
  1273. },
  1274. __infoRecord: function(){
  1275. const {columns, raw, encoding} = this.options;
  1276. return {
  1277. ...this.__infoDataSet(),
  1278. error: this.state.error,
  1279. header: columns === true,
  1280. index: this.state.record.length,
  1281. raw: raw ? this.state.rawBuffer.toString(encoding) : undefined
  1282. };
  1283. },
  1284. __infoField: function(){
  1285. const {columns} = this.options;
  1286. const isColumns = Array.isArray(columns);
  1287. return {
  1288. ...this.__infoRecord(),
  1289. column: isColumns === true ?
  1290. (columns.length > this.state.record.length ?
  1291. columns[this.state.record.length].name :
  1292. null
  1293. ) :
  1294. this.state.record.length,
  1295. quoting: this.state.wasQuoting,
  1296. };
  1297. }
  1298. };
  1299. };
  1300. class Parser extends stream.Transform {
  1301. constructor(opts = {}){
  1302. super({...{readableObjectMode: true}, ...opts, encoding: null});
  1303. this.api = transform(opts);
  1304. this.api.options.on_skip = (err, chunk) => {
  1305. this.emit('skip', err, chunk);
  1306. };
  1307. // Backward compatibility
  1308. this.state = this.api.state;
  1309. this.options = this.api.options;
  1310. this.info = this.api.info;
  1311. }
  1312. // Implementation of `Transform._transform`
  1313. _transform(buf, _, callback){
  1314. if(this.state.stop === true){
  1315. return;
  1316. }
  1317. const err = this.api.parse(buf, false, (record) => {
  1318. this.push(record);
  1319. }, () => {
  1320. this.push(null);
  1321. this.end();
  1322. // Fix #333 and break #410
  1323. // ko: api.stream.iterator.coffee
  1324. // ko with v21.4.0, ok with node v20.5.1: api.stream.finished # aborted (with generate())
  1325. // ko: api.stream.finished # aborted (with Readable)
  1326. // this.destroy()
  1327. // Fix #410 and partially break #333
  1328. // ok: api.stream.iterator.coffee
  1329. // ok: api.stream.finished # aborted (with generate())
  1330. // broken: api.stream.finished # aborted (with Readable)
  1331. this.on('end', this.destroy);
  1332. });
  1333. if(err !== undefined){
  1334. this.state.stop = true;
  1335. }
  1336. callback(err);
  1337. }
  1338. // Implementation of `Transform._flush`
  1339. _flush(callback){
  1340. if(this.state.stop === true){
  1341. return;
  1342. }
  1343. const err = this.api.parse(undefined, true, (record) => {
  1344. this.push(record);
  1345. }, () => {
  1346. this.push(null);
  1347. this.on('end', this.destroy);
  1348. });
  1349. callback(err);
  1350. }
  1351. }
  1352. const parse = function(){
  1353. let data, options, callback;
  1354. for(const i in arguments){
  1355. const argument = arguments[i];
  1356. const type = typeof argument;
  1357. if(data === undefined && (typeof argument === 'string' || Buffer.isBuffer(argument))){
  1358. data = argument;
  1359. }else if(options === undefined && is_object(argument)){
  1360. options = argument;
  1361. }else if(callback === undefined && type === 'function'){
  1362. callback = argument;
  1363. }else {
  1364. throw new CsvError('CSV_INVALID_ARGUMENT', [
  1365. 'Invalid argument:',
  1366. `got ${JSON.stringify(argument)} at index ${i}`
  1367. ], options || {});
  1368. }
  1369. }
  1370. const parser = new Parser(options);
  1371. if(callback){
  1372. const records = options === undefined || options.objname === undefined ? [] : {};
  1373. parser.on('readable', function(){
  1374. let record;
  1375. while((record = this.read()) !== null){
  1376. if(options === undefined || options.objname === undefined){
  1377. records.push(record);
  1378. }else {
  1379. records[record[0]] = record[1];
  1380. }
  1381. }
  1382. });
  1383. parser.on('error', function(err){
  1384. callback(err, undefined, parser.api.__infoDataSet());
  1385. });
  1386. parser.on('end', function(){
  1387. callback(undefined, records, parser.api.__infoDataSet());
  1388. });
  1389. }
  1390. if(data !== undefined){
  1391. const writer = function(){
  1392. parser.write(data);
  1393. parser.end();
  1394. };
  1395. // Support Deno, Rollup doesnt provide a shim for setImmediate
  1396. if(typeof setImmediate === 'function'){
  1397. setImmediate(writer);
  1398. }else {
  1399. setTimeout(writer, 0);
  1400. }
  1401. }
  1402. return parser;
  1403. };
// Public API of the module: the error class, the stream class and the
// `parse` function creating a parser from data, options and a callback.
exports.CsvError = CsvError;
exports.Parser = Parser;
exports.parse = parse;