From 1d4ea90fa01108e78d38ec27c2f566f6c6808c29 Mon Sep 17 00:00:00 2001 From: Ajaz Ur Rehman Date: Thu, 23 Apr 2020 21:06:19 +0530 Subject: [PATCH 1/5] add multiple column delimiter support --- lib/index.js | 44 ++++++++++++++++++++-------- test/option.delimiter.coffee | 57 ++++++++++++++++++++++++++++++++++-- 2 files changed, 86 insertions(+), 15 deletions(-) diff --git a/lib/index.js b/lib/index.js index bcc7b93..86c1ed6 100644 --- a/lib/index.js +++ b/lib/index.js @@ -95,20 +95,33 @@ class Parser extends Transform { } } // Normalize option `delimiter` + const delimiter_error = new CsvError('CSV_INVALID_OPTION_DELIMITER', [ + 'Invalid option delimiter:', + 'delimiter must be a non empty string or buffer or array of string|buffer,', + `got ${JSON.stringify(options.delimiter)}` + ]) if(options.delimiter === undefined || options.delimiter === null || options.delimiter === false){ options.delimiter = Buffer.from(',') }else if(typeof options.delimiter === 'string' && options.delimiter.length !== 0){ options.delimiter = Buffer.from(options.delimiter) + }else if(Array.isArray(options.delimiter) && options.delimiter.length !== 0){ + options.delimiter = options.delimiter.map(function(c){ + if(c.length !== 0){ + if(typeof c === 'string'){ + return Buffer.from(c) + }else if(Buffer.isBuffer(c)){ + return c; + } + } + throw delimiter_error + }) }else if( (Buffer.isBuffer(options.delimiter) && options.delimiter.length === 0) || (typeof options.delimiter === 'string' && options.delimiter.length === 0) || - (!Buffer.isBuffer(options.delimiter) && typeof options.delimiter !== 'string') + (Array.isArray(options.delimiter) && options.delimiter.length === 0) || + (!Buffer.isBuffer(options.delimiter) && typeof options.delimiter !== 'string' && !Array.isArray(options.delimiter)) ){ - throw new CsvError('CSV_INVALID_OPTION_DELIMITER', [ - 'Invalid option delimiter:', - 'delimiter must be a non empty string or buffer,', - `got ${JSON.stringify(options.delimiter)}` - ]) + throw delimiter_error } // Normalize option `escape` if(options.escape === undefined || options.escape === null){ @@ -921,14 +934,21 @@ class Parser extends Transform { ) return numOfCharLeft < requiredLength } - __isDelimiter(chr, buf, pos){ - const {delimiter} = this.options + __isDelimiter(chr, buf, pos, delimiter = this.options.delimiter){ const delLength = delimiter.length - if(delimiter[0] !== chr) return 0 - for(let i = 1; i < delLength; i++){ - if(delimiter[i] !== buf[pos+i]) return 0 + if(Array.isArray(delimiter)){ + for(let i = 0; i < delLength; i++){ + const result = this.__isDelimiter(chr, buf, pos, delimiter[i]); + if(result !== 0) return result + } + return 0; + }else{ + if(delimiter[0] !== chr) return 0 + for(let i = 1; i < delLength; i++){ + if(delimiter[i] !== buf[pos+i]) return 0 + } + return delLength } - return delimiter.length } __isRecordDelimiter(chr, buf, pos){ const {record_delimiter} = this.options diff --git a/test/option.delimiter.coffee b/test/option.delimiter.coffee index b3ee6e0..fe16cc8 100644 --- a/test/option.delimiter.coffee +++ b/test/option.delimiter.coffee @@ -9,17 +9,32 @@ describe 'Option `delimiter`', -> (-> parse '', delimiter: '', (->) ).should.throw - message: 'Invalid option delimiter: delimiter must be a non empty string or buffer, got ""' + message: 'Invalid option delimiter: delimiter must be a non empty string or buffer or array of string|buffer, got ""' code: 'CSV_INVALID_OPTION_DELIMITER' (-> parse '', delimiter: Buffer.from(''), (->) ).should.throw - message: 'Invalid option delimiter: delimiter must be a non empty string or buffer, got {"type":"Buffer","data":[]}' + message: 'Invalid option delimiter: delimiter must be a non empty string or buffer or array of string|buffer, got {"type":"Buffer","data":[]}' code: 'CSV_INVALID_OPTION_DELIMITER' (-> parse '', delimiter: true, (->) ).should.throw - message: 'Invalid option delimiter: delimiter must be a non empty string or buffer, got true' + message: 'Invalid option delimiter: delimiter must be a non empty string or buffer or array of string|buffer, got true' + code: 'CSV_INVALID_OPTION_DELIMITER' + (-> + parse '', delimiter: [], (->) + ).should.throw + message: 'Invalid option delimiter: delimiter must be a non empty string or buffer or array of string|buffer, got []' + code: 'CSV_INVALID_OPTION_DELIMITER' + (-> + parse '', delimiter: [''], (->) + ).should.throw + message: 'Invalid option delimiter: delimiter must be a non empty string or buffer or array of string|buffer, got [""]' + code: 'CSV_INVALID_OPTION_DELIMITER' + (-> + parse '', delimiter: [',',''], (->) + ).should.throw + message: 'Invalid option delimiter: delimiter must be a non empty string or buffer or array of string|buffer, got [",",""]' code: 'CSV_INVALID_OPTION_DELIMITER' it 'using default comma', (next) -> @@ -69,3 +84,39 @@ describe 'Option `delimiter`', -> [ '','1974','8.8392926E7','',''] ] next() + + it 'using array of a single delimiter', (next) -> + parse """ + abc,,123, + ,def,, + """, delimiter: [','], (err, data) -> + return next err if err + data.should.eql [ + [ 'abc','','123',''] + [ '','def','',''] + ] + next() + + it 'using array of a single delimiter of multiple characters', (next) -> + parse """ + !# + !# + """, delimiter: ['!#'], (err, data) -> + return next err if err + data.should.eql [ + [ '', ''] + [ '', ''] + ] + next() + + it 'using array of a multiple delimiters of variable length', (next) -> + parse """ + abc,;;123;; + ;;def;;, + """, delimiter: [',', ';;'], (err, data) -> + return next err if err + data.should.eql [ + [ 'abc','','123',''] + [ '','def','',''] + ] + next() From f3c3c7f05e14ccf7cb7ff9f06f001a05ce6e0d46 Mon Sep 17 00:00:00 2001 From: Ajaz Ur Rehman Date: Thu, 23 Apr 2020 22:48:04 +0530 Subject: [PATCH 2/5] Refactor delimiter to always be an array --- lib/index.js | 61 +++++++++++++++++++++------------------------------- 1 file changed, 24 insertions(+), 37 deletions(-) diff --git a/lib/index.js b/lib/index.js index 86c1ed6..896e6c2 100644 --- a/lib/index.js +++ b/lib/index.js @@ -95,34 +95,24 @@ class Parser extends Transform { } } // Normalize option `delimiter` - const delimiter_error = new CsvError('CSV_INVALID_OPTION_DELIMITER', [ + let delimiter = options.delimiter + let delimiter_error = new CsvError('CSV_INVALID_OPTION_DELIMITER', [ 'Invalid option delimiter:', 'delimiter must be a non empty string or buffer or array of string|buffer,', - `got ${JSON.stringify(options.delimiter)}` + `got ${JSON.stringify(delimiter)}` ]) - if(options.delimiter === undefined || options.delimiter === null || options.delimiter === false){ - options.delimiter = Buffer.from(',') - }else if(typeof options.delimiter === 'string' && options.delimiter.length !== 0){ - options.delimiter = Buffer.from(options.delimiter) - }else if(Array.isArray(options.delimiter) && options.delimiter.length !== 0){ - options.delimiter = options.delimiter.map(function(c){ - if(c.length !== 0){ - if(typeof c === 'string'){ - return Buffer.from(c) - }else if(Buffer.isBuffer(c)){ - return c; - } - } - throw delimiter_error - }) - }else if( - (Buffer.isBuffer(options.delimiter) && options.delimiter.length === 0) || - (typeof options.delimiter === 'string' && options.delimiter.length === 0) || - (Array.isArray(options.delimiter) && options.delimiter.length === 0) || - (!Buffer.isBuffer(options.delimiter) && typeof options.delimiter !== 'string' && !Array.isArray(options.delimiter)) - ){ + if(!Array.isArray(delimiter)) delimiter = [delimiter] + if(delimiter.length === 0) throw delimiter_error + options.delimiter = delimiter.map(function(del){ + if(del === undefined || del === null || del === false){ + return Buffer.from(',') + }else if(typeof del === 'string' && del.length !== 0){ + return Buffer.from(del) + }else if(Buffer.isBuffer(del) && del.length !== 0){ + return del + } throw delimiter_error - } + }) // Normalize option `escape` if(options.escape === undefined || options.escape === null){ options.escape = Buffer.from('"') @@ -934,21 +924,18 @@ class Parser extends Transform { ) return numOfCharLeft < requiredLength } - __isDelimiter(chr, buf, pos, delimiter = this.options.delimiter){ - const delLength = delimiter.length - if(Array.isArray(delimiter)){ - for(let i = 0; i < delLength; i++){ - const result = this.__isDelimiter(chr, buf, pos, delimiter[i]); - if(result !== 0) return result - } - return 0; - }else{ - if(delimiter[0] !== chr) return 0 - for(let i = 1; i < delLength; i++){ - if(delimiter[i] !== buf[pos+i]) return 0 + __isDelimiter(chr, buf, pos){ + const {delimiter} = this.options + for(let i = 0; i < delimiter.length; i++){ + const d = delimiter[i]; + if(d[0] === chr){ + for(let j = 1; j < d.length; j++){ + if(d[j] !== buf[pos+j]) break + } + return d.length } - return delLength } + return 0 } __isRecordDelimiter(chr, buf, pos){ const {record_delimiter} = this.options From 49a8456900a2c76744415ce241c0eb8c04bd0a62 Mon Sep 17 00:00:00 2001 From: Ajaz Ur Rehman Date: Fri, 24 Apr 2020 09:09:53 +0530 Subject: [PATCH 3/5] Fix loop statement for __isDelimiter and refactor code for normalization of delimiter --- lib/index.js | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/lib/index.js b/lib/index.js index 896e6c2..0c45201 100644 --- a/lib/index.js +++ b/lib/index.js @@ -95,15 +95,16 @@ class Parser extends Transform { } } // Normalize option `delimiter` - let delimiter = options.delimiter - let delimiter_error = new CsvError('CSV_INVALID_OPTION_DELIMITER', [ - 'Invalid option delimiter:', - 'delimiter must be a non empty string or buffer or array of string|buffer,', - `got ${JSON.stringify(delimiter)}` - ]) - if(!Array.isArray(delimiter)) delimiter = [delimiter] - if(delimiter.length === 0) throw delimiter_error - options.delimiter = delimiter.map(function(del){ + const delimiter_json = JSON.stringify(options.delimiter) + if(!Array.isArray(options.delimiter)) options.delimiter = [options.delimiter] + if(options.delimiter.length === 0){ + throw new CsvError('CSV_INVALID_OPTION_DELIMITER', [ + 'Invalid option delimiter:', + 'delimiter must be a non empty string or buffer or array of string|buffer,', + `got ${delimiter_json}` + ]) + } + options.delimiter = options.delimiter.map(function(del){ if(del === undefined || del === null || del === false){ return Buffer.from(',') }else if(typeof del === 'string' && del.length !== 0){ @@ -111,7 +112,11 @@ class Parser extends Transform { }else if(Buffer.isBuffer(del) && del.length !== 0){ return del } - throw delimiter_error + throw new CsvError('CSV_INVALID_OPTION_DELIMITER', [ + 'Invalid option delimiter:', + 'delimiter must be a non empty string or buffer or array of string|buffer,', + `got ${delimiter_json}` + ]) }) // Normalize option `escape` if(options.escape === undefined || options.escape === null){ @@ -926,13 +931,13 @@ class Parser extends Transform { } __isDelimiter(chr, buf, pos){ const {delimiter} = this.options - for(let i = 0; i < delimiter.length; i++){ - const d = delimiter[i]; - if(d[0] === chr){ - for(let j = 1; j < d.length; j++){ - if(d[j] !== buf[pos+j]) break + loop1: for(let i = 0; i < delimiter.length; i++){ + const del = delimiter[i]; + if(del[0] === chr){ + for(let j = 1; j < del.length; j++){ + if(del[j] !== buf[pos+j]) continue loop1 } - return d.length + return del.length } } return 0 From e5dfda3447c5bce48fa233aa0c2a37842ef0ecaf Mon Sep 17 00:00:00 2001 From: Ajaz Ur Rehman Date: Fri, 24 Apr 2020 13:59:56 +0530 Subject: [PATCH 4/5] Refactor code --- lib/index.js | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/index.js b/lib/index.js index 0c45201..fe6d158 100644 --- a/lib/index.js +++ b/lib/index.js @@ -107,10 +107,8 @@ class Parser extends Transform { options.delimiter = options.delimiter.map(function(del){ if(del === undefined || del === null || del === false){ return Buffer.from(',') - }else if(typeof del === 'string' && del.length !== 0){ + }else if((typeof del === 'string' || Buffer.isBuffer(del)) && del.length !== 0){ return Buffer.from(del) - }else if(Buffer.isBuffer(del) && del.length !== 0){ - return del } throw new CsvError('CSV_INVALID_OPTION_DELIMITER', [ 'Invalid option delimiter:', @@ -932,7 +930,7 @@ class Parser extends Transform { __isDelimiter(chr, buf, pos){ const {delimiter} = this.options loop1: for(let i = 0; i < delimiter.length; i++){ - const del = delimiter[i]; + const del = delimiter[i] if(del[0] === chr){ for(let j = 1; j < del.length; j++){ if(del[j] !== buf[pos+j]) continue loop1 From 1c577fd020d35ce6d3d9ea711a3bbe4d4b3eee73 Mon Sep 17 00:00:00 2001 From: Ajaz Ur Rehman Date: Thu, 23 Apr 2020 21:06:19 +0530 Subject: [PATCH 5/5] add multiple column delimiter support --- lib/index.js | 42 ++++++++++++++++---------- test/option.delimiter.coffee | 57 ++++++++++++++++++++++++++++++++++-- 2 files changed, 80 insertions(+), 19 deletions(-) diff --git a/lib/index.js b/lib/index.js index bcc7b93..fe6d158 100644 --- a/lib/index.js +++ b/lib/index.js @@ -95,21 +95,27 @@ class Parser extends Transform { } } // Normalize option `delimiter` - if(options.delimiter === undefined || options.delimiter === null || options.delimiter === false){ - options.delimiter = Buffer.from(',') - }else if(typeof options.delimiter === 'string' && options.delimiter.length !== 0){ - options.delimiter = Buffer.from(options.delimiter) - }else if( - (Buffer.isBuffer(options.delimiter) && options.delimiter.length === 0) || - (typeof options.delimiter === 'string' && options.delimiter.length === 0) || - (!Buffer.isBuffer(options.delimiter) && typeof options.delimiter !== 'string') - ){ + const delimiter_json = JSON.stringify(options.delimiter) + if(!Array.isArray(options.delimiter)) options.delimiter = [options.delimiter] + if(options.delimiter.length === 0){ throw new CsvError('CSV_INVALID_OPTION_DELIMITER', [ 'Invalid option delimiter:', - 'delimiter must be a non empty string or buffer,', - `got ${JSON.stringify(options.delimiter)}` + 'delimiter must be a non empty string or buffer or array of string|buffer,', + `got ${delimiter_json}` ]) } + options.delimiter = options.delimiter.map(function(del){ + if(del === undefined || del === null || del === false){ + return Buffer.from(',') + }else if((typeof del === 'string' || Buffer.isBuffer(del)) && del.length !== 0){ + return Buffer.from(del) + } + throw new CsvError('CSV_INVALID_OPTION_DELIMITER', [ + 'Invalid option delimiter:', + 'delimiter must be a non empty string or buffer or array of string|buffer,', + `got ${delimiter_json}` + ]) + }) // Normalize option `escape` if(options.escape === undefined || options.escape === null){ options.escape = Buffer.from('"') @@ -923,12 +929,16 @@ class Parser extends Transform { } __isDelimiter(chr, buf, pos){ const {delimiter} = this.options - const delLength = delimiter.length - if(delimiter[0] !== chr) return 0 - for(let i = 1; i < delLength; i++){ - if(delimiter[i] !== buf[pos+i]) return 0 + loop1: for(let i = 0; i < delimiter.length; i++){ + const del = delimiter[i] + if(del[0] === chr){ + for(let j = 1; j < del.length; j++){ + if(del[j] !== buf[pos+j]) continue loop1 + } + return del.length + } } - return delimiter.length + return 0 } __isRecordDelimiter(chr, buf, pos){ const {record_delimiter} = this.options diff --git a/test/option.delimiter.coffee b/test/option.delimiter.coffee index b3ee6e0..fe16cc8 100644 --- a/test/option.delimiter.coffee +++ b/test/option.delimiter.coffee @@ -9,17 +9,32 @@ describe 'Option `delimiter`', -> (-> parse '', delimiter: '', (->) ).should.throw - message: 'Invalid option delimiter: delimiter must be a non empty string or buffer, got ""' + message: 'Invalid option delimiter: delimiter must be a non empty string or buffer or array of string|buffer, got ""' code: 'CSV_INVALID_OPTION_DELIMITER' (-> parse '', delimiter: Buffer.from(''), (->) ).should.throw - message: 'Invalid option delimiter: delimiter must be a non empty string or buffer, got {"type":"Buffer","data":[]}' + message: 'Invalid option delimiter: delimiter must be a non empty string or buffer or array of string|buffer, got {"type":"Buffer","data":[]}' code: 'CSV_INVALID_OPTION_DELIMITER' (-> parse '', delimiter: true, (->) ).should.throw - message: 'Invalid option delimiter: delimiter must be a non empty string or buffer, got true' + message: 'Invalid option delimiter: delimiter must be a non empty string or buffer or array of string|buffer, got true' + code: 'CSV_INVALID_OPTION_DELIMITER' + (-> + parse '', delimiter: [], (->) + ).should.throw + message: 'Invalid option delimiter: delimiter must be a non empty string or buffer or array of string|buffer, got []' + code: 'CSV_INVALID_OPTION_DELIMITER' + (-> + parse '', delimiter: [''], (->) + ).should.throw + message: 'Invalid option delimiter: delimiter must be a non empty string or buffer or array of string|buffer, got [""]' + code: 'CSV_INVALID_OPTION_DELIMITER' + (-> + parse '', delimiter: [',',''], (->) + ).should.throw + message: 'Invalid option delimiter: delimiter must be a non empty string or buffer or array of string|buffer, got [",",""]' code: 'CSV_INVALID_OPTION_DELIMITER' it 'using default comma', (next) -> @@ -69,3 +84,39 @@ describe 'Option `delimiter`', -> [ '','1974','8.8392926E7','',''] ] next() + + it 'using array of a single delimiter', (next) -> + parse """ + abc,,123, + ,def,, + """, delimiter: [','], (err, data) -> + return next err if err + data.should.eql [ + [ 'abc','','123',''] + [ '','def','',''] + ] + next() + + it 'using array of a single delimiter of multiple characters', (next) -> + parse """ + !# + !# + """, delimiter: ['!#'], (err, data) -> + return next err if err + data.should.eql [ + [ '', ''] + [ '', ''] + ] + next() + + it 'using array of a multiple delimiters of variable length', (next) -> + parse """ + abc,;;123;; + ;;def;;, + """, delimiter: [',', ';;'], (err, data) -> + return next err if err + data.should.eql [ + [ 'abc','','123',''] + [ '','def','',''] + ] + next()