Skip to content

Commit

Permalink
Beta, with better structure
Browse files Browse the repository at this point in the history
  • Loading branch information
David Mesquita-Morris committed Nov 24, 2021
1 parent c59c643 commit 92f7efd
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 26 deletions.
33 changes: 21 additions & 12 deletions lib/node/index.js
Original file line number Diff line number Diff line change
@@ -1,25 +1,34 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.parse = void 0;
// unicode character constants
const lineFeed = '\u000A';
const carriageReturn = '\u000D';
const doubleQuote = '\u0022';
const comma = '\u002C';
const lineSeparator = '\u2028';
const paragraphSeparator = '\u2029';
const byteOrderMark = '\uFEFF';

// regular expression fragments

// one or more consecutive line terminator code points (LF, CR, LS, PS) as defined in the ECMAScript® 2019 Language Specification;
// because of the trailing '+', a run of terminators (e.g. CRLF or blank lines) is treated as a single row break
const lineTerminator = `[${lineFeed}${carriageReturn}${lineSeparator}${paragraphSeparator}]+`;

// a look ahead that only succeeds when an even number of double quotes remain up to the end of the string,
// i.e. whatever was matched just before it is not within double quotes
const unquoted = `(?=(?:(?:[^${doubleQuote}]*${doubleQuote}){2})*[^${doubleQuote}]*$)`;

// regular expressions used to parse

// matches a leading byte order mark or trailing line terminators; global so that BOTH are removed by a single replace
const trim = new RegExp(`^${byteOrderMark}|${lineTerminator}$`, 'g');
// matches row breaks that are not within double quotes
const rows = new RegExp(`${lineTerminator}${unquoted}`);
// matches commas that are not within double quotes
const tokens = new RegExp(`${comma}${unquoted}`);
// matches a double quote at the very start or very end of a token
const quotes = new RegExp(`(^${doubleQuote}|${doubleQuote}$)`, 'g');
// matches an escaped double quote (two consecutive double quotes)
const doubleDoubleQuotes = new RegExp(`${doubleQuote}${doubleQuote}`, 'g');

/**
 * Parses a string encoded as comma separated values.
 *
 * The first row is used as the header: each of its tokens becomes a property name on the
 * objects built from the subsequent rows. A row with fewer tokens than the header yields
 * undefined for the missing properties; tokens beyond the header length are ignored.
 * @param text The source csv text.
 * @returns An array of objects, one per row following the header row.
 */
function parse(text) {
    // create a table of tokens from the source csv formatted text
    const table = text
        // trim byte order mark from the beginning and any trailing EOL if present
        .replace(trim, '')
        // split text into rows at EOL (unless in quotes)
        .split(rows)
        .map(row => row
            // split row into tokens based on comma delimiter (unless in quotes)
            .split(tokens)
            // dequote tokens if needed, then replace double double quotes with double quotes
            .map(token => token.replace(quotes, '').replace(doubleDoubleQuotes, doubleQuote)));

    // extract the header row and use for the property names
    const header = table.shift();

    // convert subsequent rows into objects; the guard covers shift() returning undefined for an empty table
    return header ? table.map(row => Object.fromEntries(header.map((column, index) => [column, row[index]]))) : [];
}
exports.parse = parse;
2 changes: 1 addition & 1 deletion lib/web/csv.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@steelbreeze/csv",
"version": "1.0.0-alpha.3",
"version": "1.0.0-beta",
"description": "Tools for reading and writing files formatted as CSV",
"main": "lib/node/index.js",
"module": "lib/node/index.js",
Expand Down
36 changes: 24 additions & 12 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,23 +1,35 @@
// unicode character constants
const lineFeed = '\u000A';
const carriageReturn = '\u000D';
const doubleQuote = '\u0022';
const comma = '\u002C';
const lineSeparator = '\u2028';
const paragraphSeparator = '\u2029';
const byteOrderMark = '\uFEFF';

// regular expression fragments

// one or more consecutive line terminator code points (LF, CR, LS, PS) as defined in the ECMAScript® 2019 Language Specification;
// because of the trailing '+', a run of terminators (e.g. CRLF or blank lines) is treated as a single row break
const lineTerminator = `[${lineFeed}${carriageReturn}${lineSeparator}${paragraphSeparator}]+`;

// a look ahead that only succeeds when an even number of double quotes remain up to the end of the string,
// i.e. whatever was matched just before it is not within double quotes
const unquoted = `(?=(?:(?:[^${doubleQuote}]*${doubleQuote}){2})*[^${doubleQuote}]*$)`;

// regular expressions used to parse

// matches a leading byte order mark or trailing line terminators; global so that BOTH are removed by a single replace
const trim = new RegExp(`^${byteOrderMark}|${lineTerminator}$`, 'g');
// matches row breaks that are not within double quotes
const rows = new RegExp(`${lineTerminator}${unquoted}`);
// matches commas that are not within double quotes
const tokens = new RegExp(`${comma}${unquoted}`);
// matches a double quote at the very start or very end of a token
const quotes = new RegExp(`(^${doubleQuote}|${doubleQuote}$)`, 'g');
// matches an escaped double quote (two consecutive double quotes)
const doubleDoubleQuotes = new RegExp(`${doubleQuote}${doubleQuote}`, 'g');

/**
 * Parses a string encoded as comma separated values.
 *
 * The first row is used as the header: each of its tokens becomes a property name on the
 * objects built from the subsequent rows. A row with fewer tokens than the header yields
 * undefined for the missing properties; tokens beyond the header length are ignored.
 * @param text The source csv text.
 * @returns An array of objects, one per row following the header row.
 */
export function parse(text: string): Array<any> {
  // create a table of tokens from the source csv formatted text
  const table = text
    // trim byte order mark from the beginning and any trailing EOL if present
    .replace(trim, '')
    // split text into rows at EOL (unless in quotes)
    .split(rows)
    .map(row =>
      row
        // split row into tokens based on comma delimiter (unless in quotes)
        .split(tokens)
        // dequote tokens if needed, then replace double double quotes with double quotes
        .map(token => token.replace(quotes, '').replace(doubleDoubleQuotes, doubleQuote))
    );

  // extract the header row and use for the property names
  const header = table.shift();

  // convert subsequent rows into objects; the guard narrows away the undefined that shift() may return
  return header ? table.map(row => Object.fromEntries(header.map((column, index) => [column, row[index]]))) : [];
}

0 comments on commit 92f7efd

Please sign in to comment.