Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save brandonhimpfen/362d78001b792350da6789fb10df217a to your computer and use it in GitHub Desktop.

Select an option

Save brandonhimpfen/362d78001b792350da6789fb10df217a to your computer and use it in GitHub Desktop.
Parse a delimited (CSV-like) file line-by-line in Node.js with streaming output (no deps, supports quotes, header mapping, and large files).
#!/usr/bin/env node
/**
* Parse a delimited file (CSV-like) line-by-line with streaming output (no deps).
*
* Features:
* - Streams the input file (memory efficient)
* - Supports a delimiter (default: comma)
* - Handles quoted fields (basic CSV rules: "escaped "" quotes")
* - Optional header row -> outputs objects
* - Outputs JSON Lines (NDJSON) to stdout (great for pipelines)
*
* Usage:
* node node-parse-delimited-streaming.js input.csv > out.jsonl
*
* Options:
* --delimiter=, Set delimiter (default: ,)
* --no-header Treat first row as data (default assumes header)
* --trim Trim unquoted fields
*
* Examples:
* node node-parse-delimited-streaming.js data.csv --delimiter=, > out.jsonl
* node node-parse-delimited-streaming.js data.tsv --delimiter=$'\t' > out.jsonl
*/
const fs = require("fs");
const readline = require("readline");
const path = require("path");
/**
 * Parse command-line arguments into an options object.
 *
 * Recognised arguments:
 *   <file>            first non-"--" argument; path of the input file
 *   --delimiter=<c>   single-character delimiter (the two-character text \t means TAB)
 *   --no-header       treat the first row as data
 *   --trim            trim unquoted fields
 *   -h, --help        request usage output
 *
 * @param {string[]} argv - Full argument vector (e.g. `process.argv`); the
 *   first two entries are skipped.
 * @returns {{file: (string|null), delimiter: string, header: boolean, trim: boolean, help: (boolean|undefined)}}
 *   Parsed options. `help` is only present (and `true`) when help was requested.
 * @throws {Error} On an unknown argument or a delimiter that is not exactly
 *   one character.
 */
function parseArgs(argv) {
  const DELIMITER_FLAG = "--delimiter=";
  const args = {
    file: null,
    delimiter: ",",
    header: true,
    trim: false,
  };

  for (const a of argv.slice(2)) {
    // Help must be checked before the positional argument: "-h" does not
    // start with "--" and would otherwise be taken as the file path.
    if (a === "-h" || a === "--help") {
      return { ...args, help: true };
    }

    if (!args.file && !a.startsWith("--")) {
      args.file = a;
      continue;
    }

    if (a.startsWith(DELIMITER_FLAG)) {
      // Take everything after the first "=" so that "=" itself is a usable
      // delimiter (split("=", 2) would drop it and yield an empty string).
      let delimiter = a.slice(DELIMITER_FLAG.length);
      if (delimiter === "\\t") {
        delimiter = "\t";
      }
      // The line parser compares one character at a time, so any other
      // length could never match and would silently produce one-field rows.
      if (delimiter.length !== 1) {
        throw new Error(
          `Invalid delimiter ${JSON.stringify(delimiter)}: expected exactly one character`
        );
      }
      args.delimiter = delimiter;
      continue;
    }

    if (a === "--no-header") {
      args.header = false;
      continue;
    }

    if (a === "--trim") {
      args.trim = true;
      continue;
    }

    throw new Error(`Unknown argument: ${a}`);
  }

  return args;
}
/**
 * Basic CSV-like parser for a single line with delimiter + quotes.
 *
 * Handles:
 * - delimiter separation
 * - quoted fields: "a,b"
 * - escaped quotes inside quoted fields: "he said ""hi"""
 *
 * An unterminated quote is tolerated: the rest of the line is taken as the
 * quoted content of the last field.
 *
 * @param {string} line - One line of input, without its line terminator.
 * @param {string} delimiter - Single-character field separator.
 * @param {boolean} trim - When truthy, strip whitespace from the unquoted
 *   edges of each field. Whitespace inside quotes is always preserved.
 * @returns {string[]} The fields of the line; always at least one element.
 */
function parseDelimitedLine(line, delimiter, trim) {
  const out = [];
  let cur = "";
  let inQuotes = false;
  // Offsets into `cur` bracketing the quoted content of the current field:
  // quotedStart is where the first quoted section begins (-1 = none seen),
  // quotedEnd is just past where the last quoted section ended.
  let quotedStart = -1;
  let quotedEnd = 0;

  const pushField = () => {
    if (!trim) {
      out.push(cur);
    } else if (quotedStart === -1) {
      out.push(cur.trim());
    } else {
      // Trim only the text outside the quotes so that `" a "` keeps its
      // padding while ` "a" ` loses the surrounding blanks.
      out.push(
        cur.slice(0, quotedStart).trimStart() +
          cur.slice(quotedStart, quotedEnd) +
          cur.slice(quotedEnd).trimEnd()
      );
    }
    cur = "";
    quotedStart = -1;
    quotedEnd = 0;
  };

  for (let i = 0; i < line.length; i++) {
    const ch = line[i];

    if (inQuotes) {
      if (ch !== '"') {
        cur += ch;
      } else if (line[i + 1] === '"') {
        // Escaped quote: "" inside a quoted field
        cur += '"';
        i++;
      } else {
        inQuotes = false;
        quotedEnd = cur.length;
      }
      continue;
    }

    if (ch === '"') {
      inQuotes = true;
      if (quotedStart === -1) {
        quotedStart = cur.length;
      }
      continue;
    }

    if (ch === delimiter) {
      pushField();
      continue;
    }

    cur += ch;
  }

  // Unterminated quote: everything up to the end of the line was quoted.
  if (inQuotes) {
    quotedEnd = cur.length;
  }
  pushField();
  return out;
}
/**
 * Read a UTF-8 text file line by line, awaiting a callback for each line.
 *
 * Lines are delivered in order and the next line is not delivered until the
 * promise returned by `onLine` settles. "\r\n" is treated as a single line
 * break (crlfDelay: Infinity).
 *
 * @param {string} filePath - Path to the file; resolved against the current
 *   working directory.
 * @param {(line: string, lineNum: number) => (void|Promise<void>)} onLine -
 *   Called with the line text (no terminator) and its 1-based line number.
 * @returns {Promise<void>} Resolves when the whole file has been processed.
 * @throws Rejects with the stream's error (e.g. ENOENT) or with whatever
 *   `onLine` throws.
 */
async function streamLines(filePath, onLine) {
  const absolute = path.resolve(filePath);
  const stream = fs.createReadStream(absolute, { encoding: "utf8" });
  const rl = readline.createInterface({
    input: stream,
    crlfDelay: Infinity,
  });

  // Capture read/open failures ourselves: without a listener the stream's
  // 'error' event is not guaranteed to reach the async iterator. Closing the
  // interface ends the loop so the error can be rethrown below.
  let streamError = null;
  stream.once("error", (err) => {
    streamError = err;
    rl.close();
  });

  let lineNum = 0;
  try {
    for await (const line of rl) {
      lineNum++;
      await onLine(line, lineNum);
    }
    if (streamError) {
      throw streamError;
    }
  } finally {
    rl.close();
    // rl.close() does not close its input; release the file descriptor even
    // when onLine threw part-way through the file.
    stream.destroy();
  }
}
/**
 * Print the command-line usage summary to stderr.
 */
function usage() {
  const helpLines = [
    "Usage: node node-parse-delimited-streaming.js <file> [options]",
    "Options:",
    "--delimiter=, Delimiter character (default: ,)",
    "--no-header Do not treat first row as header (default: header)",
    "--trim Trim unquoted fields",
    // Trailing empty entry keeps the final newline of the message.
    "",
  ];
  console.error(helpLines.join("\n"));
}
/**
 * CLI entry point: parse arguments, stream the input file, and write one
 * JSON document per non-empty input line to stdout (JSON Lines).
 *
 * With a header row (the default) each record is an object keyed by the
 * header names; missing trailing fields become "". With --no-header each
 * record is an array of field strings.
 *
 * Exits with status 0 after printing usage when help was requested, and
 * with status 2 when no input file was given.
 *
 * @returns {Promise<void>} Resolves when the whole file has been written out.
 */
async function main() {
  const args = parseArgs(process.argv);
  if (args.help || !args.file) {
    usage();
    // An explicit help request is a success; a missing file is a usage error.
    process.exit(args.help ? 0 : 2);
  }

  let headers = null;
  await streamLines(args.file, async (rawLine, lineNum) => {
    // Drop a UTF-8 byte-order mark so it does not become part of the first
    // header name / first field.
    const line =
      lineNum === 1 && rawLine.startsWith("\uFEFF") ? rawLine.slice(1) : rawLine;

    // Skip empty lines (optional behavior; comment out if unwanted)
    if (line === "") return;

    const fields = parseDelimitedLine(line, args.delimiter, args.trim);

    // The header is the first non-empty line, which is not necessarily
    // line 1 when the file starts with blank lines.
    if (args.header && headers === null) {
      headers = fields;
      return;
    }

    let record;
    if (headers) {
      // Null-prototype object: a column literally named "__proto__" is kept
      // as an ordinary key instead of being silently dropped.
      record = Object.create(null);
      for (let i = 0; i < headers.length; i++) {
        record[headers[i]] = fields[i] ?? "";
      }
    } else {
      record = fields;
    }

    // Streaming output: JSON Lines (one JSON object per line).
    // Respect backpressure so a slow consumer does not make the output
    // buffer grow without bound on large files.
    const flushed = process.stdout.write(JSON.stringify(record) + "\n");
    if (!flushed) {
      await new Promise((resolve) => process.stdout.once("drain", resolve));
    }
  });
}
// Run the CLI; report any failure on stderr and exit with a non-zero status.
main().catch((failure) => {
  const detail = failure && failure.stack ? failure.stack : failure;
  console.error("ERROR:", detail);
  process.exit(1);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment