Created
March 1, 2026 19:48
-
-
Save brandonhimpfen/362d78001b792350da6789fb10df217a to your computer and use it in GitHub Desktop.
Parse a delimited (CSV-like) file line-by-line in Node.js with streaming output (no deps, supports quotes, header mapping, and large files).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env node | |
| /** | |
| * Parse a delimited file (CSV-like) line-by-line with streaming output (no deps). | |
| * | |
| * Features: | |
| * - Streams the input file (memory efficient) | |
| * - Supports a delimiter (default: comma) | |
| * - Handles quoted fields (basic CSV rules: "escaped "" quotes") | |
| * - Optional header row -> outputs objects | |
| * - Outputs JSON Lines (NDJSON) to stdout (great for pipelines) | |
| * | |
| * Usage: | |
| * node node-parse-delimited-streaming.js input.csv > out.jsonl | |
| * | |
| * Options: | |
| * --delimiter=, Set delimiter (default: ,) | |
| * --no-header Treat first row as data (default assumes header) | |
| * --trim Trim unquoted fields | |
| * | |
| * Examples: | |
| * node node-parse-delimited-streaming.js data.csv --delimiter=, > out.jsonl | |
| * node node-parse-delimited-streaming.js data.tsv --delimiter=$'\t' > out.jsonl | |
| */ | |
| const fs = require("fs"); | |
| const readline = require("readline"); | |
| const path = require("path"); | |
/**
 * Parse command-line arguments.
 *
 * @param {string[]} argv - The full process argv (first two entries are skipped).
 * @returns {{file: ?string, delimiter: string, header: boolean, trim: boolean, help?: boolean}}
 * @throws {Error} On an unrecognized option.
 */
function parseArgs(argv) {
  const args = {
    file: null,
    delimiter: ",",
    header: true,
    trim: false,
  };
  for (const a of argv.slice(2)) {
    // Check help FIRST: `-h` does not start with "--", so the positional
    // check below would otherwise swallow it as the input file name.
    if (a === "-h" || a === "--help") {
      return { ...args, help: true };
    }
    if (a.startsWith("--delimiter=")) {
      // slice() keeps everything after the first "=", so delimiters that
      // contain (or are) "=" survive; split("=", 2) would truncate them.
      args.delimiter = a.slice("--delimiter=".length);
      continue;
    }
    if (a === "--no-header") {
      args.header = false;
      continue;
    }
    if (a === "--trim") {
      args.trim = true;
      continue;
    }
    // First bare (non "--") argument is the input file.
    if (!args.file && !a.startsWith("--")) {
      args.file = a;
      continue;
    }
    throw new Error(`Unknown argument: ${a}`);
  }
  return args;
}
// Split one line into fields using basic CSV quoting rules:
//  - fields are separated by `delimiter` (a single character);
//  - a field wrapped in double quotes may contain the delimiter;
//  - a doubled quote ("") inside a quoted field yields one literal quote.
// When `trim` is true, each completed field is trimmed of whitespace.
function parseDelimitedLine(line, delimiter, trim) {
  const fields = [];
  let field = "";
  let quoted = false;

  // Push the accumulated field (trimmed if requested) and reset it.
  const finish = () => {
    fields.push(trim ? field.trim() : field);
    field = "";
  };

  let pos = 0;
  while (pos < line.length) {
    const c = line[pos];
    if (quoted) {
      if (c !== '"') {
        field += c;
      } else if (line[pos + 1] === '"') {
        field += '"'; // "" -> one literal quote; consume both characters
        pos += 1;
      } else {
        quoted = false; // closing quote of the field
      }
    } else if (c === '"') {
      quoted = true; // opening quote
    } else if (c === delimiter) {
      finish();
    } else {
      field += c;
    }
    pos += 1;
  }
  finish(); // final field (an empty line still yields one empty field)
  return fields;
}
/**
 * Stream `filePath` as UTF-8 text and invoke `onLine(line, lineNumber)`
 * for every line, awaiting each callback before reading on. Line numbers
 * start at 1. The readline interface is always closed, even on error.
 */
async function streamLines(filePath, onLine) {
  const rl = readline.createInterface({
    input: fs.createReadStream(path.resolve(filePath), { encoding: "utf8" }),
    crlfDelay: Infinity, // treat \r\n as a single line break
  });
  let lineNumber = 0;
  try {
    for await (const text of rl) {
      lineNumber += 1;
      await onLine(text, lineNumber);
    }
  } finally {
    rl.close();
  }
}
// Print CLI usage/help text to stderr (stdout is reserved for the NDJSON
// data output, so help must not contaminate a pipeline).
function usage() {
  console.error(`Usage: node node-parse-delimited-streaming.js <file> [options]
Options:
  --delimiter=,   Delimiter character (default: ,)
  --no-header     Do not treat first row as header (default: header)
  --trim          Trim unquoted fields
`);
}
/**
 * Entry point: parse CLI arguments, stream the input file, and emit one
 * JSON value per record (NDJSON / JSON Lines) on stdout.
 *
 * Exit codes: 0 on success and on explicit --help, 2 when the required
 * <file> argument is missing, 1 on any runtime error (see .catch below).
 */
async function main() {
  const args = parseArgs(process.argv);
  if (args.help || !args.file) {
    usage();
    // Exit 0 when help was explicitly requested, 2 when <file> is missing.
    // (Previously `--help` without a file exited 2, wrongly signalling
    // failure to calling scripts.)
    process.exit(args.help ? 0 : 2);
  }
  let headers = null;
  await streamLines(args.file, async (line, lineNum) => {
    // Skip empty lines (optional behavior; remove this check if blank
    // lines should instead produce empty records).
    if (line === "") return;
    const fields = parseDelimitedLine(line, args.delimiter, args.trim);
    if (lineNum === 1 && args.header) {
      headers = fields; // first row supplies the output object keys
      return;
    }
    let record;
    if (headers) {
      // Map positional fields onto header names; missing trailing fields
      // become empty strings so every record carries every key.
      record = {};
      for (let i = 0; i < headers.length; i++) {
        record[headers[i]] = fields[i] ?? "";
      }
    } else {
      record = fields; // --no-header mode: emit plain arrays
    }
    // Streaming output: JSON Lines (one JSON value per line).
    process.stdout.write(JSON.stringify(record) + "\n");
  });
}
main().catch((err) => {
  console.error("ERROR:", err && err.stack ? err.stack : err);
  process.exit(1);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment