Skip to content

Instantly share code, notes, and snippets.

@greenido
Created July 9, 2025 16:36
Show Gist options
  • Select an option

  • Save greenido/1c0ab19c4dfcf41596258955d5c28cea to your computer and use it in GitHub Desktop.

Select an option

Save greenido/1c0ab19c4dfcf41596258955d5c28cea to your computer and use it in GitHub Desktop.
CSV JSON Extractor tool that will help you extract JSON data from CSV columns
import React, { useState } from 'react';
import { Upload, Download, FileText, AlertCircle } from 'lucide-react';
import Papa from 'papaparse';
const JSONExtractor = () => {
const [csvData, setCsvData] = useState([]);
const [headers, setHeaders] = useState([]);
const [selectedColumns, setSelectedColumns] = useState([]);
const [extractedJSON, setExtractedJSON] = useState([]);
const [error, setError] = useState('');
const [processing, setProcessing] = useState(false);
const handleFileUpload = (event) => {
const file = event.target.files[0];
if (!file) return;
setProcessing(true);
setError('');
Papa.parse(file, {
complete: (results) => {
if (results.errors.length > 0) {
setError('Error parsing CSV: ' + results.errors[0].message);
setProcessing(false);
return;
}
const headers = results.data[0] || [];
const data = results.data.slice(1);
setHeaders(headers);
setCsvData(data);
setSelectedColumns([]);
setExtractedJSON([]);
setProcessing(false);
},
header: false,
skipEmptyLines: true,
dynamicTyping: false
});
};
const isValidJSON = (str) => {
try {
JSON.parse(str);
return true;
} catch (e) {
return false;
}
};
const detectJSONColumns = () => {
if (csvData.length === 0) return [];
const jsonColumns = [];
headers.forEach((header, index) => {
let jsonCount = 0;
const sampleSize = Math.min(10, csvData.length);
for (let i = 0; i < sampleSize; i++) {
const value = csvData[i][index];
if (value && typeof value === 'string' && isValidJSON(value)) {
jsonCount++;
}
}
if (jsonCount > 0) {
jsonColumns.push({
name: header,
index: index,
jsonCount: jsonCount,
percentage: (jsonCount / sampleSize) * 100
});
}
});
return jsonColumns;
};
const extractJSON = () => {
if (selectedColumns.length === 0) {
setError('Please select at least one column to extract JSON from');
return;
}
setProcessing(true);
const extracted = [];
csvData.forEach((row, rowIndex) => {
const rowData = { rowIndex: rowIndex + 1 };
selectedColumns.forEach(columnIndex => {
const columnName = headers[columnIndex];
const value = row[columnIndex];
if (value && typeof value === 'string' && isValidJSON(value)) {
try {
rowData[columnName] = JSON.parse(value);
} catch (e) {
rowData[columnName + '_error'] = `Invalid JSON: ${e.message}`;
rowData[columnName + '_raw'] = value;
}
} else if (value) {
rowData[columnName + '_raw'] = value;
rowData[columnName + '_note'] = 'Not valid JSON';
}
});
if (Object.keys(rowData).length > 1) {
extracted.push(rowData);
}
});
setExtractedJSON(extracted);
setProcessing(false);
};
const downloadJSON = () => {
if (extractedJSON.length === 0) return;
const dataStr = JSON.stringify(extractedJSON, null, 2);
const dataBlob = new Blob([dataStr], { type: 'application/json' });
const url = URL.createObjectURL(dataBlob);
const link = document.createElement('a');
link.href = url;
link.download = 'extracted_json.json';
link.click();
URL.revokeObjectURL(url);
};
const jsonColumns = detectJSONColumns();
return (
<div className="max-w-4xl mx-auto p-6 bg-white">
<h1 className="text-2xl font-bold mb-6 text-gray-800">CSV JSON Extractor</h1>
{/* File Upload */}
<div className="mb-6">
<label className="block text-sm font-medium text-gray-700 mb-2">
Upload CSV File
</label>
<div className="border-2 border-dashed border-gray-300 rounded-lg p-6 text-center">
<Upload className="mx-auto h-12 w-12 text-gray-400 mb-2" />
<input
type="file"
accept=".csv"
onChange={handleFileUpload}
className="hidden"
id="csv-upload"
/>
<label
htmlFor="csv-upload"
className="cursor-pointer bg-blue-500 text-white px-4 py-2 rounded hover:bg-blue-600"
>
Choose CSV File
</label>
</div>
</div>
{/* Error Display */}
{error && (
<div className="mb-4 p-3 bg-red-100 border border-red-400 text-red-700 rounded flex items-center">
<AlertCircle className="h-5 w-5 mr-2" />
{error}
</div>
)}
{/* CSV Preview */}
{csvData.length > 0 && (
<div className="mb-6">
<h2 className="text-lg font-semibold mb-3">CSV Preview</h2>
<div className="bg-gray-50 p-3 rounded text-sm">
<p><strong>Rows:</strong> {csvData.length}</p>
<p><strong>Columns:</strong> {headers.length}</p>
<p><strong>Headers:</strong> {headers.join(', ')}</p>
</div>
</div>
)}
{/* JSON Column Detection */}
{jsonColumns.length > 0 && (
<div className="mb-6">
<h2 className="text-lg font-semibold mb-3">Detected JSON Columns</h2>
<div className="space-y-2">
{jsonColumns.map((col, index) => (
<div key={index} className="flex items-center justify-between p-3 bg-green-50 rounded">
<div>
<span className="font-medium">{col.name}</span>
<span className="text-sm text-gray-600 ml-2">
({col.jsonCount} JSON entries, {col.percentage.toFixed(1)}% of sample)
</span>
</div>
<label className="flex items-center">
<input
type="checkbox"
checked={selectedColumns.includes(col.index)}
onChange={(e) => {
if (e.target.checked) {
setSelectedColumns([...selectedColumns, col.index]);
} else {
setSelectedColumns(selectedColumns.filter(i => i !== col.index));
}
}}
className="mr-2"
/>
Extract
</label>
</div>
))}
</div>
</div>
)}
{/* Manual Column Selection */}
{csvData.length > 0 && (
<div className="mb-6">
<h2 className="text-lg font-semibold mb-3">Manual Column Selection</h2>
<div className="grid grid-cols-2 md:grid-cols-3 gap-2">
{headers.map((header, index) => (
<label key={index} className="flex items-center p-2 border rounded">
<input
type="checkbox"
checked={selectedColumns.includes(index)}
onChange={(e) => {
if (e.target.checked) {
setSelectedColumns([...selectedColumns, index]);
} else {
setSelectedColumns(selectedColumns.filter(i => i !== index));
}
}}
className="mr-2"
/>
<span className="text-sm">{header}</span>
</label>
))}
</div>
</div>
)}
{/* Extract Button */}
{selectedColumns.length > 0 && (
<div className="mb-6">
<button
onClick={extractJSON}
disabled={processing}
className="bg-green-500 text-white px-6 py-2 rounded hover:bg-green-600 disabled:opacity-50 flex items-center"
>
<FileText className="h-4 w-4 mr-2" />
{processing ? 'Processing...' : 'Extract JSON'}
</button>
</div>
)}
{/* Results */}
{extractedJSON.length > 0 && (
<div className="mb-6">
<h2 className="text-lg font-semibold mb-3">Extracted JSON Data</h2>
<div className="bg-gray-50 p-3 rounded mb-4">
<p><strong>Extracted Records:</strong> {extractedJSON.length}</p>
</div>
<div className="max-h-96 overflow-y-auto bg-gray-100 p-4 rounded font-mono text-sm">
<pre>{JSON.stringify(extractedJSON.slice(0, 3), null, 2)}</pre>
{extractedJSON.length > 3 && (
<p className="text-gray-600 mt-2">... and {extractedJSON.length - 3} more records</p>
)}
</div>
<button
onClick={downloadJSON}
className="mt-4 bg-blue-500 text-white px-6 py-2 rounded hover:bg-blue-600 flex items-center"
>
<Download className="h-4 w-4 mr-2" />
Download JSON File
</button>
</div>
)}
</div>
);
};
export default JSONExtractor;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment