Skip to content

Instantly share code, notes, and snippets.

@James-E-A
Last active June 24, 2025 16:32
Show Gist options
  • Save James-E-A/b3457c568c9c61d671bb1df46997ef9d to your computer and use it in GitHub Desktop.
Save James-E-A/b3457c568c9c61d671bb1df46997ef9d to your computer and use it in GitHub Desktop.
JavaScript CSV parser
// Sentinel tokens that the tokenizer stages emit in place of text. Because
// they are Symbols they can never compare equal to a string of field data.
// The descriptions are the ASCII Unit Separator (U+001F) and Record Separator
// (U+001E) control characters; they only serve as labels.
const END_OF_FIELD = Symbol('\u001f');
const END_OF_RECORD = Symbol('\u001e');
/**
 * Extract the `charset` parameter from a MIME type / Content-Type string.
 *
 * @param {?string} content_type - e.g. `text/csv; charset="utf-8"`; may be
 *   null, undefined or empty.
 * @returns {string} The declared charset label, or 'utf-8' when none is
 *   declared.
 */
function csv_charset_of(content_type) {
  // The value stops at whitespace, a quote or ';' so that trailing parameters
  // (e.g. `; header=present`) are not swallowed into the label.
  const found = content_type?.match(/;\s*charset=("?)(?<value>[^\s";]+)\1/i);
  return found?.groups.value ?? 'utf-8';
}

/**
 * Public iterator: parse CSV and yield one plain object per data record,
 * keyed by the names found in the first (header) record.
 *
 * @param {string|Blob|Response|Iterable<string>|AsyncIterable<string>} text_stream
 *   CSV source. A string is treated as a single chunk. A Blob or Response is
 *   decoded using the charset declared in its MIME type (default utf-8).
 *   Anything else is iterated as-is and must produce string chunks.
 * @yields {Object<string, string>} One object per record after the header.
 *   Fields beyond the header's length are stored under `_N` (1-based column
 *   number); records shorter than the header only carry the fields present.
 * @throws {TypeError} If the bytes are not valid in the selected encoding
 *   (the decoder is created with `fatal: true`).
 * @throws {RangeError} If the declared charset label is not supported.
 */
export async function* csv_iterate(text_stream) {
  // NOTE(review): `ignoreBOM: true` disables BOM stripping, so a leading
  // byte-order mark stays in the decoded text and would become part of the
  // first header name — confirm this is intended.
  const decoder_options = { fatal: true, ignoreBOM: true };

  let chunks = text_stream;
  if (typeof text_stream === 'string') {
    chunks = [text_stream];
  } else if (text_stream instanceof Blob) {
    chunks = text_stream.stream().pipeThrough(
      new TextDecoderStream(csv_charset_of(text_stream.type), decoder_options)
    );
  } else if (text_stream instanceof Response) {
    chunks = text_stream.body.pipeThrough(
      new TextDecoderStream(
        csv_charset_of(text_stream.headers.get('content-type')),
        decoder_options
      )
    );
  }

  const rows = csv_iterate_1(chunks);
  // The first record supplies the keys; tolerate a source with no records.
  const { value: keys = [] } = await rows.next();
  for await (const row of rows) {
    if (row.length === keys.length) {
      yield Object.fromEntries(keys.map((k, i) => [k, row[i]]));
    } else {
      // Ragged record: name surplus columns by position.
      yield Object.fromEntries(row.map((v, i) => [keys[i] ?? `_${i + 1}`, v]));
    }
  }
}
/**
 * Core iterator: assemble the token stream into records.
 *
 * Text tokens are concatenated into the current field, END_OF_FIELD closes
 * the field, and END_OF_RECORD closes the field and emits the record.
 *
 * @param {Iterable<string>|AsyncIterable<string>} text_stream - chunks of
 *   CSV text, passed through to the tokenizer.
 * @yields {string[]} One array of field values per record.
 */
async function* csv_iterate_1(text_stream) {
  let field = "";
  let row = [];
  // True once any token has been seen since the last END_OF_RECORD, i.e. the
  // input has started a record that has not been emitted yet.
  let pending = false;
  for await (const token of csv_iterate_2(text_stream)) {
    switch (token) {
      case END_OF_FIELD:
        row.push(field);
        field = "";
        pending = true;
        break;
      case END_OF_RECORD:
        row.push(field);
        field = "";
        yield row;
        row = [];
        pending = false;
        break;
      default:
        field += token;
        pending = true;
    }
  }
  // End of input. The final line break is optional in CSV, so only emit a
  // last record when the input actually started one; otherwise every file
  // ending in a newline would produce a spurious [""] record.
  // Limitation: a final record consisting solely of a quoted empty field
  // ("") with no trailing line break produces no tokens and is not emitted.
  if (pending) {
    row.push(field);
    yield row;
  }
}
/**
 * Meta tokenizer: interpret double quotes in the raw token stream.
 *
 * Outside quotes, ',' becomes END_OF_FIELD and a line break ('\n' or
 * '\r\n') becomes END_OF_RECORD. Inside quotes every token is passed through
 * as text, and a doubled quote yields one literal '"'. The quote characters
 * that open and close a quoted section are consumed.
 *
 * @param {Iterable<string>|AsyncIterable<string>} text_stream - chunks of
 *   CSV text, passed through to the raw tokenizer.
 * @yields {string|symbol} Text fragments, END_OF_FIELD or END_OF_RECORD.
 */
async function* csv_iterate_2(text_stream) {
  const UNQUOTED = 0;
  const QUOTED = 1;
  const QUOTE_SEEN = 2; // inside quotes, just read a '"': closing or escape?

  let mode = UNQUOTED;
  for await (const piece of csv_iterate_3(text_stream)) {
    const isQuote = piece === '"';

    if (mode === QUOTED) {
      if (isQuote) {
        mode = QUOTE_SEEN;
      } else {
        yield piece;
      }
      continue;
    }

    if (mode === QUOTE_SEEN) {
      if (isQuote) {
        // Doubled quote: emit one literal quote and stay inside the quotes.
        yield piece;
        mode = QUOTED;
        continue;
      }
      // The previous quote closed the section; handle this token as unquoted.
      mode = UNQUOTED;
    }

    if (piece === ',') {
      yield END_OF_FIELD;
    } else if (piece === '\n' || piece === '\r\n') {
      yield END_OF_RECORD;
    } else if (isQuote) {
      mode = QUOTED;
    } else {
      yield piece;
    }
  }
}
/**
 * Raw tokenizer: split chunks of text into ',' / '"' / line-break tokens and
 * the runs of other text between them.
 *
 * A line break is '\n' or '\r\n'. A '\r\n' pair that straddles a chunk
 * boundary is still reported as a single '\r\n' token. A run of text may be
 * delivered as several consecutive tokens; only their concatenation is
 * significant.
 *
 * @param {Iterable<string>|AsyncIterable<string>} text_stream - chunks of
 *   CSV text.
 * @yields {string} ',' | '"' | '\n' | '\r\n' | a non-empty run of other text.
 */
async function* csv_iterate_3(text_stream) {
  const token_pattern = /(?:,|\r?\n|"|.+?(?=,|\r?\n|"|$))/gs;
  // A '\r' ending a chunk may be the first half of a CRLF whose '\n' opens
  // the next chunk. Hold it back and prepend it to the next chunk so the
  // pattern sees the pair together; if no '\n' follows, it is matched as
  // ordinary text instead of being lost.
  let held_cr = '';
  for await (const chunk of text_stream) {
    let text = held_cr + chunk;
    held_cr = '';
    if (text.endsWith('\r')) {
      held_cr = '\r';
      text = text.slice(0, -1);
    }
    for (const [s] of text.matchAll(token_pattern)) {
      yield s;
    }
  }
  if (held_cr) {
    // Input ended on a bare '\r'; it was not a split CRLF, so hand it over.
    yield held_cr;
  }
}
@James-E-A
Copy link
Author

James-E-A commented May 29, 2025

// Usage example: fetch a CSV report and collect every record yielded by
// csv_iterate into an array. `temp0` is assigned without a declaration here.
temp0 = await (
	fetch('https://ccadb.my.salesforce-sites.com/mozilla/IncludedCACertificateReportCSVFormat')
	// The Response itself is handed to csv_iterate.
	.then(r => Array.fromAsync(csv_iterate(r)))
);

// Open the IndexedDB database "test" at version 2460825, adapting the
// event-based open request to a promise that resolves with the request's
// result on 'success' and rejects on 'error'.
db = await new Promise((resolve, reject) => {
	const req = indexedDB.open("test", 2460825);
	req.addEventListener('error', (event) => void reject(event.error || event.target.error));
	req.addEventListener('success', (event) => void resolve(event.target.result));
	// On 'upgradeneeded', create the object store used below. No key path is
	// given, so keys are supplied explicitly on each put().
	req.addEventListener('upgradeneeded', (event) => {
		const db = event.target.result;
		db.createObjectStore("IncludedCACertificateReport");
	});
});

// Store every parsed record in one readwrite transaction; the promise
// settles when the transaction completes, aborts or errors. The database
// connection is closed afterwards in either case.
try {
	await new Promise((resolve, reject) => {
		const txn = db.transaction("IncludedCACertificateReport", 'readwrite');
		// An 'abort' event is wrapped in an Error that keeps the event as `cause`.
		txn.addEventListener('abort', (event) => void reject(new Error(`${Object.getPrototypeOf(event.target).constructor.name}: ${event.type} event`, { cause: event })));
		// NOTE(review): event.target is presumably the transaction itself, so
		// `.transaction` on it would be undefined — confirm; the resolved value is unused here.
		txn.addEventListener('complete', (event) => void resolve(event.target.transaction));
		txn.addEventListener('error', (event) => void reject(event.error || event.target.error));

		const os = txn.objectStore("IncludedCACertificateReport");
		
		temp0.forEach((record) => {
			// Key: the "SHA-256 Fingerprint" column with ':' inserted after every
			// two word characters that are followed by another one, lowercased.
			os.put(record, record["SHA-256 Fingerprint"].replaceAll(/(\w{2})(?=\w)/g, "$1:").toLowerCase());
		});
	});
} finally {
	// NOTE(review): the `await` looks unnecessary if close() returns nothing — confirm.
	await db.close();
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment