Skip to content

Instantly share code, notes, and snippets.

@Malien
Last active July 24, 2024 10:13
Show Gist options
  • Select an option

  • Save Malien/ac8206e46cab1b164a7ccbd5070f597a to your computer and use it in GitHub Desktop.

Select an option

Save Malien/ac8206e46cab1b164a7ccbd5070f597a to your computer and use it in GitHub Desktop.
IDB Bulk Extraction Benchmark

Was trying to see which method for bulk retrieving data from IndexedDB would perform best on different data distributions.

Key takeaways:

  • Use Promise.all(keys.map(key => store.get(key))) for most cases (random lookups or even contiguous-ish cases are mostly fine).
  • Use store.getAll(IDBKeyRange.bound(lower, upper)) when you know you are interested in a contiguous range.
  • Avoid cursor API whenever possible. If you are fine with a bit more memory usage, just fetch everything via getAll() and do filtering afterwards.
  • The chunked retrieval is honestly pretty decent for contiguous/contiguous-ish lookups.
  • When the loads start to look like full-table retrieval (i.e. retrieving 80% of the records), Promise.all starts to fall behind. Consider doing full-table retrieval, or, if you are memory constrained, chunked iteration/full-table scan via getAll or cursor API.

Duh.

<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>IDB retrieval benchmark</title>
<style>
table {
width: 100%;
border-collapse: collapse;
}
th,
td {
border: 1px solid black;
padding: 0.5rem;
}
th {
background-color: #f0f0f0;
}
tr:nth-child(even) {
background-color: #f0f0f0;
}
</style>
</head>
<body>
<table>
<thead>
<tr>
<th>Name</th>
<th>Time (ms)</th>
</tr>
</thead>
<tbody id="results"></tbody>
</table>
<script type="module">
import {
openDB,
deleteDB,
} from "https://cdn.jsdelivr.net/npm/idb@8/+esm"
// A representative record cloned TEST_SIZE times to populate the store.
const decentlySizedRecord = {
name: "John Doe",
socials: {
facebook: "https://facebook.com/johndoe",
twitter: "https://twitter.com/johndoe",
instagram: "https://instagram.com/johndoe",
},
payout: null,
phoneNumber: "1234567890",
email: "mymail@mail.com",
city: { id: 4, name: "New York" },
cover: {
type: "original",
src: "https://example.com/johndoe.jpg",
},
videoURL: "https://example.com/johndoe.mp4",
textDescription: "This is a description of John Doe",
galleryImages: [
{
type: "original",
src: "https://example.com/johndoe1.jpg",
},
{
type: "original",
src: "https://example.com/johndoe2.jpg",
},
{
type: "original",
src: "https://example.com/johndoe3.jpg",
},
],
}
// Number of records inserted into the object store.
const TEST_SIZE = 100_000
// How many times each benchmark case is repeated (see sampling()).
const SAMPLES = 10
// Drop any database left over from a previous run so keys start fresh.
await deleteDB("test-db")
// Open (and on first run create) the database; keys are auto-incremented.
const db = await recordTiming("open database", async () => {
return await openDB("test-db", 1, {
upgrade(db) {
db.createObjectStore("store", { autoIncrement: true })
},
})
})
// Populate the store with TEST_SIZE copies of the fixture record.
// Awaited so the write transaction is committed (and its timing row
// recorded) before any read benchmark starts — previously this promise
// was left floating.
await recordTiming("add records", async () => {
  const tx = db.transaction("store", "readwrite")
  const store = tx.store
  for (let i = 0; i < TEST_SIZE; i++) {
    store.add(decentlySizedRecord)
  }
  await tx.done
})
await db.get("store", 0) // flush writes
// Single-record lookups at one random key, one retrieval strategy per case.
await recordTiming("fetch one in random (one by one)", async () => {
await sampling(async () => {
await fetchOneByOne([(Math.random() * TEST_SIZE) | 0])
})
})
await recordTiming(
"fetch one in random (scanning bounded)",
async () => {
await sampling(async () => {
await fetchByScanningBounded([
(Math.random() * TEST_SIZE) | 0,
])
})
},
)
await recordTiming("fetch one in random (chunked)", async () => {
await sampling(async () => {
await fetchByChunkingBounds([
(Math.random() * TEST_SIZE) | 0,
])
})
})
await recordTiming(
"fetch one in random (chunked padded)",
async () => {
await sampling(async () => {
await fetchByChunkingBoundsPaddedCursorAPI([
(Math.random() * TEST_SIZE) | 0,
])
})
},
)
// Bulk lookups: three sample sizes crossed with three key distributions
// (uniformly random, one contiguous run, contiguous run with ~sqrt(n) holes).
for (const sampleSize of [10, 10_000, 80_000]) {
const random = randomSample(sampleSize)
const contigious = contigiousSample(sampleSize)
const contigiousWithHoles = contigiousSampleWithHoles(
sampleSize,
Math.sqrt(sampleSize) | 0,
)
await bench(sampleSize, random, "random")
await bench(sampleSize, contigious, "contigious")
await bench(
sampleSize,
contigiousWithHoles,
"contigious with holes",
)
}
/**
 * Runs every bulk-retrieval strategy against one key sample and records
 * a timing row per strategy.
 *
 * @param {number} sampleSize - number of keys in `sample` (label only)
 * @param {number[]} sample - keys to fetch
 * @param {string} distribution - human-readable distribution name (label only)
 */
async function bench(sampleSize, sample, distribution) {
  const strategies = [
    ["one by one", fetchOneByOne],
    ["scanning", fetchByScanning],
    ["scanning bounded", fetchByScanningBounded],
    ["filtered in-memory bounded", fetchByFetchingBounded],
    ["chunked", fetchByChunkingBounds],
    ["chunked padded cursor API", fetchByChunkingBoundsPaddedCursorAPI],
    ["chunked padded post filter", fetchByChunkingBoundsPaddedPostFilter],
  ]
  for (const [label, strategy] of strategies) {
    await recordTiming(
      `fetch ${sampleSize} in ${distribution} (${label})`,
      async () => {
        await sampling(async () => {
          await strategy(sample)
        })
      },
    )
  }
}
/**
 * Measures how long the async `fn` takes, appends a <tr> with the case
 * name and elapsed milliseconds to the #results table body, and returns
 * whatever `fn` resolved to. Console output is grouped under `caseName`;
 * the group is closed even when `fn` throws.
 */
async function recordTiming(caseName, fn) {
  console.group(caseName)
  try {
    const startedAt = performance.now()
    const result = await fn()
    const elapsedMs = performance.now() - startedAt
    const row = document.createElement("tr")
    row.innerHTML = `<td>${caseName}</td><td>${elapsedMs}</td>`
    document.getElementById("results").appendChild(row)
    return result
  } finally {
    console.groupEnd()
  }
}
/**
 * Invokes `fn` SAMPLES times sequentially, logging each run's duration
 * to the console as "Sample N".
 */
async function sampling(fn) {
  for (let run = 1; run <= SAMPLES; run++) {
    const label = `Sample ${run}`
    console.time(label)
    await fn()
    console.timeEnd(label)
  }
}
/**
 * Draws `size` distinct keys uniformly at random from [0, TEST_SIZE).
 * Rejection-samples via a Set, so duplicates are simply retried.
 */
function randomSample(size) {
  const picked = new Set()
  while (picked.size < size) {
    picked.add(Math.floor(Math.random() * TEST_SIZE))
  }
  return [...picked]
}
/**
 * Builds a run of `size` consecutive keys starting at a random offset,
 * wrapping around at TEST_SIZE.
 */
function contigiousSample(size) {
  const offset = Math.floor(Math.random() * TEST_SIZE)
  return Array.from({ length: size }, (_, i) => (offset + i) % TEST_SIZE)
}
/**
 * Produces a contiguous key run with `holeCount` randomly chosen keys
 * removed, i.e. the result has size - holeCount elements. Relative order
 * of the surviving keys is preserved.
 */
function contigiousSampleWithHoles(size, holeCount) {
  const sample = contigiousSample(size)
  let remaining = holeCount
  while (remaining-- > 0) {
    const victim = Math.floor(Math.random() * sample.length)
    sample.splice(victim, 1)
  }
  return sample
}
/**
 * Issues one get() request per key on a single read transaction and
 * resolves with the records in the same order as `ids`.
 */
async function fetchOneByOne(ids) {
  const store = db.transaction("store").store
  const requests = ids.map((id) => store.get(id))
  return Promise.all(requests)
}
/**
 * Full-table scan with a cursor, collecting only the records whose keys
 * appear in `ids`. Sorts `ids` in place (cursor walks keys ascending) and
 * stops as soon as the last requested key has been seen.
 */
async function fetchByScanning(ids) {
  ids.sort((a, b) => a - b)
  const { store } = db.transaction("store")
  const found = []
  let next = 0
  for await (const cursor of store.iterate()) {
    if (cursor.key !== ids[next]) continue
    found.push(cursor.value)
    next += 1
    if (next === ids.length) break
  }
  return found
}
/**
 * Cursor scan restricted to the [min(ids), max(ids)] key range, collecting
 * only the records whose keys appear in `ids`. Sorts `ids` in place and
 * bails out early once every requested key has been matched.
 */
async function fetchByScanningBounded(ids) {
  ids.sort((a, b) => a - b)
  const { store } = db.transaction("store")
  const range = IDBKeyRange.bound(ids[0], ids[ids.length - 1])
  const found = []
  let next = 0
  for await (const cursor of store.iterate(range)) {
    if (cursor.key === ids[next]) {
      found.push(cursor.value)
      next += 1
      if (next === ids.length) break
    }
  }
  return found
}
/**
 * Fetches the whole [min(ids), max(ids)] range with getAll() and filters
 * the wanted records in memory. Sorts `ids` in place.
 *
 * Fix: getAll() returns bare record values with no `key` property, so the
 * original `value.key === ids[i]` check never matched and this always
 * returned []. We now fetch the matching key list in parallel via
 * getAllKeys() — per the IndexedDB spec both requests on the same
 * transaction resolve with positionally aligned keys/values.
 */
async function fetchByFetchingBounded(ids) {
  ids.sort((a, b) => a - b)
  const { store } = db.transaction("store")
  const range = IDBKeyRange.bound(ids[0], ids[ids.length - 1])
  const [keys, values] = await Promise.all([
    store.getAllKeys(range),
    store.getAll(range),
  ])
  const results = []
  let i = 0
  for (let j = 0; j < keys.length && i < ids.length; j++) {
    if (keys[j] === ids[i]) {
      results.push(values[j])
      i++
    }
  }
  return results
}
/**
 * Splits the (in-place sorted) ids into contiguous ranges and issues one
 * getAll() per range. The request promises are deliberately left floating:
 * IDB requests on one transaction resolve in issue order, so chunks land
 * in `results` sorted, and `await tx.done` below only resolves after every
 * request (and its .then callback) has completed.
 *
 * Fix: removed the unused local `i` left over from the filtered variants.
 */
async function fetchByChunkingBounds(ids) {
  const tx = db.transaction("store")
  const store = tx.store
  const results = []
  ids.sort((a, b) => a - b)
  for (const chunk of splitIntoContigiousChunks(ids)) {
    store
      .getAll(IDBKeyRange.bound(chunk.start, chunk.end))
      .then((values) => {
        results.push(...values)
      })
  }
  await tx.done
  return results
}
/**
 * Splits the (in-place sorted) ids into contiguous ranges merged across
 * gaps of up to 10 keys, then walks each range with a cursor, filtering
 * padding records out via the shared `i` index. The per-chunk async IIFEs
 * are left floating; `await tx.done` resolves only after the transaction
 * (and therefore every cursor walk) has finished.
 *
 * Fix: push cursor.value, not the cursor itself — idb reuses the cursor
 * object as it advances, so storing it left stale references in results.
 */
async function fetchByChunkingBoundsPaddedCursorAPI(ids) {
  const tx = db.transaction("store")
  const store = tx.store
  const results = []
  let i = 0
  ids.sort((a, b) => a - b)
  for (const chunk of splitIntoContigiousChunks(ids, 10)) {
    ;(async () => {
      for await (const cursor of store.iterate(
        IDBKeyRange.bound(chunk.start, chunk.end),
      )) {
        if (cursor.key === ids[i]) {
          i++
          results.push(cursor.value)
        }
      }
    })()
  }
  await tx.done
  return results
}
/**
 * Splits the (in-place sorted) ids into contiguous ranges merged across
 * gaps of up to 10 keys, fetches each range with getAll(), and filters
 * padding records out afterwards. Request promises are left floating;
 * `await tx.done` resolves after every request callback has run, and IDB
 * request ordering keeps `results` sorted by key.
 *
 * Fix: getAll() returns bare record values with no `key` property, so the
 * original `value.key === ids[i]` check never matched and this always
 * returned []. The keys are now fetched in parallel via getAllKeys() and
 * matched positionally against the values.
 */
async function fetchByChunkingBoundsPaddedPostFilter(ids) {
  const tx = db.transaction("store")
  const store = tx.store
  const results = []
  let i = 0
  ids.sort((a, b) => a - b)
  for (const chunk of splitIntoContigiousChunks(ids, 10)) {
    const range = IDBKeyRange.bound(chunk.start, chunk.end)
    Promise.all([store.getAllKeys(range), store.getAll(range)]).then(
      ([keys, values]) => {
        for (let j = 0; j < keys.length; j++) {
          if (keys[j] === ids[i]) {
            i++
            results.push(values[j])
          }
        }
      },
    )
  }
  await tx.done
  return results
}
/**
 * Groups a sorted list of integer keys into inclusive { start, end } ranges.
 * Two neighbouring keys land in the same range when the gap between them is
 * at most `padding` (so the default padding of 1 merges only consecutive keys).
 *
 * @param {number[]} values - keys, assumed sorted ascending
 * @param {number} [padding=1] - maximum gap that still merges into one range
 * @yields {{start: number, end: number}} inclusive key ranges, in order
 */
function* splitIntoContigiousChunks(values, padding = 1) {
  let current = null
  for (const key of values) {
    if (current === null) {
      current = { start: key, end: key }
    } else if (key <= current.end + padding) {
      current.end = key
    } else {
      yield current
      current = { start: key, end: key }
    }
  }
  if (current !== null) yield current
}
</script>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment