-
-
Save snowfluke/6faa92771ceaf41945c1821829b0a301 to your computer and use it in GitHub Desktop.
| // curl -fsSL https://bun.sh/install | bash | |
| // bun add ppu-pdf ppu-paddle-ocr onnxruntime-node | |
| // Run it: bun run index.ts | |
| import { PaddleOcrService } from "ppu-paddle-ocr"; | |
| import { PdfReader } from "ppu-pdf"; | |
| /** Base URL that the detection and recognition model URLs below are built from. */ | |
| export const MODEL_BASE_URL = | |
|   "https://media.githubusercontent.com/media/PT-Perkasa-Pilar-Utama/ppu-paddle-ocr-models/main"; | |
| /** Base URL that the character dictionary URL below is built from. */ | |
| export const DICT_BASE_URL = | |
|   "https://raw.githubusercontent.com/PT-Perkasa-Pilar-Utama/ppu-paddle-ocr-models/main"; | |
| // Swap the model variant and dictionary here to trade accuracy against performance. | |
| // The dictionary has to match the recognition model, otherwise the OCR results will be inaccurate. | |
| const detectionModelUrl = `${MODEL_BASE_URL}/detection/PP-OCRv5_mobile_det_infer.onnx`; | |
| const recognitionModelUrl = `${MODEL_BASE_URL}/recognition/PP-OCRv5_mobile_rec_infer.onnx`; | |
| const dictionaryUrl = `${DICT_BASE_URL}/recognition/ppocrv5_dict.txt`; | |
| // Shared PDF reader used by every step of the script. | |
| const pdfReader = new PdfReader({ verbose: false }); | |
| // Shared OCR service; it is initialized once here and reused for every document. | |
| const ocr = new PaddleOcrService({ | |
|   model: { | |
|     detection: detectionModelUrl, | |
|     recognition: recognitionModelUrl, | |
|     charactersDictionary: dictionaryUrl, | |
|   }, | |
| }); | |
| await ocr.initialize(); | |
| // Download OCR model and warm up cache. | |
| // One full render + OCR pass runs on a sample PDF before the timed section starts; | |
| // the OCR output is intentionally discarded. | |
| console.log("Warming up OCR model..."); | |
| { | |
|   const testBuffer = await Bun.file("./assets/opposite-expectation-scan.pdf").arrayBuffer(); | |
|   const testDoc = pdfReader.open(testBuffer); | |
|   try { | |
|     const testCanvas = await pdfReader.renderAll(testDoc); | |
|     await pdfReader.getTextsScanned(ocr, testCanvas); | |
|   } finally { | |
|     // Close the document even when rendering or OCR throws. | |
|     pdfReader.destroy(testDoc); | |
|   } | |
| } | |
| console.log("Warmup complete.\n"); | |
| // Timed end-to-end run: read a scanned PDF, OCR it, rebuild it with the OCR texts, and save the result. | |
| console.time("Normal inference"); | |
| { | |
|   // 1. Reading the file from disk | |
|   const fileScan = Bun.file("./assets/test_japanese.pdf"); | |
|   const bufferScan = await fileScan.arrayBuffer(); | |
|   // 2. Open and Render | |
|   // The document is closed as soon as rendering finishes (or fails), before OCR starts. | |
|   const canvasMap = await (async () => { | |
|     const pdfScan = pdfReader.open(bufferScan); | |
|     try { | |
|       return await pdfReader.renderAll(pdfScan); | |
|     } finally { | |
|       pdfReader.destroy(pdfScan); | |
|     } | |
|   })(); | |
|   // 3. Extract OCR Texts | |
|   const texts = await pdfReader.getTextsScanned(ocr, canvasMap); | |
|   // 4. Rebuild Searchable PDF | |
|   // The same buffer is opened a second time for the rebuild and closed even if the rebuild throws. | |
|   const rebuiltPdfBuffer = await (async () => { | |
|     const pdfForRebuild = pdfReader.open(bufferScan); | |
|     try { | |
|       return await pdfReader.rebuild(pdfForRebuild, texts); | |
|     } finally { | |
|       pdfReader.destroy(pdfForRebuild); | |
|     } | |
|   })(); | |
|   // 5. Save onto disk | |
|   await Bun.write("./test_japanese_searchable.pdf", rebuiltPdfBuffer); | |
| } | |
| console.timeEnd("Normal inference"); | |
| // import { bench, group, run } from "mitata"; | |
| // console.log("\nStarting benchmarking") | |
| // group("ppu-pdf e2e processing", () => { | |
| // bench("Extract Texts and Rebuild PDF", async () => { | |
| // const fileScan = Bun.file("./assets/test_japanese.pdf"); | |
| // const bufferScan = await fileScan.arrayBuffer(); | |
| // const pdfScan = pdfReader.open(bufferScan); | |
| // const canvasMap = await pdfReader.renderAll(pdfScan); | |
| // pdfReader.destroy(pdfScan); | |
| // const texts = await pdfReader.getTextsScanned(ocr, canvasMap); | |
| // const pdfForRebuild = pdfReader.open(bufferScan); | |
| // const rebuiltPdfBuffer = await pdfReader.rebuild(pdfForRebuild, texts); | |
| // pdfReader.destroy(pdfForRebuild); | |
| // await Bun.write("./test_japanese_searchable.pdf", rebuiltPdfBuffer); | |
| // }); | |
| // }); | |
| // await run({ | |
| // colors: true, | |
| // }); | |
| // Tear down the OCR service now that all processing above has finished. | |
| await ocr.destroy(); | |
| // BENCHMARK RESULT | |
| // benchmark avg (min … max) p75 / p99 (min … top 1%) | |
| // -------------------------------------------- ------------------------------- | |
| // • ppu-pdf e2e processing | |
| // -------------------------------------------- ------------------------------- | |
| // japan_PP-OCRv3_mobile_rec_infer.onnx + japan_dict.txt | |
| // Extract Texts and Rebuild PDF 798.30 ms/iter 799.05 ms █ █ | |
| // (783.87 ms … 850.33 ms) 817.52 ms █ █ | |
| // (224.00 kb … 18.47 mb) 9.74 mb █▁█▁▁▁█▁██▁▁▁▁▁█▁▁▁▁█ | |
| // PP-OCRv5_mobile_rec_infer.onnx + ppocrv5_dict.txt | |
| // Extract Texts and Rebuild PDF 802.18 ms/iter 803.59 ms █ █ █ | |
| // (792.74 ms … 825.94 ms) 817.62 ms █ █▅ █▅ ▅ ▅ ▅ | |
| // ( 16.00 kb … 15.58 mb) 7.87 mb █▁██▁██▁▁█▁▁▁█▁▁▁▁▁▁█ | |
| // PP-OCRv5_server_rec_infer.onnx + ppocrv5_dict.txt | |
| // Extract Texts and Rebuild PDF 802.84 ms/iter 804.37 ms █ | |
| // (797.71 ms … 819.77 ms) 808.87 ms ▅█▅▅▅▅▅ ▅ ▅ ▅ | |
| // (384.00 kb … 33.72 mb) 11.90 mb ███████▁▁▁▁▁█▁▁▁█▁▁▁█ |
OCR Benchmark Report
Results
| Model | Avg | p75 | p99 | Avg Mem | Max Mem |
|---|---|---|---|---|---|
| japan_PP-OCRv3_mobile | 798.30 ms | 799.05 ms | 817.52 ms | 9.74 mb | 18.47 mb |
| PP-OCRv5_mobile | 802.18 ms | 803.59 ms | 817.62 ms | 7.87 mb | 15.58 mb |
| PP-OCRv5_server | 802.84 ms | 804.37 ms | 808.87 ms | 11.90 mb | 33.72 mb |
Analysis
Latency
All three models complete in ~800ms for the full document with no meaningful difference. The gap between fastest and slowest is only 4.54ms — within measurement noise.
Memory
PP-OCRv5_mobile is the most efficient at 7.87mb average. PP-OCRv5_server consumes the most at 11.90mb average with a spike to 33.72mb, reflecting larger model weights loaded at runtime. japan_PP-OCRv3_mobile shows the widest swing (224kb → 18.47mb), suggesting inconsistent allocation, possibly from lazy model initialization on the first iteration.
Latency Consistency
PP-OCRv5_server has the tightest spread (797–819ms, p99 808ms), making it the most predictable under load. PP-OCRv5_mobile is close behind. japan_PP-OCRv3_mobile has the widest range (783–850ms), with occasional spikes visible in the histogram.
Recommendation
Use PP-OCRv5_mobile for production.
- Lowest memory footprint (7.87mb avg, 15.58mb max) — important for WebAssembly where heap is constrained
- Speed is on par with the other models (~800ms)
- Better latency consistency than japan_PP-OCRv3_mobile
- No overhead cost of server model weights, unlike PP-OCRv5_server
PP-OCRv5_server is worth considering only if accuracy on complex Japanese layouts proves insufficient with the mobile model — the memory tradeoff is significant in a browser context.
Hypothetical 180-page run in the Bun.js runtime
| Model | Est. Duration | Est. Avg Mem | Est. Max Mem |
|---|---|---|---|
| japan_PP-OCRv3_mobile | ~14.4 s | ~174 mb | ~332 mb |
| PP-OCRv5_mobile | ~14.4 s | ~142 mb | ~280 mb |
| PP-OCRv5_server | ~14.5 s | ~214 mb | ~607 mb |
You can remove the benchmark-related code