Last active
November 27, 2025 09:00
-
-
Save ErikBrendel/240864885851acd090bc7a516b8a3ca9 to your computer and use it in GitHub Desktop.
Extract archbee documentation document tree
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env node | |
| /** | |
| * Extracts all documentation pages from Archbee __NEXT_DATA__ | |
| */ | |
| const https = require("https") | |
/**
 * Minimal GET helper: wraps the callback-style https.get in a Promise.
 * @param {string} url - Absolute https:// URL to request.
 * @returns {Promise<{status: number, body: string}>} Resolves with the HTTP
 *   status code and the full response body as text; rejects on network error.
 */
function fetch(url) {
  return new Promise((resolve, reject) => {
    const request = https.get(url, (response) => {
      const chunks = []
      response.on("data", (chunk) => chunks.push(chunk))
      response.on("end", () => {
        resolve({ status: response.statusCode, body: chunks.join("") })
      })
    })
    request.on("error", reject)
  })
}
/**
 * Pull the Next.js bootstrap payload out of a rendered HTML page.
 * @param {string} html - Full HTML source of the page.
 * @returns {object|null} Parsed __NEXT_DATA__ JSON, or null when the script
 *   tag is absent or its content is not valid JSON (logged to stderr).
 */
function extractNextData(html) {
  // Non-greedy [\s\S]*? stops at the first closing </script> tag and, unlike
  // the stricter [^<]+, tolerates literal "<" characters inside the JSON
  // payload (Next.js usually escapes them as \u003c, but not guaranteed).
  const match = html.match(/<script id="__NEXT_DATA__"[^>]*>([\s\S]*?)<\/script>/)
  if (!match) return null
  try {
    return JSON.parse(match[1])
  } catch (e) {
    console.error("Failed to parse __NEXT_DATA__:", e.message)
    return null
  }
}
/**
 * Flatten the Archbee document tree into a list of page records.
 * @param {object} nextData - Parsed __NEXT_DATA__ payload.
 * @returns {Array<{title: string, urlPath: string, ancestors: string, docId: string}>}
 *   One entry per document node; empty array when the tree is missing.
 */
function extractPages(nextData) {
  const pages = []
  // Navigate to the document tree
  const docSpace = nextData?.props?.pageProps?._docSpace
  // Array.isArray also rejects a truthy-but-non-array value, which would
  // otherwise crash on .forEach below.
  if (!docSpace || !Array.isArray(docSpace.publicDocsTree)) {
    console.error("Could not find publicDocsTree in data structure")
    return pages
  }
  const tree = docSpace.publicDocsTree
  // Recursively extract all documents from tree
  function extractFromNode(node, ancestors = []) {
    if (!node || !node.id) return
    const docId = node.id.replace("PUBLISHED-", "")
    // Guard: a node without a urlKey (malformed data) previously threw a
    // TypeError on .split; treat it as an empty key and skip the push.
    const urlKey = typeof node.urlKey === "string" ? node.urlKey : ""
    const urlPath = "/" + urlKey
    const title = node.name || node.categoryName
    // Extract just the slug (last segment of urlKey)
    const slug = urlKey.split("/").pop()
    const ancestorsPath = ancestors.join("/")
    if (docId && urlKey && title) {
      pages.push({ title, urlPath, ancestors: ancestorsPath, docId })
    }
    // Process children recursively, passing current slug as ancestor
    if (node.children && Array.isArray(node.children)) {
      const childAncestors = [...ancestors, slug]
      node.children.forEach((child) => extractFromNode(child, childAncestors))
    }
  }
  // Extract from each root node
  tree.forEach((node) => extractFromNode(node, []))
  return pages
}
/**
 * Quote a value for CSV output per RFC 4180.
 * @param {*} value - Field value; coerced to a string (null/undefined -> "").
 * @returns {string} The value, wrapped in double quotes with inner quotes
 *   doubled when it contains a delimiter, quote, or line break.
 */
function escapeCSV(value) {
  // Coerce defensively: the original threw on non-string input.
  const str = String(value ?? "")
  // \r is included so data with Windows line endings cannot break a row.
  if (/[",\n\r]/.test(str)) {
    return '"' + str.replace(/"/g, '""') + '"'
  }
  return str
}
/**
 * Entry point: fetch the docs page named on the command line, parse its
 * __NEXT_DATA__ payload, and print the flattened page list as CSV.
 * Diagnostics go to stderr so stdout remains clean CSV for piping.
 */
async function main() {
  const docsUrl = process.argv[2]
  if (!docsUrl) {
    console.error("Usage: node archbee-parse.cjs <docs-url>")
    console.error("Example: node archbee-parse.cjs https://docs.your-page.com/")
    process.exit(1)
  }
  console.error("Fetching documentation data...\n")
  const response = await fetch(docsUrl)
  if (response.status !== 200) {
    console.error(`Failed to fetch page: HTTP ${response.status}`)
    process.exit(1)
  }
  const nextData = extractNextData(response.body)
  if (!nextData) {
    console.error("Could not extract __NEXT_DATA__ from page")
    process.exit(1)
  }
  const pages = extractPages(nextData)
  console.error(`Found ${pages.length} pages.\n`)
  // Emit header row, then one CSV row per page.
  console.log("title,urlPath,ancestors,ID")
  for (const { title, urlPath, ancestors, docId } of pages) {
    const fields = [escapeCSV(title), escapeCSV(urlPath), escapeCSV(ancestors), docId]
    console.log(fields.join(","))
  }
}
// Exit non-zero on unhandled failure so shell pipelines can detect errors
// (the original logged the error but still exited 0).
main().catch((err) => {
  console.error(err)
  process.exit(1)
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment