Skip to content

Instantly share code, notes, and snippets.

@AKST
Last active November 29, 2025 10:49
Show Gist options
  • Select an option

  • Save AKST/f6ce5ce0a4fd51c7f0a8c899d24e8635 to your computer and use it in GitHub Desktop.

Select an option

Save AKST/f6ce5ce0a4fd51c7f0a8c899d24e8635 to your computer and use it in GitHub Desktop.
import { mkdir } from 'node:fs/promises';
import { homedir } from 'node:os';
import fs from 'node:fs';
import { pipeline } from 'node:stream/promises';
import { Readable } from 'node:stream';
import { JSDOM } from 'jsdom';
// Origin (scheme + host) used as the prefix for every annual-report page URL this script requests.
const RBA_HOST = 'https://www.rba.gov.au';
/**
 * Yields each year whose annual report page should be visited.
 *
 * The sequence is not strictly chronological: 1980-2019 are produced
 * interleaved by decade (1980, 1990, 2000, 2010, 1981, 1991, ...).
 *
 * @returns {Generator<number>} the years, in visiting order
 */
function * getYears() {
  // The following are unavailable, 1964, 1968, 1969, 1970, 1971, 1972, 1973, 1974
  for (const offset of [0, 1, 2, 3, 5, 6, 7]) {
    yield 1960 + offset;
  }

  for (let year = 1975; year <= 1979; year += 1) {
    yield year;
  }

  // For each final digit, emit that year in every decade from the 1980s to the 2010s.
  const decades = [1980, 1990, 2000, 2010];
  for (let offset = 0; offset <= 9; offset += 1) {
    for (const decade of decades) {
      yield decade + offset;
    }
  }

  for (let year = 2020; year <= 2024; year += 1) {
    yield year;
  }
}
/**
 * Logs a diagnostic to stderr for any HTTP status other than 200.
 *
 * @param {number} status - HTTP status code of the response
 * @param {string} url - URL that was requested (only reported for 404s)
 * @returns {void}
 */
function checkStatus(status, url) {
  if (status === 200) {
    return;
  }

  if (status === 404) {
    console.error(`not found, ${url}`);
    return;
  }

  console.error('bad request', status);
}
/**
 * Fetches a page and parses it into a DOM document.
 *
 * Waits 100ms before issuing the request. The body is parsed regardless of
 * the HTTP status; the status is reported through `checkStatus`.
 *
 * @param {string} url - page to download
 * @returns {Promise<[boolean, Document, string]>} `[ok, document, text]`
 *   where `ok` is true only for a 200 response
 * @throws rethrows any error raised while fetching or parsing, after logging the URL
 */
async function loadDocument(url) {
  try {
    await new Promise((resolve) => {
      setTimeout(resolve, 100);
    });

    const response = await fetch(url);
    const text = await response.text();
    const dom = new JSDOM(text);
    const document = dom.window.document;

    checkStatus(response.status, url);
    const isOk = response.status === 200;
    return [isOk, document, text];
  } catch (error) {
    console.error('crash on', url);
    throw error;
  }
}
/**
 * Downloads `image` and streams it to the file `fname`.
 *
 * Waits 100ms before issuing the request. Never throws: every failure is
 * reported through the returned tuple.
 *
 * @param {string} image - absolute URL of the image to download
 * @param {string} fname - destination file path
 * @returns {Promise<[boolean, unknown]>} `[true, undefined]` on success,
 *   otherwise `[false, error]`
 */
async function saveImage(image, fname) {
  try {
    await new Promise((resolve) => setTimeout(resolve, 100));
    const response = await fetch(image);
    checkStatus(response.status, image);

    // Only a 200 response carries the image; writing any other body would
    // leave an error page on disk under an image file name.
    if (response.status !== 200) {
      // Release the connection instead of leaving the body unread.
      await response.body?.cancel();
      return [false, new Error(`unexpected status ${response.status} for ${image}`)];
    }

    if (response.body == null) {
      return [false, new Error(`empty response body for ${image}`)];
    }

    await pipeline(Readable.fromWeb(response.body), fs.createWriteStream(fname));
    return [true, undefined];
  } catch (e) {
    return [false, e];
  }
}
/**
 * Returns the file extension (without the leading dot) of the last path
 * segment of `imageUrl`, ignoring any query string or fragment.
 *
 * @param {string} imageUrl - absolute URL
 * @returns {string} the extension, or '' when the file name has none
 */
function imageExtension(imageUrl) {
  const { pathname } = new URL(imageUrl);
  const fileName = pathname.slice(pathname.lastIndexOf('/') + 1);
  const dot = fileName.lastIndexOf('.');
  return dot === -1 ? '' : fileName.slice(dot + 1);
}

/**
 * Crawls the annual report index page of every year from `getYears()`,
 * follows the links found in its content lists, and saves each graph image
 * found on those sub-pages into a per-year directory under a fresh
 * timestamped folder in the user's Downloads directory.
 *
 * Pages that do not load with status 200 are skipped; image download
 * failures are logged and do not stop the crawl.
 *
 * @returns {Promise<void>}
 */
async function main () {
  const outDir = `${homedir()}/Downloads/rba-${new Date().toISOString()}`;
  await mkdir(outDir, { recursive: true });

  for (const year of getYears()) {
    const fdir = `${outDir}/${year}`;
    await mkdir(fdir, { recursive: true });

    const annualReportPage = `${RBA_HOST}/publications/annual-reports/rba/${year}/`;
    console.info('loading:year', annualReportPage);
    const [reportOk, reportDocument] = await loadDocument(annualReportPage);
    if (!reportOk) continue;

    // Images are numbered per year, across all of that year's sub-pages.
    let imageCounter = 1;
    const links = [
      ...reportDocument.querySelectorAll('#content > ul li a'),
      ...reportDocument.querySelectorAll('#content > section > ul li a'),
    ];

    for (const link of links) {
      // Resolve against the page URL so relative hrefs become absolute.
      const href = new URL(link.href, annualReportPage).href;
      console.info('loading:year:sub', href);
      const [pageOk, page] = await loadDocument(href);
      if (!pageOk) continue;

      for (const imageEl of page.querySelectorAll('#content > .image-graph > figure > img')) {
        const image = new URL(imageEl.src, href).href;
        // Take the extension from the path only, so `?query` / `#hash`
        // suffixes never leak into the saved file name.
        const ext = imageExtension(image);
        const base = `${fdir}/graph-${year}-${imageCounter++}`;
        const fname = ext === '' ? base : `${base}.${ext}`;
        const [saved, error] = await saveImage(image, fname);
        if (!saved) console.error({ error, image, fname, href, year });
      }
    }
  }
}
// Run the scraper. On failure, log the error and mark the process as failed
// so that shells and CI can detect it through a non-zero exit code.
main().then(
  () => console.log('done'),
  (e) => {
    console.error(e);
    process.exitCode = 1;
  },
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment