Skip to content

Instantly share code, notes, and snippets.

@Frenzycore
Created December 23, 2025 04:24
Show Gist options
  • Select an option

  • Save Frenzycore/5a6f12675760fe698876dc23204d4560 to your computer and use it in GitHub Desktop.

Select an option

Save Frenzycore/5a6f12675760fe698876dc23204d4560 to your computer and use it in GitHub Desktop.
Scraper for the site https://quotes.toscrape.com
import axios from "axios";
import * as cheerio from "cheerio";
/**
 * Scrapes every quote from https://quotes.toscrape.com/, following the
 * pager's "next" link from page to page until no further page is linked.
 *
 * Pages are fetched one after another because the URL of a page is only
 * known once the previous page has been parsed.
 *
 * @returns {Promise<{
 *   totalQuotes: number,
 *   quotes: Array<{
 *     text: string,
 *     author: string,
 *     authorUrl: (string|null),
 *     tags: string[],
 *     page: number
 *   }>
 * }>} All collected quotes plus their count. `page` is the 1-based index of
 *   the page the quote was found on; `authorUrl` is an absolute URL, or
 *   `null` when the quote has no author link.
 * @throws {Error} With message `Scraping failed: <reason>` when a request or
 *   a parsing step fails. The underlying error is available as `error.cause`.
 */
async function scrapeQuotesToScrape() {
  // Loop-invariant request options, built once instead of once per page.
  const requestOptions = {
    headers: {
      "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    },
  };

  const allQuotes = [];
  // Guards against a "next" link that points back to a page that was already
  // scraped, which would otherwise make the loop run forever.
  const visitedUrls = new Set();
  let currentUrl = "https://quotes.toscrape.com/";
  let page = 1;

  try {
    while (currentUrl && !visitedUrls.has(currentUrl)) {
      visitedUrls.add(currentUrl);

      // Per-iteration constants so the callbacks below do not close over
      // the mutable loop variables.
      const pageUrl = currentUrl;
      const pageNumber = page;

      const { data: html } = await axios.get(pageUrl, requestOptions);
      const $ = cheerio.load(html);

      $(".quote").each((_, el) => {
        const element = $(el);
        const text = element.find("span.text").text().trim();
        const author = element.find("small.author").text().trim();
        // Tag links are anchors too, so exclude them; otherwise a quote
        // without an author link would report its first tag URL instead.
        const authorUrlPath = element.find("a:not(.tag)").attr("href");
        const tags = element
          .find(".tags a.tag")
          .map((_, tagEl) => $(tagEl).text().trim())
          .get();

        allQuotes.push({
          text,
          author,
          // hrefs are resolved against the page they appeared on.
          authorUrl: authorUrlPath ? new URL(authorUrlPath, pageUrl).href : null,
          tags,
          page: pageNumber,
        });
      });

      const nextPagePath = $(".pager .next a").attr("href");
      currentUrl = nextPagePath ? new URL(nextPagePath, pageUrl).href : null;
      page++;
    }

    return {
      totalQuotes: allQuotes.length,
      quotes: allQuotes,
    };
  } catch (error) {
    // Keep the original error (stack, HTTP response details) reachable
    // through `cause` instead of flattening it into the message only.
    throw new Error(`Scraping failed: ${error.message}`, { cause: error });
  }
}
export { scrapeQuotesToScrape };
// 4:29 -- https://github.com/Frenzycore --
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment