Created
December 23, 2025 04:47
-
-
Save Frenzycore/6e4b2739868a9e2f47c222241e51a9df to your computer and use it in GitHub Desktop.
Scraper for random Wikipedia articles, fetched via https://wikipedia.org/wiki/Special:Random
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import axios from "axios"; | |
| import * as cheerio from "cheerio"; | |
/**
 * Resolves an image `src` attribute to an absolute URL.
 *
 * Handles protocol-relative (`//host/path`), root-relative, and already
 * absolute sources uniformly instead of blindly prefixing `https:`.
 *
 * @param {string | undefined} src - Raw `src` attribute value, if any.
 * @param {string} baseUrl - Absolute URL of the page the image was found on.
 * @returns {string | null} Absolute image URL, or `null` when `src` is missing
 *   or cannot be parsed.
 */
function resolveImageUrl(src, baseUrl) {
  if (!src) {
    return null;
  }
  try {
    return new URL(src, baseUrl).href;
  } catch {
    return null;
  }
}

/**
 * Requests Wikipedia's `Special:Random` page and extracts structured data
 * from the returned article HTML.
 *
 * @returns {Promise<{
 *   title: string,
 *   url: string,
 *   shortDescription: string | null,
 *   leadParagraphs: string,
 *   mainImage: string | null,
 *   coordinates: string | null,
 *   infobox: Object<string, string> | null,
 *   tableOfContents: string[] | null,
 *   categories: string[] | null,
 *   externalLinks: Array<{ text: string, url: string | undefined }> | null
 * }>} Extracted article data; list/object fields are `null` when empty.
 * @throws {Error} `Scraping failed: <reason>`; the underlying error is
 *   available as `error.cause`.
 */
async function scrapeWikipediaRandom() {
  const url = 'https://wikipedia.org/wiki/Special:Random';
  try {
    // NOTE(review): this is a browser-like User-Agent; confirm it is acceptable
    // under Wikimedia's User-Agent policy before running this at volume.
    const response = await axios.get(url, {
      headers: { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36' },
    });
    const html = response.data;
    // The post-redirect URL lives in adapter-specific places: `res.responseUrl`
    // on the Node http adapter, `responseURL` on XMLHttpRequest. Fall back to
    // the requested URL rather than throwing when neither is present.
    const finalUrl =
      response.request?.res?.responseUrl ?? response.request?.responseURL ?? url;
    const $ = cheerio.load(html);

    const title = $('#firstHeading').text().trim();
    const shortDescription = $('div.shortdescription').text().trim() || null;

    // Only the first two non-empty lead paragraphs are kept.
    const leadParagraphs = $('.mw-parser-output > p:not(.mw-empty-elt)')
      .map((i, el) => $(el).text().trim())
      .get()
      .slice(0, 2)
      .join('\n\n');

    // Infobox rows become key/value pairs; rows missing either a header or a
    // value cell are skipped, and a repeated header overwrites the earlier one.
    const infoboxData = {};
    $('table.infobox tr').each((index, element) => {
      const key = $(element).find('th').text().trim();
      const value = $(element).find('td').text().trim().replace(/\s\s+/g, ' ');
      if (key && value) {
        infoboxData[key] = value;
      }
    });

    const mainImageSrc = $('table.infobox').find('.infobox-image img').attr('src');
    const mainImage = resolveImageUrl(mainImageSrc, finalUrl);

    const tableOfContents = $('#toc ul li a .toctext')
      .map((i, el) => $(el).text().trim())
      .get();

    const categories = $('#catlinks ul li a')
      .map((i, el) => $(el).text().trim())
      .get();

    // Links are taken from the first <ul> that follows the "External links"
    // heading's parent element.
    const externalLinks = [];
    $('#External_links').parent().nextAll('ul').first().find('li a.external').each((i, el) => {
      externalLinks.push({
        text: $(el).text().trim(),
        url: $(el).attr('href'),
      });
    });

    // Prefer decimal coordinates; fall back to degrees/minutes/seconds.
    const coordinatesText = $('#coordinates .geo-dec').text().trim();
    const coordinates = coordinatesText || $('#coordinates .geo-dms').text().trim() || null;

    return {
      title,
      url: finalUrl,
      shortDescription,
      leadParagraphs,
      mainImage,
      coordinates,
      infobox: Object.keys(infoboxData).length > 0 ? infoboxData : null,
      tableOfContents: tableOfContents.length > 0 ? tableOfContents : null,
      categories: categories.length > 0 ? categories : null,
      externalLinks: externalLinks.length > 0 ? externalLinks : null,
    };
  } catch (error) {
    // Keep the original error (and its stack) reachable for callers.
    throw new Error(`Scraping failed: ${error.message}`, { cause: error });
  }
}
| export { scrapeWikipediaRandom }; | |
| // 4:29 -- https://github.com/Frenzycore -- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment