Last active
February 3, 2025 13:18
-
-
Save guillaume-rygn/ee8d5b406e2c2181a97eac1f5ffa6e5d to your computer and use it in GitHub Desktop.
Youtube_transcript_playlist
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import { YoutubeTranscript } from 'youtube-transcript'; | |
| import puppeteer from 'puppeteer'; | |
| import fs from 'fs'; | |
| import readline from 'readline'; | |
// Interactive prompt used to ask the user for the playlist URL.
const rl = readline.createInterface({
  input: process.stdin,
  output: process.stdout,
});

// Characters that are invalid (or dangerous) in file names on common
// platforms: path separators, Windows-reserved punctuation, control chars.
const FORBIDDEN_FILE_CHARS = /[\\\/:*?"<>|\u0000-\u001f]/g;

// File stem used when the playlist title is missing or empty after cleaning.
const DEFAULT_FILE_STEM = 'playlist';

/**
 * Converts a raw playlist title into a file-name stem.
 *
 * Surrounding whitespace is trimmed first (trimming after replacing spaces
 * would leave leading/trailing underscores), whitespace runs become a single
 * underscore, and characters that cannot appear in a file name are removed.
 *
 * @param {string | null | undefined} rawTitle - Title text read from the page.
 * @returns {string} A non-empty stem; `DEFAULT_FILE_STEM` when nothing usable remains.
 */
function toFileStem(rawTitle) {
  const stem = String(rawTitle ?? '')
    .trim()
    .replace(/\s+/g, '_')
    .replace(FORBIDDEN_FILE_CHARS, '');
  return stem === '' ? DEFAULT_FILE_STEM : stem;
}

/**
 * Opens the playlist page in headless Chromium and extracts its title and
 * the list of videos.
 *
 * @param {string} playlistUrl - URL of the YouTube playlist page.
 * @returns {Promise<{ rawTitle: string, videos: Array<{ url: string, title: string }> }>}
 *   The unprocessed title text and one `{ url, title }` entry per `#wc-endpoint` element.
 * @throws Propagates navigation / selector errors; the browser is closed first.
 */
async function scrapePlaylist(playlistUrl) {
  const browser = await puppeteer.launch({ headless: true });
  try {
    const page = await browser.newPage();
    await page.goto(playlistUrl, { waitUntil: 'networkidle2' });
    await page.waitForSelector('#wc-endpoint');

    // The title lookup depends on page markup we do not control, so a missing
    // element yields an empty title (default file name) instead of aborting.
    const rawTitle = await page.evaluate(() => {
      const heading = document.querySelectorAll('#header-description')[2]?.firstElementChild;
      return heading?.textContent ?? '';
    });

    const videos = await page.evaluate(() =>
      Array.from(document.querySelectorAll('#wc-endpoint'), (anchor) => {
        const titleElement = anchor.querySelector('#video-title');
        return {
          url: anchor.href,
          title: titleElement ? titleElement.textContent.trim() : 'Titre non trouvé',
        };
      }),
    );

    return { rawTitle, videos };
  } finally {
    // Always release the browser process, even when scraping fails.
    await browser.close();
  }
}

/**
 * Fetches the transcript of every video concurrently.
 *
 * Results keep the order of `videos` (not the order in which requests
 * finish). Videos whose transcript cannot be fetched are logged and skipped.
 *
 * @param {Array<{ url: string, title: string }>} videos - Videos to transcribe.
 * @returns {Promise<Array<{ url: string, title: string, transcription: string }>>}
 *   One entry per successfully transcribed video.
 */
async function fetchTranscripts(videos) {
  const results = await Promise.all(
    videos.map(async ({ url, title }) => {
      try {
        const segments = await YoutubeTranscript.fetchTranscript(url);
        // Each segment text is followed by a single space, as in the output format.
        const transcription = segments.map((segment) => `${segment.text} `).join('');
        return { url, title, transcription };
      } catch (error) {
        console.error(`Erreur lors de la récupération de la transcription pour ${url}:`, error);
        return null;
      }
    }),
  );
  return results.filter((entry) => entry !== null);
}

// Entry point: ask for the playlist URL, scrape it, fetch all transcripts and
// write them as pretty-printed JSON to `<playlist title>.txt`.
rl.question('Veuillez entrer l\'URL de la playlist YouTube : ', async (playlistUrl) => {
  // The only input has been read; release stdin right away.
  rl.close();

  let fileName;
  let data;
  try {
    const { rawTitle, videos } = await scrapePlaylist(playlistUrl.trim());
    const transcripts = await fetchTranscripts(videos);
    fileName = `${toFileStem(rawTitle)}.txt`;
    data = JSON.stringify(transcripts, null, 2);
  } catch (error) {
    console.error('Erreur lors du traitement de la playlist', error);
    process.exitCode = 1;
    return;
  }

  try {
    await fs.promises.writeFile(fileName, data);
    console.log(`Fichier ${fileName} créé avec succès`);
  } catch (err) {
    console.error("Erreur lors de l'écriture du fichier", err);
    process.exitCode = 1;
  }
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment