Created
April 2, 2019 14:08
-
-
Save anticlergygang/5764d0b5d51dbcfc4af8cf89dbff94d9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const https = require('https')
const util = require('util')
const colors = require('colors')
// Per-board accumulators for downloaded thread JSON. They are only ever
// appended to (never reassigned), so the bindings are constant.
// Only polthreads is filled by live code in this file; the /g/ and /biz/
// scrapers that would fill the other two are commented out further down.
const gthreads = []
const polthreads = []
const bizthreads = []
/**
 * Download and parse `/<board>/catalog.json` from a.4cdn.org.
 *
 * @param {string} board - Board name without slashes, e.g. 'pol'.
 * @returns {Promise<*>} Resolves with the JSON-parsed response body.
 *   Rejects with the request/response error, or with an Error whose message
 *   embeds the raw body when the body is not valid JSON.
 */
const catalogPromise = (board) => {
    return new Promise((resolve, reject) => {
        const req = https.request({
            host: 'a.4cdn.org',
            path: `/${board}/catalog.json`,
            port: 443,
            method: 'GET',
        }, (res) => {
            // Collect the raw Buffers and decode once at the end: decoding
            // chunk by chunk corrupts any multi-byte UTF-8 character that
            // happens to be split across two chunks.
            const chunks = []
            res.on('data', (chunk) => {
                chunks.push(chunk)
            })
            res.on('error', reject)
            res.on('end', () => {
                const data = Buffer.concat(chunks).toString('utf8')
                try {
                    resolve(JSON.parse(data))
                } catch (err) {
                    // Non-JSON body (e.g. an HTML error page): surface it in
                    // the rejection so the caller can see what came back.
                    reject(new Error(`failed to parse data:\n${util.inspect(data)}\n`))
                }
            })
        })
        req.on('error', reject)
        req.end()
    })
}
/**
 * Download and parse `/<board>/thread/<thread>.json` from a.4cdn.org.
 * The request path is logged to the console before the request is sent.
 *
 * @param {string} board - Board name without slashes, e.g. 'pol'.
 * @param {number|string} thread - Thread number (the `no` of a catalog entry).
 * @returns {Promise<*>} Resolves with the JSON-parsed response body.
 *   Rejects with the request/response error, or with an Error whose message
 *   embeds the raw body when the body is not valid JSON.
 */
const threadPromise = (board, thread) => {
    return new Promise((resolve, reject) => {
        const path = `/${board}/thread/${thread}.json`
        console.log(path)
        const req = https.request({
            host: 'a.4cdn.org',
            path: path,
            port: 443,
            method: 'GET',
        }, (res) => {
            // Collect the raw Buffers and decode once at the end: decoding
            // chunk by chunk corrupts any multi-byte UTF-8 character that
            // happens to be split across two chunks.
            const chunks = []
            res.on('data', (chunk) => {
                chunks.push(chunk)
            })
            res.on('error', reject)
            res.on('end', () => {
                const data = Buffer.concat(chunks).toString('utf8')
                try {
                    resolve(JSON.parse(data))
                } catch (err) {
                    // Non-JSON body (e.g. an HTML error page): surface it in
                    // the rejection so the caller can see what came back.
                    reject(new Error(`failed to parse data:\n${util.inspect(data)}\n`))
                }
            })
        })
        req.on('error', reject)
        req.end()
    })
}
// Fetch the 'pol' catalog, queue every thread number it lists, then download
// the queued threads one at a time and append each result to polthreads.
catalogPromise('pol').then(pages => {
    // Pending [board, threadNumber] pairs, consumed from the front.
    const threadQueue = []
    pages.forEach(page => {
        page.threads.forEach(thread => {
            threadQueue.push(['pol', thread.no])
        })
    })

    // False while a thread request is in flight, so ticks never overlap.
    let readyForNewRequest = true

    // One timer drives the whole queue (previously a timer was started for
    // every catalog page, and none of them was ever cleared). At most one
    // request is started per 1100 ms tick.
    const scrapeInterval = setInterval(() => {
        if (threadQueue.length === 0) {
            // Nothing left to start; stop polling so the process can exit
            // once any in-flight request has settled.
            clearInterval(scrapeInterval)
            return
        }
        if (!readyForNewRequest) {
            return
        }
        readyForNewRequest = false
        const [board, threadNo] = threadQueue.shift()
        threadPromise(board, threadNo).then(threadInfo => {
            console.log(threadInfo)
            polthreads.push(threadInfo)
        }).catch(err => {
            // A failed thread is logged and dropped, not retried.
            console.log(err)
        }).then(() => {
            readyForNewRequest = true
        })
    }, 1100)
}).catch(err => {
    console.log(err)
})
// Convert the HTML of a post's `com` field into plain text for the console.
// Assumes the text arrives HTML-escaped (&gt;, &lt;, &quot;, &#039;, &amp;);
// the previous patterns replaced each character with itself and so decoded
// nothing. TODO confirm against the API's comment format.
const formatComment = (com) => {
    return com
        // Same-thread quote links become a plain ">>123456789". The number is
        // captured in full from the href (it used to be cut to 8 digits).
        .replace(/<a href="#p(\d+)" class="quotelink">(?:&gt;|>){2}\d+<\/a>/g, '>>$1')
        .replace(/<wbr>/g, '')
        .replace(/<br>/g, '\n')
        .replace(/&#039;/g, '\'')
        .replace(/<span class="quote">&gt;/g, '>')
        .replace(/&gt;/g, '>')
        .replace(/<\/span>/g, '')
        .replace(/<pre class="prettyprint">/g, '\n[code]\n')
        .replace(/<\/pre>/g, '\n[/code]\n')
        .replace(/&quot;/g, '"')
        .replace(/&lt;/g, '<')
        // &amp; is decoded last so "&amp;gt;" stays the literal text "&gt;".
        .replace(/&amp;/g, '&')
}

// Print every collected 'pol' thread: a highlighted "<no>: <subject>" header
// taken from the first post, then one "<no>: <text>" line per post that has
// a comment.
// NOTE(review): this runs synchronously when the file is loaded, while
// polthreads is only filled from asynchronous callbacks, so it looks like it
// sees an empty list unless the list was populated beforehand; confirm how
// this section is meant to be run.
polthreads.forEach((thread) => {
    const firstPost = thread.posts[0]
    console.log(`${firstPost.no}: ${firstPost.sub}`.greenBG)
    thread.posts.forEach((post) => {
        if (post.com === undefined) {
            return
        }
        console.log(`${post.no}: ${formatComment(post.com)}`)
    })
})
// Request /get_video_info from www.youtube.com for one video URL and print
// the decoded `title` field of the response. Failures are logged, not thrown.
const printVideoTitle = (videoUrl) => {
    const req = https.request({
        host: 'www.youtube.com',
        // The video id is whatever sits between the first and second '='
        // of the URL (or the end of the string).
        path: `/get_video_info?video_id=${videoUrl.split('=')[1]}`,
        port: 443,
        method: 'GET',
    }, (res) => {
        // Collect raw Buffers and decode once, so a multi-byte character
        // split across two chunks is not corrupted.
        const chunks = []
        res.on('data', (chunk) => {
            chunks.push(chunk)
        })
        res.on('end', () => {
            try {
                const data = Buffer.concat(chunks).toString('utf8')
                // The body is treated as a query string: take the value after
                // "&title=", turn '+' into spaces, then percent-decode it.
                console.log(decodeURIComponent(data.split('&title=')[1].split('&')[0].replace(/[+]/g, ' ')))
            } catch (err) {
                // Covers a response without a title field or bad escapes.
                console.error(err);
            }
        })
    })
    req.on('error', (err) => {
        console.log(err)
    })
    req.end()
}

// Video URLs to look up; empty by default, so nothing is requested.
let videos = []
videos.forEach((videoUrl) => {
    printVideoTitle(videoUrl)
})
| // catalogPromise('g').then(pages => { | |
| // let threadPromises = [] | |
| // let readyForNewRequest = true | |
| // pages.forEach((page, pageIndex) => { | |
| // page.threads.forEach((thread, threadIndex) => { | |
| // threadPromises.push(['g', thread.no]) | |
| // console.log(threadPromises.length) | |
| // }) | |
| // let scrapeInterval = setInterval(() => { | |
| // if (threadPromises.length !== 0 && readyForNewRequest === true) { | |
| // readyForNewRequest = false | |
| // try { | |
| // threadPromise(threadPromises[0][0], threadPromises[0][1]).then(threadInfo => { | |
| // console.log(threadInfo) | |
| // gthreads.push(threadInfo) | |
| // threadPromises.splice(0, 1) | |
| // readyForNewRequest = true | |
| // }).catch(err => { | |
| // console.log(err) | |
| // threadPromises.splice(0, 1) | |
| // readyForNewRequest = true | |
| // }) | |
| // } catch (err) { | |
| // console.log(err) | |
| // } | |
| // } | |
| // }, (1100)) | |
| // }) | |
| // }).catch(err => { | |
| // console.log(err) | |
| // }) | |
| // catalogPromise('biz').then(pages => { | |
| // let threadPromises = [] | |
| // let readyForNewRequest = true | |
| // pages.forEach((page, pageIndex) => { | |
| // page.threads.forEach((thread, threadIndex) => { | |
| // threadPromises.push(['biz', thread.no]) | |
| // }) | |
| // let scrapeInterval = setInterval(() => { | |
| // if (threadPromises.length !== 0 && readyForNewRequest === true) { | |
| // readyForNewRequest = false | |
| // try { | |
| // threadPromise(threadPromises[0][0], threadPromises[0][1]).then(threadInfo => { | |
| // console.log(threadInfo) | |
| // bizthreads.push(threadInfo) | |
| // threadPromises.splice(0, 1) | |
| // readyForNewRequest = true | |
| // }).catch(err => { | |
| // console.log(err) | |
| // threadPromises.splice(0, 1) | |
| // readyForNewRequest = true | |
| // }) | |
| // } catch (err) { | |
| // console.log(err) | |
| // } | |
| // } | |
| // }, (1100)) | |
| // }) | |
| // }).catch(err => { | |
| // console.log(err) | |
| // }) | |
| // gthreads.forEach((thread, threadIndex) => { | |
| // thread.posts.forEach((post, postIndex) => { | |
| // if (post.com !== undefined) { | |
| // let comOut = post.com | |
| // if (comOut.match(/<a href="#p\d*" class="quotelink">>>\d*<\/a>/g)) { | |
| // comOut.match(/<a href="#p\d*" class="quotelink">>>\d*<\/a>/g).forEach((e, i) => { | |
| // comOut = comOut.replace(e, `>>${e.match(/\d\d\d\d\d\d\d\d/)[0]}`) | |
| // }) | |
| // } | |
| // comOut = comOut.replace(/<wbr>/g, '').replace(/<br>/g, '\n').replace(/'/g, '\'').replace(/<span class="quote">>/g, '>').replace(/>/g, '>').replace(/<\/span>/g, '').replace(/<pre class="prettyprint">/g, '\n[code]\n').replace(/<\/pre>/g, '\n[/code]\n').replace(/"/g, '"').replace(/</g, '<').replace(/&/g, '&') | |
| // console.log('') | |
| // console.log(`${thread.posts[0].no}->${post.no}:`) | |
| // console.log(comOut) | |
| // console.log('') | |
| // } | |
| // }) | |
| // }) | |
| // bizthreads.forEach((thread, threadIndex) => { | |
| // console.log(`${thread.posts[0].no}: ${thread.posts[0].sub}`.greenBG) | |
| // thread.posts.forEach((post, postIndex) => { | |
| // if (post.com !== undefined) { | |
| // let comOut = post.com | |
| // if (comOut.match(/<a href="#p\d*" class="quotelink">>>\d*<\/a>/g)) { | |
| // comOut.match(/<a href="#p\d*" class="quotelink">>>\d*<\/a>/g).forEach((e, i) => { | |
| // comOut = comOut.replace(e, `>>${e.match(/\d\d\d\d\d\d\d\d/)[0]}`) | |
| // }) | |
| // } | |
| // comOut = comOut.replace(/<wbr>/g, '').replace(/<br>/g, '\n').replace(/'/g, '\'').replace(/<span class="quote">>/g, '>').replace(/>/g, '>').replace(/<\/span>/g, '').replace(/<pre class="prettyprint">/g, '\n[code]\n').replace(/<\/pre>/g, '\n[/code]\n').replace(/"/g, '"').replace(/</g, '<').replace(/&/g, '&') | |
| // console.log(`${post.no}: ${comOut}`) | |
| // } | |
| // }) | |
| // }) | |
| // https://www.tunemymusic.com/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment