// Gist anticlergygang/5764d0b5d51dbcfc4af8cf89dbff94d9 by @anticlergygang, created April 2, 2019 14:08.
// (GitHub page navigation text that preceded the code was removed; it is not part of the script.)
const https = require('https')
const util = require('util')
const colors = require('colors')
// Module-level stores for fetched thread objects, one array per board.
// Only polthreads is filled by live code in this file; gthreads and
// bizthreads are referenced solely by the commented-out /g/ and /biz/
// scrapers further down.
let gthreads = []
let polthreads = []
let bizthreads = []
/**
 * Issues a GET request to a.4cdn.org over HTTPS and resolves with the
 * response body parsed as JSON.
 *
 * Shared by catalogPromise and threadPromise, which differ only in the path
 * they request.
 *
 * @param {string} path - Absolute request path, e.g. `/pol/catalog.json`.
 * @returns {Promise<*>} Resolves with the parsed JSON value. Rejects with the
 *   underlying error when the request or response stream fails, or with an
 *   Error whose message starts with `failed to parse data:` when the body is
 *   not valid JSON.
 */
const fetchJson = (path) => {
  return new Promise((resolve, reject) => {
    const req = https.request({
      host: 'a.4cdn.org',
      path,
      port: 443,
      method: 'GET',
    }, (res) => {
      // Decode at the stream level: converting each Buffer chunk to a string
      // on its own corrupts a multi-byte UTF-8 character that happens to be
      // split across two chunks.
      res.setEncoding('utf8')
      let data = ''
      res.on('data', (chunk) => {
        data += chunk
      })
      res.on('error', reject)
      res.on('end', () => {
        try {
          resolve(JSON.parse(data))
        } catch (err) {
          // Reject with a real Error (not a bare string) so callers get a
          // stack trace; the message text is unchanged.
          reject(new Error(`failed to parse data:\n${util.inspect(data)}\n`))
        }
      })
    })
    req.on('error', reject)
    req.end()
  })
}

/**
 * Fetches the catalog of a board.
 *
 * @param {string} board - Board name without slashes, e.g. `pol`.
 * @returns {Promise<Array>} Resolves with the parsed `/<board>/catalog.json`
 *   payload; callers in this file treat it as an array of pages, each with a
 *   `threads` array.
 */
const catalogPromise = (board) => {
  return fetchJson(`/${board}/catalog.json`)
}

/**
 * Fetches a single thread of a board and logs the requested path.
 *
 * @param {string} board - Board name without slashes, e.g. `pol`.
 * @param {number|string} thread - Thread number (the `no` of its first post).
 * @returns {Promise<Object>} Resolves with the parsed
 *   `/<board>/thread/<thread>.json` payload; callers in this file read its
 *   `posts` array.
 */
const threadPromise = (board, thread) => {
  const path = `/${board}/thread/${thread}.json`
  console.log(path)
  return fetchJson(path)
}
// Pause inserted before every thread request. 1100 ms is the pacing the
// original polling interval used; presumably chosen to stay under the API's
// request-rate limit -- confirm against the API rules.
const REQUEST_INTERVAL_MS = 1100

// Matches an in-thread quote link and captures the quoted post number as it
// is displayed after the escaped ">>".
const QUOTE_LINK_PATTERN = /<a href="#p\d*" class="quotelink">&gt;&gt;(\d*)<\/a>/g

/**
 * Resolves after the given number of milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms))

/**
 * Converts the HTML of a post comment into plain text for the console.
 *
 * Quote links become `>>NUMBER` using the full captured number; the previous
 * fixed 8-digit match truncated longer post numbers and threw on shorter
 * ones. `&amp;` is decoded last so already-escaped entities are not decoded
 * twice.
 *
 * @param {string} html - Raw `com` field of a post.
 * @returns {string} Plain-text rendering of the comment.
 */
const formatComment = (html) => {
  return html
    .replace(QUOTE_LINK_PATTERN, '>>$1')
    .replace(/<wbr>/g, '')
    .replace(/<br>/g, '\n')
    .replace(/&#039;/g, '\'')
    .replace(/<span class="quote">&gt;/g, '>')
    .replace(/&gt;/g, '>')
    .replace(/<\/span>/g, '')
    .replace(/<pre class="prettyprint">/g, '\n[code]\n')
    .replace(/<\/pre>/g, '\n[/code]\n')
    .replace(/&quot;/g, '"')
    .replace(/&lt;/g, '<')
    .replace(/&amp;/g, '&')
}

/**
 * Prints one thread: a highlighted header line built from the first post's
 * number and subject, then every post that has a comment.
 *
 * @param {Object} thread - Thread object with a `posts` array.
 */
const printThread = (thread) => {
  const opening = thread.posts[0]
  // greenBG is a String extension expected to come from the `colors` package
  // required at the top of the file.
  console.log(`${opening.no}: ${opening.sub}`.greenBG)
  thread.posts.forEach((post) => {
    if (post.com !== undefined) {
      console.log(`${post.no}: ${formatComment(post.com)}`)
    }
  })
}

/**
 * Downloads every thread listed in a board's catalog, one request at a time,
 * and appends each fetched thread to `store`.
 *
 * The previous implementation created one never-cleared setInterval per
 * catalog page; the timers fired together, which defeated the pacing and kept
 * the process alive after the queue was drained. Here the requests are
 * chained, so exactly one is in flight at a time and the chain ends when the
 * queue does.
 *
 * @param {string} board - Board name without slashes, e.g. `pol`.
 * @param {Array} store - Array that receives each fetched thread object.
 * @returns {Promise<void>} Resolves once every thread has been attempted; a
 *   failed thread is logged and skipped. Rejects only if the catalog request
 *   itself fails.
 */
const scrapeBoard = (board, store) => {
  return catalogPromise(board).then((pages) => {
    const threadNumbers = []
    pages.forEach((page) => {
      page.threads.forEach((thread) => {
        threadNumbers.push(thread.no)
      })
    })
    return threadNumbers.reduce((previous, threadNumber) => {
      return previous
        .then(() => wait(REQUEST_INTERVAL_MS))
        .then(() => threadPromise(board, threadNumber))
        .then((threadInfo) => {
          console.log(threadInfo)
          store.push(threadInfo)
        })
        .catch((err) => {
          // One bad thread (deleted, unparsable) must not stop the rest.
          console.log(err)
        })
    }, Promise.resolve())
  })
}

// Print only after scraping has finished: the print loop used to run
// synchronously at load time, when polthreads was still empty.
scrapeBoard('pol', polthreads).then(() => {
  polthreads.forEach((thread) => {
    printThread(thread)
  })
}).catch((err) => {
  console.log(err)
})
/**
 * Extracts the video id from a YouTube link.
 *
 * Absolute URLs are parsed properly and the `v` query parameter is returned.
 * Anything else falls back to the text after the first `=`, cut at the next
 * `&` so trailing parameters (e.g. `&t=42`) are not mistaken for part of the
 * id, which the previous `split('=')[1]` did.
 *
 * @param {string} link - Video link, e.g. `https://www.youtube.com/watch?v=ID`.
 * @returns {string|undefined} The video id, or undefined when none is found.
 */
const extractVideoId = (link) => {
  try {
    const fromQuery = new URL(link).searchParams.get('v')
    if (fromQuery) {
      return fromQuery
    }
  } catch (err) {
    // Not an absolute URL; fall through to the plain-text fallback below.
  }
  const afterEquals = String(link).split('=')[1]
  return afterEquals === undefined ? undefined : afterEquals.split('&')[0]
}

/**
 * Reads the `title` field out of a URL-encoded `get_video_info` response body.
 *
 * @param {string} body - Response body in `key=value&key=value` form.
 * @returns {string|null} The decoded title (`+` becomes a space), or null when
 *   the body has no `title` field.
 */
const parseVideoTitle = (body) => new URLSearchParams(body).get('title')

// Links whose titles should be looked up; empty by default, so no request is
// made unless entries are added here.
let videos = []
videos.forEach((link) => {
  const videoId = extractVideoId(link)
  if (!videoId) {
    console.error(new Error(`no video id found in: ${link}`))
    return
  }
  // NOTE(review): confirm that www.youtube.com still serves /get_video_info.
  const path = `/get_video_info?video_id=${encodeURIComponent(videoId)}`
  const req = https.request({
    host: 'www.youtube.com',
    path,
    port: 443,
    method: 'GET',
  }, (res) => {
    // Decode at the stream level so multi-byte characters split across
    // chunks survive intact.
    res.setEncoding('utf8')
    let data = ''
    res.on('data', (chunk) => {
      data += chunk
    })
    res.on('end', () => {
      const title = parseVideoTitle(data)
      if (title === null) {
        console.error(new Error(`no title in video info for: ${videoId}`))
        return
      }
      console.log(title)
    })
  })
  req.on('error', (err) => {
    console.log(err)
  })
  req.end()
})
// catalogPromise('g').then(pages => {
// let threadPromises = []
// let readyForNewRequest = true
// pages.forEach((page, pageIndex) => {
// page.threads.forEach((thread, threadIndex) => {
// threadPromises.push(['g', thread.no])
// console.log(threadPromises.length)
// })
// let scrapeInterval = setInterval(() => {
// if (threadPromises.length !== 0 && readyForNewRequest === true) {
// readyForNewRequest = false
// try {
// threadPromise(threadPromises[0][0], threadPromises[0][1]).then(threadInfo => {
// console.log(threadInfo)
// gthreads.push(threadInfo)
// threadPromises.splice(0, 1)
// readyForNewRequest = true
// }).catch(err => {
// console.log(err)
// threadPromises.splice(0, 1)
// readyForNewRequest = true
// })
// } catch (err) {
// console.log(err)
// }
// }
// }, (1100))
// })
// }).catch(err => {
// console.log(err)
// })
// catalogPromise('biz').then(pages => {
// let threadPromises = []
// let readyForNewRequest = true
// pages.forEach((page, pageIndex) => {
// page.threads.forEach((thread, threadIndex) => {
// threadPromises.push(['biz', thread.no])
// })
// let scrapeInterval = setInterval(() => {
// if (threadPromises.length !== 0 && readyForNewRequest === true) {
// readyForNewRequest = false
// try {
// threadPromise(threadPromises[0][0], threadPromises[0][1]).then(threadInfo => {
// console.log(threadInfo)
// bizthreads.push(threadInfo)
// threadPromises.splice(0, 1)
// readyForNewRequest = true
// }).catch(err => {
// console.log(err)
// threadPromises.splice(0, 1)
// readyForNewRequest = true
// })
// } catch (err) {
// console.log(err)
// }
// }
// }, (1100))
// })
// }).catch(err => {
// console.log(err)
// })
// gthreads.forEach((thread, threadIndex) => {
// thread.posts.forEach((post, postIndex) => {
// if (post.com !== undefined) {
// let comOut = post.com
// if (comOut.match(/<a href="#p\d*" class="quotelink">&gt;&gt;\d*<\/a>/g)) {
// comOut.match(/<a href="#p\d*" class="quotelink">&gt;&gt;\d*<\/a>/g).forEach((e, i) => {
// comOut = comOut.replace(e, `>>${e.match(/\d\d\d\d\d\d\d\d/)[0]}`)
// })
// }
// comOut = comOut.replace(/<wbr>/g, '').replace(/<br>/g, '\n').replace(/&#039;/g, '\'').replace(/<span class="quote">&gt;/g, '>').replace(/&gt;/g, '>').replace(/<\/span>/g, '').replace(/<pre class="prettyprint">/g, '\n[code]\n').replace(/<\/pre>/g, '\n[/code]\n').replace(/&quot;/g, '"').replace(/&lt;/g, '<').replace(/&amp;/g, '&')
// console.log('')
// console.log(`${thread.posts[0].no}->${post.no}:`)
// console.log(comOut)
// console.log('')
// }
// })
// })
// bizthreads.forEach((thread, threadIndex) => {
// console.log(`${thread.posts[0].no}: ${thread.posts[0].sub}`.greenBG)
// thread.posts.forEach((post, postIndex) => {
// if (post.com !== undefined) {
// let comOut = post.com
// if (comOut.match(/<a href="#p\d*" class="quotelink">&gt;&gt;\d*<\/a>/g)) {
// comOut.match(/<a href="#p\d*" class="quotelink">&gt;&gt;\d*<\/a>/g).forEach((e, i) => {
// comOut = comOut.replace(e, `>>${e.match(/\d\d\d\d\d\d\d\d/)[0]}`)
// })
// }
// comOut = comOut.replace(/<wbr>/g, '').replace(/<br>/g, '\n').replace(/&#039;/g, '\'').replace(/<span class="quote">&gt;/g, '>').replace(/&gt;/g, '>').replace(/<\/span>/g, '').replace(/<pre class="prettyprint">/g, '\n[code]\n').replace(/<\/pre>/g, '\n[/code]\n').replace(/&quot;/g, '"').replace(/&lt;/g, '<').replace(/&amp;/g, '&')
// console.log(`${post.no}: ${comOut}`)
// }
// })
// })
// https://www.tunemymusic.com/
// Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment