Last active
February 26, 2025 05:08
-
-
Save m-esm/79d59b2f6a84f30a5ada23ad4abaf336 to your computer and use it in GitHub Desktop.
Extract tweets from browser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Shared store of extracted tweets; keeps an existing store so repeated runs accumulate.
window.tweets = window.tweets || {};
/**
 * Extracts structured data from a rendered tweet element.
 *
 * Side effect: when the tweet quotes another tweet, the quoted tweet's element
 * is parsed recursively and then removed from the DOM.
 *
 * @param {HTMLElement} tweetElem - Element to read the tweet data from.
 * @returns {{
 *   username: (string|undefined),
 *   text: (string|undefined),
 *   avatar: (string|null|undefined),
 *   time: (string|null|undefined),
 *   link: string,
 *   isRetweet: boolean,
 *   retweetedBy: (string|undefined),
 *   isQuote: boolean,
 *   quotedTweet: (object|undefined),
 *   images: string[]
 * }} The parsed tweet; `link` is an empty string when no permalink is found.
 */
window.parseTweetElement = (tweetElem) => {
  // A span whose text is exactly "Quote" marks a quoted tweet.
  const quoteLabel = Array.from(tweetElem.querySelectorAll("span")).find(
    (span) => span.textContent === "Quote"
  );
  const isQuote = !!quoteLabel;

  let quotedTweet;
  if (quoteLabel) {
    // nextElementSibling (not nextSibling) so a text/comment node is never
    // handed to the recursive call, which needs querySelector support.
    const quotedTweetElem = quoteLabel.parentElement?.nextElementSibling;
    if (quotedTweetElem) {
      quotedTweet = window.parseTweetElement(quotedTweetElem);
      // Detach the quoted tweet before running the queries below —
      // presumably so they only match the outer tweet's own content.
      quotedTweetElem.remove();
    }
  }

  // Queried after the removal above, so quoted-tweet spans are excluded.
  const username = Array.from(tweetElem.querySelectorAll("span"))
    .map((span) => span.textContent)
    .find((content) => content?.startsWith("@"));

  const avatar = tweetElem
    .querySelector('[data-testid="Tweet-User-Avatar"] img')
    ?.getAttribute("src");

  const text = tweetElem.querySelector('[data-testid="tweetText"]')?.textContent;

  // The <time> element carries the timestamp; its parent carries the href.
  const timeElem = tweetElem.querySelector("time");
  const time = timeElem?.getAttribute("datetime");
  const link = timeElem?.parentElement?.getAttribute("href");

  const isRetweet = !!tweetElem.querySelector('[data-testid="socialContext"]')
    ?.textContent;
  const retweetedBy = tweetElem.querySelector(
    '[data-testid="socialContext"] span span'
  )?.textContent;

  const images = Array.from(
    tweetElem.querySelectorAll('[data-testid="tweetPhoto"] img')
  )
    .map((img) => img.getAttribute("src"))
    .filter(Boolean);

  return {
    username,
    text,
    avatar,
    time,
    link: link ? `https://twitter.com${link}` : "",
    isRetweet,
    retweetedBy,
    isQuote,
    quotedTweet,
    images,
  };
};
/**
 * Parses every element matching `[data-testid="tweet"]` in the document and
 * adds the tweets not seen before to `window.tweets`, keyed by tweet link.
 * Logs the running total when done.
 *
 * Tweets for which no link could be extracted are skipped: the link is the
 * de-duplication key, so they would otherwise all collide on the empty key.
 */
window.fetchTweets = () => {
  if (!window.tweets) window.tweets = {};
  const store = window.tweets;

  for (const tweetElem of document.querySelectorAll('[data-testid="tweet"]')) {
    const tweet = window.parseTweetElement(tweetElem);
    // Only the link is required; fields such as isRetweet or quotedTweet are
    // legitimately falsy on ordinary tweets and must not cause a skip.
    if (!tweet.link) continue;
    // First occurrence wins; later sightings of the same link are ignored.
    if (!store[tweet.link]) store[tweet.link] = tweet;
  }

  console.log(`Total tweets extracted: ${Object.keys(store).length}`);
};
/**
 * Repeatedly collects the tweets on the page and scrolls down, then downloads
 * everything stored in `window.tweets` as a pretty-printed JSON file named
 * `tweets_<timestamp>.json`.
 *
 * @param {number} scrollHeight - Vertical distance passed to `window.scrollBy` per iteration.
 * @param {number} iterations - Number of collect-then-scroll rounds.
 * @param {number} [delayMs=3000] - Pause after each scroll, in milliseconds.
 * @returns {Promise<void>} Resolves once the download has been triggered.
 */
window.scrollAndExtract = async (scrollHeight, iterations, delayMs = 3000) => {
  for (let i = 0; i < iterations; i++) {
    window.fetchTweets();
    window.scrollBy(0, scrollHeight);
    await new Promise((resolve) => setTimeout(resolve, delayMs));
  }
  // Collect once more so tweets rendered by the final scroll are not lost.
  window.fetchTweets();

  const jsonString = JSON.stringify(Object.values(window.tweets), null, 2);
  const blob = new Blob([jsonString], {
    type: "application/json",
  });
  const url = URL.createObjectURL(blob);

  // A temporary anchor with a `download` attribute triggers the file save.
  const a = document.createElement("a");
  a.href = url;
  a.target = "_blank";
  a.download = `tweets_${Date.now()}.json`;
  document.body.appendChild(a);
  try {
    a.click();
  } finally {
    // Always detach the anchor and release the object URL, even if click throws.
    document.body.removeChild(a);
    URL.revokeObjectURL(url);
  }
};
// Entry point: scroll by 10000 for 3 rounds, then download the collected tweets.
await window.scrollAndExtract(10000, 3);
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example extracted tweet:
{ "username": "@elonmusk", "text": "Interesting …", "avatar": "https://pbs.twimg.com/profile_images/1683325380441128960/yRsRRjGO_x96.jpg", "time": "2023-09-26T23:56:25.000Z", "link": "https://twitter.com/elonmusk/status/1706820047421759871", "isRetweet": false, "isQuote": true, "quotedTweet": { "username": "@GlobalAffairs", "text": "In response to today's discussion about disinformation in Europe, we reiterate that X is committed to complying with the DSA. \n\nThe EU’s own data shows other services saw greater changes in subscriber growth.", "avatar": "https://pbs.twimg.com/profile_images/1683512189213200385/i554EDOS_normal.jpg", "time": "2023-09-26T21:49:49.000Z", "link": null, "isRetweet": false, "isQuote": false, "images": [ "https://pbs.twimg.com/media/F6-5e3nXcAAz-J9?format=jpg&name=medium" ] }, "images": [ "https://pbs.twimg.com/media/F6-5e3nXcAAz-J9?format=jpg&name=medium" ] }