-
-
Save gavin19/8e2ed7547efcbb376e94f2057f951526 to your computer and use it in GitHub Desktop.
| from selenium.webdriver import Firefox, FirefoxOptions | |
| from selenium.common.exceptions import TimeoutException | |
| from selenium.webdriver.common.by import By | |
| from selenium.webdriver.support.ui import WebDriverWait | |
| import praw | |
| from time import sleep | |
# Reddit API client used to list posts and their comments.
# Replace the placeholder credentials with those of your own Reddit app.
# Passing a bare ``...`` here makes PRAW treat Ellipsis as the name of a
# praw.ini section and fail with configparser.NoSectionError.
r = praw.Reddit(
    client_id="your_client_id",
    client_secret="your_client_secret",
    user_agent="comment-screenshots by u/your_username",
)

# Headless Firefox session shared by login() and the screenshot loop.
opts = FirefoxOptions()
opts.add_argument("--headless")
opts.set_preference("dom.push.enabled", False)  # kill notification popup
drv = Firefox(options=opts)

# Seconds used both for fixed pauses and as the WebDriverWait limit.
timeout = 10
def login(username="your_username", password="your_password"):
    """Log the module-level Firefox driver in to Reddit via the login form.

    Args:
        username: Account name typed into the ``loginUsername`` field.
        password: Account password typed into the ``loginPassword`` field.

    The cookie-consent popup is dismissed only when it is actually present,
    so a page without the popup no longer aborts the login.
    """
    drv.get("https://www.reddit.com/login")
    drv.find_element(By.ID, "loginUsername").send_keys(username)
    drv.find_element(By.ID, "loginPassword").send_keys(password)
    drv.find_element(By.CSS_SELECTOR, "button[type='submit']").click()
    sleep(timeout)  # fixed pause after submitting the form

    # find_elements returns an empty list instead of raising
    # NoSuchElementException, which makes the popup handling best-effort.
    accept_buttons = drv.find_elements(
        By.XPATH, '//button[text()="Accept all"]'
    )
    if accept_buttons:
        accept_buttons[0].click()  # kill cookie agreement popup
        sleep(timeout)
# Log in, then save one PNG per top-level comment of the hottest post.
# The try/finally guarantees the headless Firefox process is shut down even
# when login or scraping raises.
try:
    login()
    for post in r.subreddit("some_sub").hot(limit=1):
        drv.get("https://www.reddit.com" + post.permalink)
        # Drop "load more comments" placeholders; they have no comment
        # element on the page and would each cost a full timeout.
        post.comments.replace_more(limit=0)
        for comment in post.comments:
            # The comment element is looked up by the id "t1_<comment id>".
            element_id = f"t1_{comment.id}"
            try:
                # find_element_by_id was removed in Selenium 4.3; use the
                # By-based locator API like login() does.
                cmt = WebDriverWait(drv, timeout).until(
                    lambda d: d.find_element(By.ID, element_id)
                )
            except TimeoutException:
                print("Page load timed out...")
            else:
                cmt.screenshot(element_id + ".png")
finally:
    drv.quit()
`Traceback (most recent call last):
File "C:\Users\achil\AppData\Local\Programs\Python\Python310\lib\configparser.py", line 847, in items
d.update(self._sections[section])
KeyError: Ellipsis
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "c:\Users\achil\Bureau\tamerelapute.py", line 8, in <module>
r = praw.Reddit(...)
File "C:\Users\achil\AppData\Local\Programs\Python\Python310\lib\site-packages\praw\util\deprecate_args.py", line 43, in wrapped
return func(**dict(zip(_old_args, args)), **kwargs)
File "C:\Users\achil\AppData\Local\Programs\Python\Python310\lib\site-packages\praw\reddit.py", line 236, in __init__
self.config = Config(
File "C:\Users\achil\AppData\Local\Programs\Python\Python310\lib\site-packages\praw\config.py", line 84, in __init__
self.custom = dict(Config.CONFIG.items(site_name), **settings)
File "C:\Users\achil\AppData\Local\Programs\Python\Python310\lib\configparser.py", line 850, in items
raise NoSectionError(section)
configparser.NoSectionError: No section: Ellipsis
You provided the name of a praw.ini configuration which does not exist.
For help with creating a Reddit instance, visit
https://praw.readthedocs.io/en/latest/code_overview/reddit_instance.html
For help on configuring PRAW, visit
https://praw.readthedocs.io/en/latest/getting_started/configuration.html`
Thank you for this! Huge insight: scraping the comments is significantly easier when logging in to reddit in the driver.
this is a life saver