Created
April 9, 2017 19:10
-
-
Save Nachtjagdgeschwader/d61f40681218bdc0c8deaa97efab84f9 to your computer and use it in GitHub Desktop.
Save data about Facebook posts found through a general keyword search: "Date and time", "User", "Text", "Reactions", "Shares", "Comments", "URL".
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/python3 | |
| # -*- coding: utf-8 -*- | |
| from selenium import webdriver | |
| from selenium.webdriver import DesiredCapabilities | |
| from selenium.webdriver.common.keys import Keys | |
| from urllib import parse | |
| import re | |
| from time import sleep | |
| from datetime import datetime | |
| import xlsxwriter | |
# --- Settings: edit these before running -------------------------------
PHANTOMJS_PATH = 'S:/Path to PhantomJS folder/phantomjs-2.1.1-windows/bin/phantomjs'
USER_AGENT = '(KHTML, like Gecko) Chrome/57.0.2987.133'
FB_EMAIL = "your email"
FB_PASSWORD = "your password"
SEARCH_KEYWORDS = 'keyword(s) to search'
RESULTS_PATH = 'S:/Path to folder where results to be saved/Results.xlsx'
# Number of "scroll to bottom" actions performed on the results page.
# (The original loop `deepness<=2` starting at 0 scrolled three times.)
SCROLL_COUNT = 3

HEADER = ["Date and time", 'User', 'Text', 'Reactions', 'Shares', 'Comments', 'URL']


def parse_count(label):
    """Return the integer formed by all digits in *label*.

    Every non-digit character is dropped before conversion, so
    ``"1,234 shares"`` yields ``1234``.

    Raises:
        ValueError: if *label* contains no digits at all.
    """
    return int(re.sub(r"\D", "", label))


def first_text(driver, selectors):
    """Return the text of the first selector in *selectors* that matches.

    Selectors are tried in order; a selector whose lookup fails is skipped.
    Returns ``''`` when none of them can be read.
    """
    for selector in selectors:
        try:
            return driver.find_element_by_css_selector(selector).text
        except Exception:
            # Best-effort scraping: a missing element just means "try the
            # next selector". `Exception` (not a bare except) keeps
            # Ctrl-C / SystemExit working.
            continue
    return ''


def read_reactions(driver):
    """Return the reaction count of the currently loaded post, or 0.

    If the counter cannot be read directly, the ".UFILikeSentence" element
    is clicked and the counter is read once more after a short pause.
    """
    try:
        return int(driver.find_element_by_css_selector("._4arz span").text)
    except Exception:
        try:
            driver.find_element_by_css_selector(".UFILikeSentence").click()
            sleep(1)
            return int(driver.find_element_by_css_selector("._4arz span").text)
        except Exception:
            return 0


def read_shares(driver):
    """Return the share count of the currently loaded post, or 0."""
    try:
        return parse_count(
            driver.find_element_by_css_selector(".UFIShareLink em").text)
    except Exception:
        return 0


def read_comments(driver):
    """Return the comment count of the currently loaded post, or ``''``.

    The page is scrolled a quarter of the way down first and given two
    seconds before the counter is looked up. ``''`` (a blank cell in the
    spreadsheet) is returned when the counter cannot be read.
    """
    try:
        driver.execute_script(
            "window.scrollTo(0, document.body.scrollHeight/4);")
        sleep(2)
        return parse_count(
            driver.find_element_by_css_selector("span.fcg em").text)
    except Exception:
        return ''


def read_timestamp(driver):
    """Return the post time as a local ``datetime``, or ``''`` if unavailable.

    The value comes from the ``data-utime`` attribute of the first ``abbr``
    element. The original script did this lookup unguarded, so a single
    post without that element aborted the whole run.
    """
    try:
        utime = driver.find_element_by_css_selector("abbr").get_attribute(
            "data-utime")
        return datetime.fromtimestamp(int(utime))
    except Exception:
        return ''


def log_in(driver, email, password):
    """Open the Facebook login page and submit *email* / *password*."""
    driver.get("https://www.facebook.com/login.php")
    email_box = driver.find_element_by_id("email")
    email_box.clear()
    email_box.send_keys(email)
    password_box = driver.find_element_by_id("pass")
    password_box.clear()
    password_box.send_keys(password)
    password_box.send_keys(Keys.RETURN)
    # NOTE(review): nothing here waits for the login to finish before the
    # caller navigates away -- confirm this is reliable in practice.


def collect_post_links(driver, keywords, scroll_count):
    """Run a public-posts keyword search and return the result URLs.

    The results page is scrolled to the bottom *scroll_count* times, with a
    two second pause after each scroll, before the links are collected.
    Anchors without an ``href`` are skipped so they cannot break the later
    ``driver.get`` call.
    """
    query = parse.quote(keywords.encode('utf-8'))
    driver.get(
        "https://www.facebook.com/search/str/%s/stories-keyword/stories-public"
        % query)
    for _ in range(scroll_count):
        driver.execute_script(
            "window.scrollTo(0, document.body.scrollHeight);")
        sleep(2)
    anchors = driver.find_elements_by_css_selector("span.fsm.fwn.fcg a._5pcq")
    hrefs = (anchor.get_attribute("href") for anchor in anchors)
    return [href for href in hrefs if href]


def scrape_post(driver, link):
    """Load *link* and return one spreadsheet row for that post.

    The row is ``(date, author, text, reactions, shares, comments, link)``,
    matching the order of ``HEADER``. Each field falls back to a default
    when it cannot be read, so every post always yields a complete row.
    """
    driver.get(link)
    # The read order is kept from the original script: the reactions step
    # may click and the comments step scrolls, both of which can change
    # what the later lookups see.
    reactions = read_reactions(driver)
    shares = read_shares(driver)
    text = first_text(driver, ("p", ".hasCaption div"))
    author = first_text(
        driver, (".fwb.fcg", ".profileLink", ".fbPhotoContributorName a"))
    comments = read_comments(driver)
    posted_at = read_timestamp(driver)
    return (posted_at, author, text, reactions, shares, comments, link)


def save_results(posts, path):
    """Write *posts* (rows from ``scrape_post``) to an XLSX file at *path*."""
    # The context manager closes the workbook even if a write fails.
    with xlsxwriter.Workbook(path) as workbook:
        sheet = workbook.add_worksheet()
        sheet.write_row(0, 0, HEADER)
        date_format = workbook.add_format(
            {'num_format': 'dd.mm.yyyy hh:mm:ss'})
        for row, post in enumerate(posts, start=1):
            sheet.write(row, 0, post[0], date_format)
            sheet.write_row(row, 1, post[1:])


def main():
    """Log in, search by keyword, scrape every found post and save to XLSX."""
    capabilities = DesiredCapabilities.PHANTOMJS.copy()
    capabilities['phantomjs.page.customHeaders.User-Agent'] = USER_AGENT
    driver = webdriver.PhantomJS(executable_path=PHANTOMJS_PATH,
                                 desired_capabilities=capabilities)
    try:
        log_in(driver, FB_EMAIL, FB_PASSWORD)
        links = collect_post_links(driver, SEARCH_KEYWORDS, SCROLL_COUNT)
        posts = [scrape_post(driver, link) for link in links]
    finally:
        # quit() (unlike close()) also shuts down the PhantomJS process,
        # and the finally clause runs even if scraping fails part-way.
        driver.quit()
    save_results(posts, RESULTS_PATH)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment