Skip to content

Instantly share code, notes, and snippets.

@Nachtjagdgeschwader
Created April 9, 2017 19:10
Show Gist options
  • Select an option

  • Save Nachtjagdgeschwader/d61f40681218bdc0c8deaa97efab84f9 to your computer and use it in GitHub Desktop.

Select an option

Save Nachtjagdgeschwader/d61f40681218bdc0c8deaa97efab84f9 to your computer and use it in GitHub Desktop.
Save data about Facebook posts ("Date and time", "User", "Text", "Reactions", "Shares", "Comments", "URL") found through a general keyword search.
#!/usr/bin/python3
# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from urllib import parse
import re
from time import sleep
from datetime import datetime
import xlsxwriter
# Browser session: start from a copy of PhantomJS's stock capabilities and
# set a custom User-Agent header value on top of them.
desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
desired_capabilities.update({
    'phantomjs.page.customHeaders.User-Agent': '(KHTML, like Gecko) Chrome/57.0.2987.133',
})

# Placeholder path: replace it with the location of the phantomjs executable.
dr = webdriver.PhantomJS(
    desired_capabilities=desired_capabilities,
    executable_path='S:/Path to PhantomJS folder/phantomjs-2.1.1-windows/bin/phantomjs',
)
def _type_into(element_id, value):
    """Clear the input whose id is *element_id*, type *value* into it, and return it."""
    box = dr.find_element_by_id(element_id)
    box.clear()
    box.send_keys(value)
    return box


# Log in: fill the e-mail and password inputs (placeholders below), then
# submit the form by pressing RETURN in the password input.
dr.get("https://www.facebook.com/login.php")
_type_into("email", "your email")
_type_into("pass", "your password").send_keys(Keys.RETURN)
# Open the public-stories search page for the URL-quoted keyword(s).
keyword = 'keyword(s) to search'
query = parse.quote(keyword.encode('utf-8'))
search_url = "https://www.facebook.com/search/str/%s/stories-keyword/stories-public" % query
dr.get(search_url)

# Scroll to the bottom of the results page three times, pausing 2 seconds
# after each scroll. Change the range to scroll more or fewer times.
for _ in range(3):
    dr.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    sleep(2)

# Collect the href of every post link currently present on the results page.
links = [
    anchor.get_attribute("href")
    for anchor in dr.find_elements_by_css_selector("span.fsm.fwn.fcg a._5pcq")
]
def _first_text(selectors):
    """Return the text of the first CSS selector in *selectors* that can be read.

    Selectors are tried in order; '' is returned when none of them yields
    an element.
    """
    for selector in selectors:
        try:
            return dr.find_element_by_css_selector(selector).text
        except Exception:
            # Best effort: this selector did not work, try the next one.
            continue
    return ''


def _digits(raw):
    """Return the digits contained in *raw* as an int.

    Raises ValueError when *raw* contains no digits at all.
    """
    return int(re.sub(r"\D", "", raw))


def _reaction_count():
    """Return the reaction count of the currently open post, or 0 if unreadable."""
    counter = "._4arz span"
    try:
        return int(dr.find_element_by_css_selector(counter).text)
    except Exception:
        pass
    try:
        # Second attempt: click the like sentence, wait a second, read again.
        dr.find_element_by_css_selector(".UFILikeSentence").click()
        sleep(1)
        return int(dr.find_element_by_css_selector(counter).text)
    except Exception:
        return 0


# One list per output column. Every post appends exactly one value to each
# list, so index i of every list (and of `links`) describes the same post.
text = []
date = []
author = []
reactions = []
shares = []
comments = []

# Scraping is deliberately best-effort, so failures fall back to a default
# value. `except Exception` is used instead of a bare `except` so that
# Ctrl-C (KeyboardInterrupt) and SystemExit still stop the script.
for link in links:
    dr.get(link)

    reactions.append(_reaction_count())

    try:
        shares.append(_digits(dr.find_element_by_css_selector(".UFIShareLink em").text))
    except Exception:
        shares.append(0)

    text.append(_first_text(("p", ".hasCaption div")))
    author.append(_first_text((".fwb.fcg", ".profileLink", ".fbPhotoContributorName a")))

    try:
        # Scroll a quarter of the way down and wait before reading the counter.
        dr.execute_script("window.scrollTo(0, document.body.scrollHeight/4);")
        sleep(2)
        comments.append(_digits(dr.find_element_by_css_selector("span.fcg em").text))
    except Exception:
        # Left blank (not 0) when the comment counter cannot be read.
        comments.append('')

    try:
        datestamp = int(dr.find_element_by_css_selector("abbr").get_attribute("data-utime"))
        date.append(datetime.fromtimestamp(datestamp))
    except Exception:
        # A post without a readable timestamp used to abort the whole run
        # before anything was saved; keep the row and leave its date blank.
        date.append('')
# Save the collected data to an .xlsx file: a header row, then one column
# per field starting at row 2. The path below is a placeholder.
try:
    # The context manager closes (and thereby writes) the workbook even if
    # one of the write calls raises.
    with xlsxwriter.Workbook('S:/Path to folder where results to be saved/Results.xlsx') as workbook:
        f = workbook.add_worksheet()
        f.write_row(0, 0, ["Date and time", 'User', 'Text', 'Reactions', 'Shares', 'Comments', 'URL'])
        format_date = workbook.add_format({'num_format': 'dd.mm.yyyy hh:mm:ss'})
        f.write_column('A2', date, format_date)
        for first_cell, values in (
            ('B2', author),
            ('C2', text),
            ('D2', reactions),
            ('E2', shares),
            ('F2', comments),
            ('G2', links),
        ):
            f.write_column(first_cell, values)
finally:
    # quit() ends the whole WebDriver session, including the PhantomJS
    # process; close() only closed the current window and left it running.
    dr.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment