Skip to content

Instantly share code, notes, and snippets.

@Nachtjagdgeschwader
Last active April 10, 2017 12:37
Show Gist options
  • Select an option

  • Save Nachtjagdgeschwader/b5a90a5c2acc35e3859ab230bdf021d2 to your computer and use it in GitHub Desktop.

Select an option

Save Nachtjagdgeschwader/b5a90a5c2acc35e3859ab230bdf021d2 to your computer and use it in GitHub Desktop.
Save data ("URL","Date","Original Text","SharedText") about Facebook posts found by keyword with RSelenium and PhantomJS
# Install the helper packages only when they are missing, so that re-running
# the script does not download and reinstall them every time.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
library(devtools)
# NOTE(review): an already installed RSelenium (e.g. from CRAN) is kept as is;
# run install_github("ropensci/RSelenium") manually if the GitHub build is
# specifically required.
if (!requireNamespace("RSelenium", quietly = TRUE)) {
  install_github("ropensci/RSelenium")
}
library(RSelenium)

# Location of the PhantomJS executable; adjust to the local installation.
psPath <- "S:/Path to PhantomJS folder/phantomjs-2.1.1-windows/bin/phantomjs.exe"
# Start PhantomJS and keep the returned handle in pJS.
# NOTE(review): phantom() appears to be deprecated in newer RSelenium
# releases - confirm against the installed version.
pJS <- phantom(pjs_cmd = psPath)
# Create the remote driver client for the PhantomJS browser and open a session.
remDr <- remoteDriver(browserName = "phantomjs")
remDr$open()
# Open the Facebook login page and sign in.
loginURL <- "https://www.facebook.com/login.php"
remDr$navigate(loginURL)
# Fill in the e-mail and password fields (replace the placeholders with real
# credentials), then press the login button.
remDr$findElement(using = "id", value = "email")$sendKeysToElement(list("Your email"))
remDr$findElement(using = "id", value = "pass")$sendKeysToElement(list("Your password"))
remDr$findElement(using = "id", value = "loginbutton")$clickElement()
# Search query URL with all the needed filters already encoded in its query
# string: the percent-encoded keyword (q), the creation-time window
# (start_month 2014-01 to end_month 2014-04), filters_rp_author=public and
# filters_rp_chrono_sort=top.
searchURL <- 'https://www.facebook.com/search/top/?q=%D0%9A%D0%9C%D0%98%D0%A1&filters_rp_creation_time=%7B"start_month"%3A"2014-01"%2C"end_month"%3A"2014-04"%7D&filters_rp_author=public&filters_rp_chrono_sort=top'
# Load the search results page in the remote browser.
remDr$navigate(searchURL)
# Scroll down 100 times, 1000 pixels further on each step, and wait 3 seconds
# after every step so that the page has time to render.
for (i in seq_len(100)) {
  remDr$executeScript(paste("scroll(0,", i * 1000, ");"))
  Sys.sleep(3)
}
# Collect the link elements matched by the CSS selector below; their href and
# text are used further down as the post URL and the post date.
URLRaw <- remDr$findElements(
  using = "css",
  value = "div._6a._5u5j._6b span.fsm.fwn.fcg a._5pcq"
)
# Return the "href" attribute of the web element x, exactly as delivered by
# the element's getElementAttribute() method.
Attr <- function(x) x$getElementAttribute("href")
# Apply Attr to every element of URLRaw to obtain the href of each link.
URL <- mapply(Attr,URLRaw)
# Return the text of the web element x, exactly as delivered by the element's
# getElementText() method.
getText <- function(x) x$getElementText()
# The text of each link element in URLRaw is stored as the post date.
Date <- mapply(getText, URLRaw)

# Text of the paragraphs found inside "div.userContent" (original post text).
TextRaw <- remDr$findElements(using = "css", value = "div.userContent p")
OriginalText <- mapply(getText, TextRaw)

# Text of the elements matched by "div.mtm" (stored as the shared text).
Shared <- remDr$findElements(using = "css", value = "div.mtm")
SharedText <- mapply(getText, Shared)
# Column-bind inputs of unequal length, padding the shorter ones with NA.
#
# Arguments:
#   ...  objects accepted by as.matrix() (vectors, lists, matrices, data
#        frames); a plain vector becomes a one-column matrix.
# Returns:
#   A matrix with as many rows as the longest input. Shorter inputs are padded
#   with NA rows at the bottom. NULL when called without arguments, which is
#   what cbind() itself returns in that case.
cbind.fill <- function(...) {
  mats <- lapply(list(...), as.matrix)
  # Nothing to combine: return early instead of failing inside max().
  if (length(mats) == 0L) {
    return(NULL)
  }
  # vapply() guarantees an integer vector of row counts; sapply() would change
  # its return type depending on the input.
  n <- max(vapply(mats, nrow, integer(1)))
  padded <- lapply(mats, function(m) {
    # Append as many all-NA rows as are needed to reach n rows.
    rbind(m, matrix(NA, nrow = n - nrow(m), ncol = ncol(m)))
  })
  do.call(cbind, padded)
}
# cbind.fill deals with missing data (e.g. posts with no shared or original text):
# vectors need to be of the same length to be combined into a matrix,
# so the shorter vectors are padded with NA at the end
# (values are therefore not necessarily aligned post by post)
# Combine the four scraped vectors into one table; cbind.fill pads the shorter
# ones with NA so that they can be bound column-wise.
data <- data.frame(cbind.fill(URL, Date, OriginalText, SharedText))
colnames(data) <- c("URL", "Date", "Original Text", "SharedText")
# Write the table as semicolon-separated text with a header row.
# NOTE(review): row names are written as well (write.table default) and get no
# header cell; consider row.names = FALSE or col.names = NA if the file is
# meant to be opened as a regular CSV - confirm with the consumers of the file.
write.table(data, "S:/Path for results to be saved/Data.csv", col.names = TRUE, sep = ";")
# Release what was started at the top of the script: close the browser
# session and stop the PhantomJS process so that it does not keep running.
remDr$close()
pJS$stop()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment