This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Fetch the stored tweets with load_tweets_db(); presumably this reads from whichever backend was registered | |
| from_db <- load_tweets_db() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Run a search for "#rstats" with n = 500, then pass the result to store_tweets_db() | |
| tweets <- searchTwitter("#rstats", n = 500) | |
| store_tweets_db(tweets) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # register an already-open connection object (here named dbi_connection; presumably a DBI connection) | |
| register_db_backend(dbi_connection) | |
| # or create a sqlite connection | |
| register_sqlite_backend("/path/to/sqlite/file") | |
| # or create a mysql connection | |
| register_mysql_backend("my_database", "hostname", "username", "password") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| load("code2013.rda") # 6028 tweets | |
| filtered_tweets = strip_retweets(code2013) # 5006 tweets | |
| # Pull the text out of every remaining tweet. vapply() (rather than sapply()) always | |
| # returns a character vector, even when filtered_tweets is empty, which the | |
| # strsplit()/tolower() steps applied to statuses rely on. | |
| statuses = vapply(filtered_tweets, function(x) x$getText(), character(1)) | |
| # Read in the TIOBE data | |
| tiobe = read.csv("tiobe.csv", stringsAsFactors=FALSE) | |
| # Lowercased copy of the "lang" column, used when matching tweet tokens | |
| tiobe_langs = tolower(tiobe[, "lang"]) | |
| # Looking at the TIOBE listings and some of the tweet data, massage some of the entries |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Rank each language by its row position (assumes the table is still sorted by | |
| # decreasing count). seq_len() stays correct for an empty table, where 1:nrow() | |
| # would yield c(1, 0). | |
| code2013_lang_table$code2013_rank = seq_len(nrow(code2013_lang_table)) | |
| # The tweet tokens were selected by comparing against lowercased TIOBE names, so the | |
| # rank lookup must use lowercased names too; matching against the raw column gives NA | |
| # for every TIOBE entry that contains an uppercase letter. | |
| code2013_lang_table$tiobe_rank = match(code2013_lang_table$code2013_langs, tolower(tiobe[, "lang"])) | |
| # Make a scatterplot of the ranking differences | |
| png(file="code2013_tiobe_scatter.png", width=640, height=640) | |
| # print() explicitly: a ggplot object is only auto-printed at the interactive top level, | |
| # so without it nothing is drawn to the PNG when the script is run through source(). | |
| print( | |
|   ggplot(code2013_lang_table, aes(x=code2013_rank, y=tiobe_rank, color=code2013_tier)) + | |
|     geom_text(aes(label=code2013_langs), size=3) + | |
|     ylab("TIOBE Rank") + xlab("#code2013 rank") + | |
|     ggtitle("#code2013 vs TIOBE rankings") | |
| ) | |
| dev.off() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(ggplot2) | |
| # Horizontal bar chart of mention counts per language, filled by code2013_tier, | |
| # written to code2013_tiobe.png. | |
| png(file="code2013_tiobe.png", width=640, height=640) | |
| # print() explicitly: a ggplot object is only auto-printed at the interactive top level, | |
| # so without it nothing is drawn to the PNG when the script is run through source(). | |
| print( | |
|   ggplot(code2013_lang_table, aes(x=code2013_langs, y=Count, fill=code2013_tier)) + | |
|     geom_bar(stat="identity") + | |
|     xlab("Language") + ylab("Count") + | |
|     ggtitle("#code2013 Languages Sorted By TIOBE Rankings") + | |
|     coord_flip() | |
| ) | |
| dev.off() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # tokenize each status. split on comma period or whitespace | |
| status_tokens = strsplit(statuses, ",|\\.|\\s+") | |
| # Keep, per status, only the tokens that appear in tiobe_langs. lapply() always returns | |
| # a list, whereas sapply() may simplify to a vector or matrix depending on the data. | |
| matching_tokens = lapply(status_tokens, function(x) { | |
|   x[x %in% tiobe_langs] | |
| }) | |
| # Now have the languages mentioned in #code2013 which are in TIOBE | |
| code2013_langs = unlist(matching_tokens) | |
| # Count the mentions per language and order them most frequent first | |
| code2013_lang_table = as.data.frame(sort(table(code2013_langs), decreasing=TRUE)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # I want to convert this all to lowercase but there are 67 with weird encodings | |
| bad_statuses = numeric() | |
| lowercase_statuses = character() | |
| for (i in seq_along(statuses)) { | |
| tl = try(tolower(statuses[[i]]), silent=TRUE) | |
| if (inherits(tl, "try-error")) { | |
| bad_statuses = c(bad_statuses, i) | |
| } else { | |
| lowercase_statuses = c(lowercase_statuses, tl) | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Load the TIOBE listing from tiobe.csv, keeping strings as character columns | |
| tiobe <- read.csv("tiobe.csv", stringsAsFactors = FALSE) | |
| # Lowercased copy of the "lang" column | |
| tiobe_langs <- tolower(tiobe[, "lang"]) | |
| # Looking at the TIOBE listings and some of the tweet data, massage some of the entries | |
| # here. This won't be perfect but will help a little bit | |
| # Replace every match of the regular expression `was` with `is` in `statuses`, | |
| # ignoring case. Returns the result of gsub() applied to `statuses`. | |
| replace_statuses <- function(statuses, was, is) { | |
|   gsub(pattern = was, replacement = is, x = statuses, ignore.case = TRUE) | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| load("code2013.rda") | |
| # Find/remove the tweets flagged as retweets | |
| # vapply() always yields a logical vector, so which() also works when code2013 is | |
| # empty (sapply() would return list() there and which() would fail). | |
| is_retweets = which(vapply(code2013, function(x) x$getIsRetweet(), logical(1))) | |
| # Negative indexing with an empty index vector would select nothing, so only drop | |
| # elements when at least one retweet was found. | |
| filtered_tweets = if (length(is_retweets) > 0) code2013[-is_retweets] else code2013 |