Skip to content

Instantly share code, notes, and snippets.

@raphsilva
Last active January 1, 2021 22:18
Show Gist options
  • Select an option

  • Save raphsilva/db5e326540c247b85f5a100f4d8bc077 to your computer and use it in GitHub Desktop.

Select an option

Save raphsilva/db5e326540c247b85f5a100f4d8bc077 to your computer and use it in GitHub Desktop.
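Reads a Twitter archive export (tweet.js), keeps only the tweets from a given year, writes their texts, retweets, retweeted accounts and mentions to text files, and prints totals along with the most popular tweets.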
import json
from datetime import datetime

import pandas as pd
# Only tweets whose 'created_at' string ends with this four-digit year are processed.
YEAR = '2019'
def str_to_datetime(str) -> datetime:
    """Parse a tweet timestamp string into a timezone-aware ``datetime``.

    Args:
        str: Timestamp formatted like ``"Wed Oct 10 20:19:24 +0000 2018"``
            (abbreviated weekday, abbreviated month, day, time, UTC offset,
            year). The parameter name shadows the builtin ``str``; it is kept
            unchanged so existing keyword callers continue to work.

    Returns:
        The parsed ``datetime``, carrying the UTC offset found in the string.

    Raises:
        ValueError: If the string does not match the expected format.
    """
    return datetime.strptime(str, "%a %b %d %H:%M:%S %z %Y")
# Fields copied from every archive entry into the working table.
TWEET_FIELDS = ('favorite_count', 'retweet_count', 'id', 'full_text', 'created_at')

# How many tweets are listed under "MOST POPULAR TWEETS".
TOP_TWEETS = 8


def clean_text(text):
    """Return ``text`` without characters that cannot be encoded as UTF-8."""
    return text.encode('utf8', 'ignore').decode('utf8')


def parse_tweets(raw):
    """Extract the tweet records from the contents of a ``tweet.js`` file.

    Everything before the first ``[`` (the JavaScript assignment prefix) is
    skipped and the remainder is parsed as JSON. Parsing as JSON instead of
    ``eval`` avoids executing file contents, leaves the words "true"/"false"
    inside tweet texts untouched, and accepts ``null`` values.

    Args:
        raw: Full text of the ``tweet.js`` file.

    Returns:
        A list of dicts, each holding the ``TWEET_FIELDS`` of one tweet.

    Raises:
        ValueError: If ``raw`` contains no ``[`` or the array is not valid JSON.
        KeyError: If an entry lacks one of ``TWEET_FIELDS``.
    """
    start = raw.find('[')
    if start == -1:
        raise ValueError("no '[' found: input does not contain a tweet array")
    records = []
    for entry in json.loads(raw[start:]):
        # Entries are wrapped in a 'tweet' object; fall back to the entry
        # itself so unwrapped entries are accepted as well.
        tweet = entry.get('tweet', entry)
        records.append({field: tweet[field] for field in TWEET_FIELDS})
    return records


def select_year(records, year):
    """Build the table of tweets from ``year``, most popular first.

    Args:
        records: Dicts as returned by ``parse_tweets``.
        year: Four-character year; compared with the last four characters of
            each tweet's ``created_at`` string.

    Returns:
        A ``pandas.DataFrame`` with the ``TWEET_FIELDS`` columns plus ``year``
        and ``popularity`` (favorites + retweets), sorted by descending
        ``popularity``. The counts are converted to integers.
    """
    # Passing the columns explicitly keeps an empty archive from raising KeyError.
    frame = pd.DataFrame(records, columns=list(TWEET_FIELDS))
    frame['year'] = frame['created_at'].map(lambda stamp: stamp[-4:])
    # Copy the filtered rows so the assignments below do not write to a slice.
    frame = frame[frame['year'] == year].copy()
    for column in ('retweet_count', 'favorite_count'):
        frame[column] = frame[column].map(int)
    frame['popularity'] = frame['favorite_count'] + frame['retweet_count']
    return frame.sort_values(by=['popularity'], ascending=False)


def classify_texts(texts):
    """Split tweet texts into own tweets, retweets, retweeted users and mentions.

    A text whose first whitespace-separated word is ``RT`` counts as a retweet
    and its second word (when present) is recorded as the retweeted user.
    Any other text, including an empty one, counts as an own tweet, and each
    of its words starting with ``@`` is recorded as a mention.

    Args:
        texts: Iterable of tweet texts.

    Returns:
        A tuple ``(originals, retweets, retweeted_users, mentions)`` of lists
        of strings, each in input order.
    """
    originals, retweets, retweeted_users, mentions = [], [], [], []
    for text in texts:
        words = text.split()
        if words and words[0] == 'RT':
            if len(words) > 1:
                retweeted_users.append(clean_text(words[1]))
            retweets.append(clean_text(text))
        else:
            originals.append(clean_text(text))
            # NOTE(review): mentions are taken from non-retweets only; the
            # nesting of this step could not be recovered with certainty
            # from the original layout - confirm.
            mentions.extend(clean_text(word) for word in words if word.startswith('@'))
    return originals, retweets, retweeted_users, mentions


def write_lines(path, lines):
    """Write ``lines`` to ``path`` as UTF-8, one per line, replacing the file."""
    with open(path, 'w', encoding='utf-8') as handle:
        handle.writelines(f'{line}\n' for line in lines)


def print_report(frame, limit=TOP_TWEETS):
    """Print the totals and the first ``limit`` rows of ``frame``.

    Args:
        frame: Table as returned by ``select_year`` (already sorted).
        limit: Maximum number of tweets to list.
    """
    print('\n\n')
    print(' total tweets: %7d' % len(frame))
    print(' RTs gotten: %7d' % int(frame['retweet_count'].sum()))
    print(' Favs gotten: %7d' % int(frame['favorite_count'].sum()))
    print('\n\n\n\nMOST POPULAR TWEETS\n\n\n')
    for _, row in frame.head(limit).iterrows():
        print(clean_text(row['full_text']))
        print(f" {row['created_at']}")
        print(f" RTs: {row['retweet_count']} Favs: {row['favorite_count']}")
        print(f" {row['id']}")
        print('\n')


def main():
    """Read ``tweet.js``, write the four text files and print the report."""
    with open('tweet.js', encoding='utf-8') as source:
        records = parse_tweets(source.read())
    frame = select_year(records, YEAR)
    originals, retweets, retweeted_users, mentions = classify_texts(frame['full_text'])
    write_lines('texts.txt', originals)
    write_lines('mentioned.txt', mentions)
    write_lines('retweeted.txt', retweeted_users)
    write_lines('RTs.txt', retweets)
    print_report(frame)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment