Last active
January 1, 2021 22:18
-
-
Save raphsilva/db5e326540c247b85f5a100f4d8bc077 to your computer and use it in GitHub Desktop.
Reads a Twitter data export (tweet.js), keeps the tweets from one year, writes their texts, mentions and retweets to text files, and prints totals plus the most popular tweets.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
from datetime import datetime

import pandas as pd
# Year to report on. Kept as a string because it is compared against the
# last four characters of each tweet's `created_at` text.
YEAR = '2019'
def str_to_datetime(str) -> datetime:
    """Parse a timestamp such as ``"Wed Oct 10 20:19:24 +0000 2018"``.

    Args:
        str: Timestamp text laid out as ``%a %b %d %H:%M:%S %z %Y``. The
            parameter keeps its original name (which shadows the builtin
            inside this function) so existing keyword callers keep working.

    Returns:
        A timezone-aware ``datetime``; the offset comes from the ``%z`` field.

    Raises:
        ValueError: If the text does not match the expected layout.
    """
    return datetime.strptime(str, "%a %b %d %H:%M:%S %z %Y")
# Fields copied from every archived tweet into the DataFrame.
TWEET_FIELDS = ['favorite_count', 'retweet_count', 'id', 'full_text', 'created_at']

# How many tweets the "MOST POPULAR TWEETS" listing shows.
TOP_TWEETS = 8


def parse_archive(raw):
    """Extract the tweet records from the text of a ``tweet.js`` file.

    Everything before the first ``[`` is skipped and the remainder is parsed
    as JSON. The previous implementation rewrote ``true``/``false`` and
    called ``eval`` on the result, which executed file contents as code,
    altered those words inside tweet text, and failed on ``null``.

    Args:
        raw: Full text of the archive file.

    Returns:
        A list of dicts, one per tweet, holding only ``TWEET_FIELDS``.

    Raises:
        ValueError: If the text contains no ``[`` or is not valid JSON.
        KeyError: If an entry lacks ``tweet`` or one of ``TWEET_FIELDS``.
    """
    start = raw.find('[')
    if start == -1:
        raise ValueError("no JSON array found in tweet archive")
    entries = json.loads(raw[start:])
    # Each entry wraps the tweet under a 'tweet' key (after API update, 2019).
    return [
        {field: entry['tweet'][field] for field in TWEET_FIELDS}
        for entry in entries
    ]


def tweets_for_year(records, year):
    """Build a popularity-sorted DataFrame of the tweets from one year.

    Args:
        records: Dicts as returned by ``parse_archive``.
        year: Four-character year, matched against the last four characters
            of ``created_at``.

    Returns:
        A DataFrame with ``TWEET_FIELDS`` plus ``year`` and ``popularity``
        (favorites + retweets), sorted by ``popularity`` in descending order.
        An empty ``records`` list yields an empty frame with the same columns.
    """
    # Naming the columns keeps an empty archive from raising KeyError below.
    frame = pd.DataFrame(records, columns=TWEET_FIELDS)
    frame['year'] = frame['created_at'].map(lambda stamp: stamp[-4:])
    # .copy() so the column assignments below do not write into a slice.
    frame = frame[frame['year'] == year].copy()
    frame['retweet_count'] = frame['retweet_count'].map(int)
    frame['favorite_count'] = frame['favorite_count'].map(int)
    frame['popularity'] = frame['favorite_count'] + frame['retweet_count']
    return frame.sort_values(by=['popularity'], ascending=False)


def sanitize(text):
    """Return ``text`` without characters that cannot be encoded as UTF-8."""
    return text.encode('utf8', 'ignore').decode('utf8')


def split_tweet(text):
    """Classify one tweet text.

    Args:
        text: The tweet's ``full_text``.

    Returns:
        A tuple ``(is_retweet, retweeted, mentions)``. ``is_retweet`` is True
        when the first whitespace-separated token is ``RT``; ``retweeted`` is
        then the second token (``None`` if there is none) and ``mentions`` is
        empty. Otherwise ``mentions`` lists every token starting with ``@``.
    """
    tokens = text.split()
    if tokens and tokens[0] == 'RT':
        retweeted = tokens[1] if len(tokens) > 1 else None
        return True, retweeted, []
    # NOTE(review): the original indentation was lost, so it is unclear
    # whether mentions were also collected for retweets; here they are
    # collected for non-retweets only -- confirm against the original script.
    return False, None, [token for token in tokens if token.startswith('@')]


def write_text_files(texts):
    """Write tweet texts, mentions and retweet data to four text files.

    Creates ``texts.txt``, ``mentioned.txt``, ``retweeted.txt`` and
    ``RTs.txt`` in the current directory, one item per line, UTF-8 encoded.
    The files are closed even if writing fails part-way.

    Args:
        texts: Iterable of tweet texts.
    """
    with open('texts.txt', 'w', encoding='utf-8') as file_texts, \
            open('mentioned.txt', 'w', encoding='utf-8') as file_mentioned, \
            open('retweeted.txt', 'w', encoding='utf-8') as file_retweeted, \
            open('RTs.txt', 'w', encoding='utf-8') as file_rts:
        for text in texts:
            is_retweet, retweeted, mentions = split_tweet(text)
            if is_retweet:
                if retweeted is not None:
                    file_retweeted.write(retweeted + '\n')
                file_rts.write(sanitize(text) + '\n')
            else:
                file_texts.write(sanitize(text) + '\n')
                file_mentioned.writelines(m + '\n' for m in mentions)


def print_report(data):
    """Print the totals followed by the ``TOP_TWEETS`` most popular tweets.

    Args:
        data: DataFrame as returned by ``tweets_for_year`` (already sorted).
    """
    print('\n\n')
    print(f" total tweets: {len(data):7d}")
    print(f" RTs gotten: {int(data['retweet_count'].sum()):7d}")
    print(f" Favs gotten: {int(data['favorite_count'].sum()):7d}")
    print('\n\n\n\nMOST POPULAR TWEETS\n\n\n')
    # head() replaces the manual counter and the site-only exit() helper.
    for _, row in data.head(TOP_TWEETS).iterrows():
        print(sanitize(row['full_text']))
        print(f" {row['created_at']}")
        print(f" RTs: {row['retweet_count']} Favs: {row['favorite_count']}")
        print(f" {row['id']}")
        print('\n')


def main():
    """Read ``tweet.js``, write the text files and print the report."""
    with open('tweet.js', encoding='utf-8') as archive:
        records = parse_archive(archive.read())
    data = tweets_for_year(records, YEAR)
    write_text_files(data['full_text'])
    print_report(data)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment