joaomacalos/hashtag10-againstfavor.py

## hashtag10-againstfavor.py
# Tag each tweet as 'Against' / 'Favor' of the superleague from its hashtags,
# then compare tweet counts using all against-hashtags vs. only the ten most
# frequent ones.
#
# Get only hashtags in favor and against the superleague: rows labelled 1 are
# taken as "against" and rows labelled 3 as "in favor".
against_hashtag = classified_hashtags.hashtag[classified_hashtags.label == 1]
favor_hashtag = classified_hashtags.hashtag[classified_hashtags.label == 3]
# Filter against hashtags among all hashtags (to get the count of each one).
# The lookup set is built once; rebuilding a list for every element of
# `hashtags` made this step needlessly quadratic.
against_lookup = set(against_hashtag)
no_hashtags = [x for x in hashtags if x in against_lookup]
# Get top10 hashtags (the ten most frequent against hashtags)
top10_no_hashtags = list(pd.Series(no_hashtags).value_counts().head(10).index)
# Boolean flags per tweet: True when at least one hashtag of the given group is
# found in the tweet's `hashtags` entry.
# `opinion` / `opinion_top10`: 'Against' takes precedence over 'Favor' when both
# flags are set. Note that `opinion_top10` only restricts the *against* side to
# the top 10; the favor side still uses every favor hashtag.
# The fallback is None rather than np.nan: combining np.nan with string choices
# makes NumPy build a string array holding the literal text 'nan', which the
# groupby below would count as a third opinion instead of dropping it.
sl_tweets = (sl_tweets
             .assign(against=lambda y: [any(x in sublist for x in against_hashtag)
                                        for sublist in y.hashtags],
                     favor=lambda y: [any(x in sublist for x in favor_hashtag)
                                      for sublist in y.hashtags],
                     top10_against=lambda y: [any(x in sublist for x in top10_no_hashtags)
                                              for sublist in y.hashtags],
                     opinion=lambda x: np.where(x.against, 'Against',
                                                np.where(x.favor, 'Favor', None)),
                     opinion_top10=lambda x: np.where(x.top10_against, 'Against',
                                                      np.where(x.favor, 'Favor', None)))
             )
# Number of tweets per opinion; tweets without an opinion (missing key) are
# dropped by groupby.
all_count = sl_tweets.groupby('opinion')[['id']].count()
top10_count = sl_tweets.groupby('opinion_top10')[['id']].count()

# Both frames carry a single 'id' column, so the index-on-index merge suffixes
# them as 'id_x' / 'id_y'; rename them to readable headers. The result is left
# as a bare expression so that it is displayed when run as a notebook cell.
pd.merge(all_count, top10_count,
         left_index=True, right_index=True).rename(columns={'id_x': 'All', 'id_y': 'Top 10'})
	# Tag each tweet as 'Against' / 'Favor' of the superleague from its hashtags,
	# then compare tweet counts using all against-hashtags vs. only the ten most
	# frequent ones.
	#
	# Get only hashtags in favor and against the superleague: rows labelled 1 are
	# taken as "against" and rows labelled 3 as "in favor".
	against_hashtag = classified_hashtags.hashtag[classified_hashtags.label == 1]
	favor_hashtag = classified_hashtags.hashtag[classified_hashtags.label == 3]
	# Filter against hashtags among all hashtags (to get the count of each one).
	# The lookup set is built once; rebuilding a list for every element of
	# `hashtags` made this step needlessly quadratic.
	against_lookup = set(against_hashtag)
	no_hashtags = [x for x in hashtags if x in against_lookup]
	# Get top10 hashtags (the ten most frequent against hashtags)
	top10_no_hashtags = list(pd.Series(no_hashtags).value_counts().head(10).index)
	# Boolean flags per tweet: True when at least one hashtag of the given group
	# is found in the tweet's `hashtags` entry.
	# `opinion` / `opinion_top10`: 'Against' takes precedence over 'Favor' when
	# both flags are set. `opinion_top10` only restricts the *against* side to the
	# top 10; the favor side still uses every favor hashtag.
	# The fallback is None rather than np.nan: combining np.nan with string
	# choices makes NumPy build a string array holding the literal text 'nan',
	# which the groupby below would count as a third opinion instead of dropping.
	sl_tweets = (sl_tweets
	             .assign(against=lambda y: [any(x in sublist for x in against_hashtag)
	                                        for sublist in y.hashtags],
	                     favor=lambda y: [any(x in sublist for x in favor_hashtag)
	                                      for sublist in y.hashtags],
	                     top10_against=lambda y: [any(x in sublist for x in top10_no_hashtags)
	                                              for sublist in y.hashtags],
	                     opinion=lambda x: np.where(x.against, 'Against',
	                                                np.where(x.favor, 'Favor', None)),
	                     opinion_top10=lambda x: np.where(x.top10_against, 'Against',
	                                                      np.where(x.favor, 'Favor', None)))
	             )
	# Number of tweets per opinion; tweets without an opinion (missing key) are
	# dropped by groupby.
	all_count = sl_tweets.groupby('opinion')[['id']].count()
	top10_count = sl_tweets.groupby('opinion_top10')[['id']].count()

	# Both frames carry a single 'id' column, so the index-on-index merge suffixes
	# them as 'id_x' / 'id_y'; rename them to readable headers. The result is left
	# as a bare expression so that it is displayed when run as a notebook cell.
	pd.merge(all_count, top10_count,
	         left_index=True, right_index=True).rename(columns={'id_x': 'All', 'id_y': 'Top 10'})
No results found