Last active
January 3, 2020 17:43
-
-
Save BenjaminFraser/fc4dd29549a75c93336822060d012ec8 to your computer and use it in GitHub Desktop.
An example of using the NLTK VADER sentiment analyser to perform sentiment analysis on a pandas DataFrame.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# One analyser instance, built once at module level and reused by
# vadar_sentiment() for every call.
sent_i = SentimentIntensityAnalyzer()
def vadar_sentiment(text):
    """Return the VADER (lexicon-based) compound sentiment score for ``text``.

    The value is the ``'compound'`` entry of the score dictionary produced
    by the module-level ``sent_i`` analyser.
    """
    scores = sent_i.polarity_scores(text)
    return scores['compound']
# Score every title and keep the compound value in its own column.
news_sentiments['vadar compound'] = (
    news_sentiments['title'].apply(vadar_sentiment)
)
def categorise_sentiment(sentiment, neg_threshold=-0.05, pos_threshold=0.05):
    """Map a numeric sentiment score to a text label.

    Args:
        sentiment: Numeric sentiment score to classify.
        neg_threshold: Scores at or below this value are labelled
            ``'negative'``.
        pos_threshold: Scores at or above this value are labelled
            ``'positive'``.

    Returns:
        One of the strings ``'negative'``, ``'positive'`` or ``'neutral'``.
    """
    # Thresholds are inclusive, matching the cut-offs recommended by the
    # VADER authors (compound <= -0.05 negative, compound >= 0.05 positive).
    if sentiment <= neg_threshold:
        return 'negative'
    if sentiment >= pos_threshold:
        return 'positive'
    # Anything strictly between the thresholds is neutral; a NaN score also
    # lands here because both comparisons above evaluate to False.
    return 'neutral'
# Turn each compound score into a text label (default thresholds) and store
# the result in a new column.
news_sentiments['vadar sentiment'] = (
    news_sentiments['vadar compound'].apply(categorise_sentiment)
)
# Plot the distribution of article sentiments for each newspaper:
# one group of bars per newspaper, one bar per sentiment label.
plt.figure(figsize=(8, 8))
sns.countplot(x='newspaper', data=news_sentiments, hue='vadar sentiment')
plt.title('VADAR Sentiment Analysis', weight='bold')
plt.ylabel('Number of Articles', fontsize=10, weight='bold')
# The x axis shows the newspapers; the sentiment labels are the hue, so they
# are captioned in the legend title rather than on the axis.
plt.xlabel('Newspaper', fontsize=10, weight='bold')
# loc=2 ('upper left') with this anchor pins the legend's upper-left corner
# just outside the right edge of the axes.
plt.legend(title='VADAR Sentiment', bbox_to_anchor=(1.02, 1.0), loc=2,
           borderaxespad=0.0)
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment