以下代码分析了Twitter的情绪:无论是积极的,消极的还是中性的 . 然而,对于许多推文来说这是相当不准确的,例如,如果它包括“有人给了他一个中指的saulte”,我想训练该程序以识别中指意味着不尊重,即使它在句子中包括敬礼这个词 .
任何建议,将不胜感激 .
从textblob导入TextBlob的tweepy导入OAuthHandler导入重新导入tweepy
class TwitterClient(object):
'''
Generic Twitter Class for sentiment analysis.
'''
def __init__(self):
'''
Class constructor or initialization method.
'''
# keys and tokens from the Twitter Dev Console
consumer_key = 'WHexAxkRn6uEJkzS2CKpeQejI'
consumer_secret = 'fSxjGVM247YS6Y6BpkWXaIfr6ThXdoSUg2y0aR259vNXVPPfob'
access_token = '915324744140025862-jnGvcTPkJHOObkeydiVburK8SdAngEk'
access_token_secret = 'JGgkWI9Lq0rJU1K0C8JLplRnSrEuw8pj3anOlIsn3YdiO'
# attempt authentication
try:
# create OAuthHandler object
self.auth = OAuthHandler(consumer_key, consumer_secret)
# set access token and secret
self.auth.set_access_token(access_token, access_token_secret)
# create tweepy API object to fetch tweets
self.api = tweepy.API(self.auth)
except:
print("Error: Authentication Failed")
def clean_tweet(self, tweet):
'''
Utility function to clean tweet text by removing links, special characters
using simple regex statements.
'''
return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet).split())
def get_tweet_sentiment(self, tweet):
'''
Utility function to classify sentiment of passed tweet
using textblob's sentiment method
'''
# create TextBlob object of passed tweet text
analysis = TextBlob(self.clean_tweet(tweet))
# set sentiment
if analysis.sentiment.polarity > 0:
return 'positive'
elif analysis.sentiment.polarity == 0:
return 'neutral'
else:
return 'negative'
def get_tweets(self, query, count = 30):
'''
Main function to fetch tweets and parse them.
'''
# empty list to store parsed tweets
tweets = []
try:
# call twitter api to fetch tweets
fetched_tweets = self.api.search(q = query, count = count)
# parsing tweets one by one
for tweet in fetched_tweets:
# empty dictionary to store required params of a tweet
parsed_tweet = {}
# saving text of tweet
parsed_tweet['text'] = tweet.text
# saving sentiment of tweet
parsed_tweet['sentiment'] = self.get_tweet_sentiment(tweet.text)
# appending parsed tweet to tweets list
if tweet.retweet_count > 0:
# if tweet has retweets, ensure that it is appended only once
if parsed_tweet not in tweets:
tweets.append(parsed_tweet)
else:
tweets.append(parsed_tweet)
# return parsed tweets
return tweets
except tweepy.TweepError as e:
# print error (if any)
print("Error : " + str(e))
def main():
# creating object of TwitterClient Class
api = TwitterClient()
# calling function to get tweets
tweets = api.get_tweets(query = 'Donald Trump', count = 200)
# picking positive tweets from tweets
ptweets = [tweet for tweet in tweets if tweet['sentiment'] == 'positive']
# percentage of positive tweets
print("Positive tweets percentage: {} %".format(100*len(ptweets)/len(tweets)))
# picking negative tweets from tweets
ntweets = [tweet for tweet in tweets if tweet['sentiment'] == 'negative']
# percentage of negative tweets
print("Negative tweets percentage: {} %".format(100*len(ntweets)/len(tweets)))
# percentage of neutral tweets
print("Neutral tweets percentage:{}%".format(100*(len(tweets) - len(ntweets) - len(ptweets))/len(tweets)))
# printing first 5 positive tweets
print("\n\nPositive tweets:")
for tweet in ptweets[:20]:
print(tweet['text'])
# printing first 5 negative tweets
print("\n\nNegative tweets:")
for tweet in ntweets[:20]:
print(tweet['text'])
if __name__ == "__main__":
# calling main function
main()