以下是针对各汽车品牌的 Twitter 情绪分析工作代码,同样的方法也可用于对其他主题进行情绪分析。

# ---- Package setup ----
# Install everything needed for Twitter access, data wrangling and plotting.
# Only needs to be run once per machine.
# (fixed: "install. packages" had a stray space and was a syntax error;
#  the five library() calls were jammed onto a single line)
install.packages(c("devtools", "rjson", "bit64", "httr", "plyr",
                   "ggplot2", "doBy", "XML", "base64enc"))
library(devtools)

# twitteR is installed from GitHub because the CRAN version lags behind.
install_github("geoffjentry/twitteR", force = TRUE)
# fixed: install_github('R-package','quandl') used the removed two-argument
# (repo, username) signature; modern devtools takes "username/repo"
install_github("quandl/R-package", force = TRUE)

library(plyr)
library(httr)
library(doBy)
library(Quandl)
library(twitteR)
# Twitter API keys, and secrets
# ---- Twitter API credentials ----
# NOTE(security): credentials are hard-coded here; before sharing this script,
# move them to environment variables (Sys.getenv) or a local config file.
# (fixed: three assignments were jammed onto one line, and the access token
#  contained a stray space introduced by a line-wrap when pasting)
api_key             <- "w0u2TyUtAga0dPLAc1huwwGYq"
api_secret          <- "sO4kBhyLJEgSB2RaOLIH3qAZbfIjS582vyobM8HVRefLhQMF72"
access_token        <- "709310096-v2FkS6uT5cOFbQklVwiH3oeqKP7Hkw8lWjs7WQz5"
access_token_secret <- "MGDyKHNeeZFhi9QL6AdkqRLLJtUd9gMjsCyQHxEnL3src"

# Authenticate this session against the Twitter API.
setup_twitter_oauth(api_key, api_secret, access_token, access_token_secret)
# ---- Opinion lexicon (Hu & Liu word lists) ----
# Plain-text word lists; lines starting with ';' are file comments.
# (fixed: "comment. char" had a stray space and was a syntax error)
hu.liu.pos <- scan('E:/nci/data warehouse/FINAL/positive-words.txt',
                   what = 'character', comment.char = ';')
hu.liu.neg <- scan('E:/nci/data warehouse/FINAL/negative-words.txt',
                   what = 'character', comment.char = ';')

# Extend the stock lexicons with a few domain-specific terms.
# NOTE(review): matching is done word-by-word downstream, so the multi-word
# entry 'bad maintanance' can never match — confirm whether it is needed.
pos.words <- c(hu.liu.pos, 'awesome', 'good', 'favourite')
neg.words <- c(hu.liu.neg, 'wtf', 'boring', 'douzy', 'hilarious', 'terrible',
               'fake', 'glorified', 'worst', 'bad', 'bad maintanance',
               'depressed')
# Score a single sentence: (# positive words) - (# negative words).
#
# sentence  - character scalar to score
# pos.words - character vector of positive lexicon terms
# neg.words - character vector of negative lexicon terms
# Returns an integer sentiment score (positive = favourable).
score.sentence <- function(sentence, pos.words, neg.words) {
  # Basic cleaning: strip punctuation, control characters and digits,
  # then lower-case so lexicon matching is case-insensitive.
  sentence <- gsub('[[:punct:]]', '', sentence)
  # fixed: '[[:cntrl::]]' (double colon) is an invalid POSIX class
  sentence <- gsub('[[:cntrl:]]', '', sentence)
  sentence <- gsub('\\d+', '', sentence)
  sentence <- tolower(sentence)

  # Split on whitespace; base strsplit avoids depending on stringr here
  # (the original called stringr::str_split without loading stringr).
  words <- unlist(strsplit(sentence, '\\s+'))

  # match() returns NA for words absent from the lexicon; count the hits.
  pos.matches <- !is.na(match(words, pos.words))
  neg.matches <- !is.na(match(words, neg.words))

  sum(pos.matches) - sum(neg.matches)
}
# Takes a vector of sentences and analyses them
# Score a vector of sentences with score.sentence().
#
# sentences - character vector of tweets/sentences
# pos.words - character vector of positive lexicon terms
# neg.words - character vector of negative lexicon terms
# Returns a data.frame with columns: score (numeric), text (the input).
score.sentiment <- function(sentences, pos.words, neg.words) {
  # library() rather than require(): fail loudly if a dependency is missing.
  library(plyr)
  library(stringr)
  # Any sentence that errors during scoring falls back to a
  # score of 0 (neutral).
  scores <- laply(sentences, function(sentence, pos.words, neg.words) {
    tryCatch(score.sentence(sentence, pos.words, neg.words),
             error = function(e) 0)
  }, pos.words, neg.words)

  scores.df <- data.frame(score = scores, text = sentences)
  scores.df
}
# Collects tweets for one handle and scores the sentiment of the tweets returned
# Pull up to 1500 recent tweets for a Twitter handle and score each one.
#
# handle    - Twitter handle to search for (e.g. "@Audi")
# sports    - category label attached to every row (here always "car")
# code      - short per-brand code used to identify rows downstream
# pos.words - positive lexicon passed through to score.sentiment()
# neg.words - negative lexicon passed through to score.sentiment()
# Returns the score.sentiment() data.frame with sports/code columns appended.
# (fixed: two pairs of statements were jammed onto single lines,
#  which is a syntax error in R)
collect.and.score <- function(handle, sports, code, pos.words, neg.words) {
  tweets <- searchTwitter(handle, n = 1500)
  text <- laply(tweets, function(t) t$getText())

  score <- score.sentiment(text, pos.words, neg.words)
  score$sports <- sports
  score$code <- code
  score
}
# ---- Collect and score tweets for each car brand ----
Audi.scores            <- collect.and.score("@Audi", "car", "Ac", pos.words, neg.words)
BMW.scores             <- collect.and.score("@BMW", "car", "Bc", pos.words, neg.words)
Ford.scores            <- collect.and.score("@Ford", "car", "Fc", pos.words, neg.words)
volvocarsglobal.scores <- collect.and.score("@volvocarsglobal", "car", "Vc", pos.words, neg.words)
HMSGOfficial.scores    <- collect.and.score("@HMSGOfficial", "car", "Hc", pos.words, neg.words)
Opel.scores            <- collect.and.score("@Opel", "car", "Oc", pos.words, neg.words)
SKODAUK.scores         <- collect.and.score("@SKODAUK", "car", "Sc", pos.words, neg.words)
ToyotaMotorCorp.scores <- collect.and.score("@ToyotaMotorCorp", "car", "Tc", pos.words, neg.words)
SuzukiCarsUK.scores    <- collect.and.score("@SuzukiCarsUK", "car", "Scz", pos.words, neg.words)
UKVolkswagen.scores    <- collect.and.score("@UKVolkswagen", "car", "Uc", pos.words, neg.words)

# Combine all brands into one data frame.
# (fixed: a stray "pos.words, neg.words)" fragment broke the script here,
#  and SuzukiCarsUK.scores was collected but missing from the rbind)
all.scores <- rbind(Audi.scores, BMW.scores, Ford.scores,
                    volvocarsglobal.scores, HMSGOfficial.scores, Opel.scores,
                    SKODAUK.scores, ToyotaMotorCorp.scores,
                    SuzukiCarsUK.scores, UKVolkswagen.scores)
all.scores
View(all.scores)
write.csv(all.scores, 'Twitterrates')
# Classify tweets as clearly positive or clearly negative, ignoring scores near 0
# ---- Classify and aggregate ----
# Flag clearly positive (>= 1) and clearly negative (<= -1) tweets;
# scores near 0 are treated as neutral and not counted either way.
all.scores$very.pos <- as.numeric(all.scores$score >= 1)
# fixed: the negative flag was assigned to $score, clobbering the raw
# scores; it belongs in $very.neg, which the summary below reads
all.scores$very.neg <- as.numeric(all.scores$score <= -1)

# Aggregate the pos/neg sentiment counts for each car brand.
# fixed: the grouping column is named 'sports' (set in collect.and.score),
# not 'car'
twitter.df <- ddply(all.scores, c('sports', 'code'), summarise,
                    pos.count = sum(very.pos),
                    neg.count = sum(very.neg))
twitter.df$all.count <- twitter.df$pos.count + twitter.df$neg.count

# Express the sentiment score as the percentage of positive tweets.
twitter.df$score <- round(100 * twitter.df$pos.count / twitter.df$all.count)

# Show and persist the brands ranked best-to-worst by sentiment score.
orderBy(~ -score, twitter.df)
View(orderBy(~ -score, twitter.df))
write.csv(orderBy(~ -score, twitter.df), file = 'ActorTwitterData.scores2.csv')
# fixed: write.csvfile() does not exist and 'all.score' was a typo
write.csv(all.scores, 'unstructured data')

最近我在 R 中设计了一段相对简单的代码,通过正面、负面和中性三个类别来分析 Twitter 帖子的内容。处理推文的思路基于一个演示文稿:http://www.slideshare.net/ajayohri/twitter-analysis-by-kaify-rais 。该算法根据推文中正面词和负面词的数量来给推文打分。推文中的单词会与词典中的单词进行匹配,词典可以在互联网上找到,您也可以自己创建、编辑这份列表或字典。代码运行良好,但我发现了一些问题。

Twitter 的 API 有一些限制。可获取的范围取决于您通过 API 访问的推文总量,但通常只能收到过去 7-8 天内的推文(不会更久,有时甚至只有 1-2 天)。这个 7-8 天的时间限制,使我们难以了解哪些活动或事件影响了推文,也难以分析历史趋势。

为绕过这一限制,我创建了一个累积文件来积累历史数据。如果您定期获取推文,就可以通过以下图表分析互动的动态变化: