我试图用分类树的预测概率绘制ROC曲线。但是，绘图时曲线没有显示出来。我想先绘制ROC曲线，再根据曲线下面积得到AUC值。有谁知道如何解决这个问题吗？如果可以的话，先谢谢了。二元列Risk表示预测是否被错误分类，我认为它就是我的标签。我是否应该在代码的其他位置应用ROC曲线的计算？

这是数据框：

library(ROCR)

# Load the Risk.table data set (its columns are shown in the table below).
data(Risk.table)

# Build the ROCR prediction object from the predicted probabilities and the
# Risk column. Both vectors are read from the same data frame, Risk.table, so
# scores and labels are guaranteed to be aligned row by row.
# NOTE(review): ROCR documents `labels` as the true class of each row; Risk
# flags whether the prediction was right, so confirm it is the intended label.
pred <- prediction(Risk.table$Predicted.prob, Risk.table$Risk)

# True-positive rate (y axis) against false-positive rate (x axis).
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
perf
plot(perf)

   Predicted.prob Actual.prob   predicted actual Risk
  1       0.5384615   0.4615385        G8     V4    0
  2       0.1212121   0.8787879        V4     V4    1
  3       0.5384615   0.4615385        G8     G8    1
  4       0.9000000   0.1000000        G8     G8    1
  5       0.1212121   0.8787879        V4     V4    1
  6       0.1212121   0.8787879        V4     V4    1
  7       0.9000000   0.1000000        G8     G8    1
  8       0.5384615   0.4615385        G8     V4    0
  9       0.5384615   0.4615385        G8     V4    0
  10      0.1212121   0.8787879        V4     G8    0
  11      0.1212121   0.8787879        V4     V4    1
  12      0.9000000   0.1000000        G8     V4    0
  13      0.9000000   0.1000000        G8     V4    0
  14      0.1212121   0.8787879        G8     V4    1
  15      0.9000000   0.1000000        G8     G8    1
  16      0.5384615   0.4615385        G8     V4    0
  17      0.9000000   0.1000000        G8     V4    0
  18      0.1212121   0.8787879        V4     V4    1
  19      0.5384615   0.4615385        G8     V4    0
  20      0.1212121   0.8787879        V4     V4    1
  21      0.9000000   0.1000000        G8     G8    1
  22      0.5384615   0.4615385        G8     V4    0
  23      0.9000000   0.1000000        G8     V4    0
  24      0.1212121   0.8787879        V4     V4    1

这是此代码输出的ROC曲线，但缺少曲线：

enter image description here

我再试一次，这条ROC曲线错了

enter image description here

我使用下面的代码构造了上面的数据框：

包含所有数据的初始数据框称为shuffle.cross.validation2

# Split data 70:30 after shuffling the data frame

# Row positions are taken from the data frame that is actually being split, so
# the sampled indices always match its rows. seq_len() also copes with a
# zero-row data frame, where 1:nrow() would yield c(1, 0).
index <- seq_len(nrow(shuffle.cross.validation2))
trainindex.LDA3 <- sample(index, trunc(length(index) * 0.70), replace = FALSE)

# 70% of the rows, drawn at random without replacement, form the training set.
LDA.70.trainset3 <- shuffle.cross.validation2[trainindex.LDA3, ]

# The remaining rows form the test set. setdiff() is used instead of a
# negative index because `x[-integer(0), ]` would select no rows at all when
# the training sample is empty.
testindex.LDA3 <- setdiff(index, trainindex.LDA3)
LDA.30.testset3 <- shuffle.cross.validation2[testindex.LDA3, ]

使用rpart包运行分类树

# Fit a classification tree that predicts Family from every other column of
# the training set.
tree.split3 <- rpart(
  Family ~ .,
  data = LDA.70.trainset3,
  method = "class"
)

# Inspect the fitted tree: printed form, detailed summary, printed form again.
print(tree.split3)
summary(tree.split3)
print(tree.split3)

# Draw the tree and label its nodes (use.n adds the observation counts).
plot(tree.split3)
text(tree.split3, use.n = TRUE, digits = 0)

# Complexity-parameter table, followed by the printed tree once more.
printcp(tree.split3)
print(tree.split3)

预测预测数据和实际数据

# Apply the fitted tree to the held-out test rows.
# NOTE(review): the later code reads columns G8 and V4 from this result, so it
# presumably holds one probability column per class - confirm.
res3 <- predict(tree.split3, newdata = LDA.30.testset3)

# Data-frame copy of the predictions so that extra columns can be added.
res4 <- as.data.frame(res3)

创建两列NA（实际和预测分类率）

# Placeholder for the true class; filled in further down.
res4$actual <- NA

# Label every test row with the class that has the larger predicted
# probability: "G8" when the G8 column exceeds the V4 column, "V4" otherwise.
# The comparison is vectorised, so it also works for zero rows and needs no
# per-row loop or progress printing.
# NOTE(review): assumes the prediction output has exactly the columns G8 and
# V4 - confirm against the class levels of Family.
res4$predicted <- ifelse(res4$G8 > res4$V4, "G8", "V4")

res4

# Attach the true class of each test row next to the prediction.
res4$actual <- LDA.30.testset3$Family
res4

# Placeholder column; it is overwritten with 1/0 values in the next step.
Risk.table$Risk <- NA
Risk.table

创建二进制预测变量列

# Risk is 1 when the predicted class equals the true class and 0 otherwise.
# The comparison is done on whole columns at once; both sides are converted to
# character so that factors with different level sets can still be compared.
Risk.table$Risk <- ifelse(
  as.character(Risk.table$predicted) == as.character(res4$actual),
  1,
  0
)

创建上述两个类别（Family）V4和G8的预测概率和实际概率

# Confusion matrix

# Cross-tabulate the true classes (rows) against the predicted classes
# (columns); naming the arguments labels the two dimensions directly.
cm <- table(actual = res4$actual, predicted = res4$predicted)

朴素贝叶斯

# 70:30 train/test split of significant.lda.Wilks2.
index <- seq_len(nrow(significant.lda.Wilks2))
trainindex.LDA.help1 <- sample(
  index,
  trunc(length(index) * 0.70),
  replace = FALSE
)
sig.train <- significant.lda.Wilks2[trainindex.LDA.help1, ]
# setdiff() rather than a negative index: `x[-integer(0), ]` selects no rows.
sig.test <- significant.lda.Wilks2[setdiff(index, trainindex.LDA.help1), ]

library(klaR)

# Fit Naive Bayes on the training rows and score the test rows.
nbmodel <- NaiveBayes(Family ~ ., data = sig.train)
prediction <- predict(nbmodel, sig.test)
NB <- as.data.frame(prediction)
# NOTE(review): per the klaR documentation predict() returns the predicted
# class followed by the posterior probabilities, so the column named "Actual"
# presumably holds the PREDICTED class - confirm before relying on the name.
colnames(NB) <- c("Actual", "Predicted.prob", "acual.prob")

# Numeric label for the ROC curve, taken from the true class of each test row
# (not from the model output): G8 -> 1, V4 -> 0, anything else -> NA.
NB$actual2 <- c(1, 0)[match(sig.test$Family, c("G8", "V4"))]
NB2 <- as.data.frame(NB)

# Overlay both ROC curves plus the diagonal reference line.
# fit.perf and perf are performance objects created elsewhere.
plot(fit.perf, col = "red") # Naive Bayes
plot(perf, col = "blue", add = TRUE) # Classification Tree
abline(0, 1, col = "green")

enter image description here

使用caret包的原始朴素贝叶斯代码

library(caret)
library(e1071)

# 10-fold cross-validation, repeated 3 times.
train_control <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

# Naive Bayes model for Family. The response is the same column that the
# confusion matrix below uses as its reference, so training and evaluation
# refer to one and the same outcome.
# NOTE(review): confirm that Family is the outcome column of LDA.scores.
model <- train(
  Family ~ .,
  data = LDA.scores,
  trControl = train_control,
  method = "nb"
)

# Predict from columns 2 to 13 and compare with the recorded classes.
predictions <- predict(model, LDA.scores[, 2:13])
confusionMatrix(predictions, LDA.scores$Family)

结果

Confusion Matrix and Statistics

                        Reference
                Prediction V4 G8
                        V4 25  2
                        G8  5 48

                  Accuracy : 0.9125         
                    95% CI : (0.828, 0.9641)
       No Information Rate : 0.625          
       P-Value [Acc > NIR] : 4.918e-09      

                    Kappa : 0.8095         
   Mcnemar's Test P-Value : 0.4497         

              Sensitivity : 0.8333         
              Specificity : 0.9600         
           Pos Pred Value : 0.9259         
           Neg Pred Value : 0.9057         
               Prevalence : 0.3750         
           Detection Rate : 0.3125         
     Detection Prevalence : 0.3375         
        Balanced Accuracy : 0.8967         

         'Positive' Class : V4

1 回答

我有各种各样的事情需要指出：

1）我认为你的rpart命令中的公式应该写成 Family ~ . 。

2）在您的初始表中，我可以在预测列中看到值 W3 . 这是否意味着您没有二进制因变量？ ROC曲线使用二进制数据，因此请检查它 .

3）您在初始表中的预测概率和实际概率总和为1.这是否合理？我认为它们代表了其他东西，因此您可以考虑更改名称，以防将来混淆您 .

4）我认为你对ROC如何工作以及它需要什么输入感到困惑 . 您的 Risk 列使用1表示正确的预测，使用0表示错误的预测 . 但是，ROC曲线需要1表示一个类，0表示另一个类 . 简单来说，命令是 prediction(predictions, labels) ，其中 predictions 是您预测的概率， labels 是您的因变量的真实类/级别 . 检查以下代码：

# Example data typed in as text: the first line of the text is the header and
# each following line is one observation.
dt <- read.table(
  header = TRUE,
  text = "
Id Predicted.prob Actual.prob   predicted actual Risk
1       0.5384615   0.4615385        G8     V4    0
2       0.1212121   0.8787879        V4     V4    1
3       0.5384615   0.4615385        G8     G8    1
4       0.9000000   0.1000000        G8     G8    1
5       0.1212121   0.8787879        V4     V4    1
6       0.1212121   0.8787879        V4     V4    1
7       0.9000000   0.1000000        G8     G8    1
8       0.5384615   0.4615385        G8     V4    0
9       0.5384615   0.4615385        G8     V4    0
10      0.1212121   0.8787879        V4     G8    0
11      0.1212121   0.8787879        V4     V4    1
12      0.9000000   0.1000000        G8     V4    0
13      0.9000000   0.1000000        G8     V4    0
14      0.1212121   0.8787879        W3     V4    1
15      0.9000000   0.1000000        G8     G8    1
16      0.5384615   0.4615385        G8     V4    0
17      0.9000000   0.1000000        G8     V4    0
18      0.1212121   0.8787879        V4     V4    1
19      0.5384615   0.4615385        G8     V4    0
20      0.1212121   0.8787879        V4     V4    1
21      0.9000000   0.1000000        G8     G8    1
22      0.5384615   0.4615385        G8     V4    0
23      0.9000000   0.1000000        G8     V4    0
24      0.1212121   0.8787879        V4     V4    1"
)

library(ROCR)

# Pair each predicted probability with the Risk column as its label.
roc_pred <- prediction(predictions = dt$Predicted.prob, labels = dt$Risk)

# True-positive rate (y axis) against false-positive rate (x axis).
perf <- performance(roc_pred, measure = "tpr", x.measure = "fpr")
plot(perf, col = "red")

# Reference line with intercept 0 and slope 1.
abline(a = 0, b = 1, col = "grey")

ROC曲线是：

enter image description here

当您创建一个新列 actual2，用1代替G8、用0代替V4时：

# Encode the true class as a number: G8 becomes 1, V4 becomes 0, and any other
# value stays NA.
dt$actual2 <- c(1, 0)[match(dt$actual, c("G8", "V4"))]

# Same ROC computation as before, now with the true class as the label.
roc_pred <- prediction(predictions = dt$Predicted.prob, labels = dt$actual2)
perf <- performance(roc_pred, measure = "tpr", x.measure = "fpr")
plot(perf, col = "red")

# Reference line with intercept 0 and slope 1.
abline(a = 0, b = 1, col = "grey")

enter image description here

5）正如@eipi10在上面提到的，你应该尽量去掉代码中的for循环。

回复于 2024-04-26T00:02:08+08:00

如何根据分类树概率绘制ROC曲线

这是此代码输出的ROC曲线，但缺少曲线：

我再试一次，这条ROC曲线错了

我使用下面的代码构造了上面的数据框：

包含所有数据的初始数据框称为shuffle.cross.validation2

使用rpart包运行分类树

预测预测数据和实际数据

创建两列NA（实际和预测分类率）

创建二进制预测变量列

创建上述两个类别（Family）V4和G8的预测概率和实际概率

朴素贝叶斯

使用caret包的原始朴素贝叶斯代码

结果

1 回答

相关问题