首页 文章

如何在 R 中针对多分类问题并行运行 KNN 算法

提问于
浏览
1

我有一个多分类问题，正在尝试运行 KNN 算法来找出每个数据点周围的 50 个最近邻。我在 R 中使用了 FNN 包，但由于我的数据集大约有 2900 万行，运行需要很长时间。我想知道 R 中是否有可以并行运行 KNN 的软件包。您能否就其用法给出示例或建议？

1 回答

  • 1
    You can use the following code by modifying it according to KNN. If needed, I will provide you with the exact code. The following code is for SVC (support vector classification).
    
    
    
    
    
    # Attach the packages that provide foreach(), %do%, %dopar% and the
    # doParallel backend. library() stops with an error when a package is
    # missing, whereas require() would only return FALSE and let the script
    # fail later with a less obvious message.
    pkgs <- c("foreach", "doParallel")

    # invisible() suppresses the list that lapply() would otherwise print.
    invisible(lapply(pkgs, library, character.only = TRUE))

    # Register the doParallel backend with 4 cores for %dopar% loops.
    registerDoParallel(cores = 4)
    
    ### PREPARE FOR THE DATA ###

    # Placeholder path: replace it with the location of your own CSV file.
    df1 <- read.csv("path/to/your/dataset.csv")

    ## do normalization if needed ##


    ### SPLIT DATA INTO K FOLDS ###
    # Fix the RNG seed so the fold assignment is reproducible.
    set.seed(2016)

    # Store a fold id for every row in a new `fold` column (k = 10 folds).
    # seq_len() is used instead of 1:nrow(df1) so an empty data frame does not
    # produce the sequence c(1, 0).
    df1$fold <- caret::createFolds(seq_len(nrow(df1)), k = 10, list = FALSE)
    
    
    ### PARAMETER LIST ###
    # Candidate SVM cost values: 10^-1, 10^0, ..., 10^4.
    cost <- 10^seq(from = -1, to = 4)

    # Candidate radial-kernel gamma values: 2^-4, 2^-3, 2^-2, 2^-1.
    gamma <- 2^seq(from = -4, to = -1)

    # Every (cost, gamma) combination, one row per combination.
    parms <- expand.grid(
      cost = cost,
      gamma = gamma
    )
    
    ### LOOP THROUGH PARAMETER VALUES ###
    # Name of the response (class label) column. "DEFAULT" is the column the
    # original snippet read the true labels from; replace it with the name of
    # your own classification column. Using one variable keeps the model
    # formula and the label extraction below in agreement.
    target <- "DEFAULT"
    model_formula <- stats::reformulate(".", response = target)

    # Columns passed to the model: everything except the fold id, so that
    # `~ .` does not pick up the fold assignment as a predictor.
    model_cols <- setdiff(names(df1), "fold")

    # Outer loop (sequential): one iteration per (cost, gamma) row of `parms`;
    # the per-iteration one-row data frames are stacked with rbind.
    result <- foreach(i = seq_len(nrow(parms)), .combine = rbind) %do% {
      # Named cost_i / gamma_i to avoid shadowing base::c().
      cost_i <- parms$cost[i]
      gamma_i <- parms$gamma[i]

      ### K-FOLD VALIDATION ###
      # Inner loop (%dopar%): fold j is held out, the model is fitted on the
      # remaining folds. Result order is irrelevant, hence .inorder = FALSE.
      out <- foreach(
        j = seq_len(max(df1$fold)),
        .combine = rbind,
        .inorder = FALSE
      ) %dopar% {
        deve <- df1[df1$fold != j, model_cols, drop = FALSE]
        test <- df1[df1$fold == j, model_cols, drop = FALSE]

        mdl <- e1071::svm(
          model_formula,
          data = deve,
          type = "C-classification",
          kernel = "radial",
          cost = cost_i,
          gamma = gamma_i,
          probability = TRUE
        )

        pred <- predict(mdl, test, decision.values = TRUE, probability = TRUE)

        # NOTE(review): only column 2 of the class-probability matrix is kept,
        # as in the original two-class example; with more than two classes
        # confirm which column (or which multi-class metric) is wanted.
        data.frame(
          y = test[[target]],
          prob = attributes(pred)$probabilities[, 2]
        )
      }

      ### CALCULATE SVM PERFORMANCE ###
      # ROC computed on the pooled held-out predictions of all folds.
      roc_fit <- pROC::roc(as.factor(out$y), out$prob)

      # One result row: the parameter pair plus its AUC in column `roc`.
      data.frame(parms[i, ], roc = roc_fit$auc[1])
    }
    

相关问题