首页 文章

将spark决策树模型调试字符串转换为scala中的嵌套JSON

提问于
浏览
2

类似于此处引用的树 JSON 解析,我试图在 Scala 中实现决策树的简单可视化 . 它与 Databricks 笔记本中提供的 display 方法效果完全相同 .

我是 Scala 新手,正在努力把逻辑写对 . 我知道必须通过递归调用来构建子节点,并在遇到最终预测值时终止递归 . 下面是我尝试的代码,其中使用了下文给出的模型调试字符串作为输入:

/**
 * Classifies one line of a decision-tree debug string.
 *
 * The line is tagged "If" when the pattern `If+` occurs anywhere in it, otherwise
 * "Else" when `Else+` occurs anywhere in it, otherwise "None".
 *
 * @param x a single statement line
 * @return the tag paired with the line text: for "If"/"Else" every occurrence of that
 *         keyword is removed; for "None" every '(' and ')' is removed
 */
def getStatmentType(x: String): (String, String) = {
    val ifKeyword = "If+".r
    val elseKeyword = "Else+".r
    ifKeyword.findFirstIn(x) match {
      case Some(_) =>
        ("If", x.replace("If", ""))
      case None =>
        elseKeyword.findFirstIn(x) match {
          case Some(_) => ("Else", x.replace("Else", ""))
          case None    => ("None", x.replace("(", "").replace(")", ""))
        }
    }
  }
  /**
   * Returns a copy of `test` without the element at index `i`.
   *
   * Built from `take(i)` and `drop(i + 1)`, so a negative index or one at/after the
   * end of the list leaves the list unchanged.
   *
   * @param test the source list
   * @param i    zero-based index of the element to leave out
   */
  def delete[A](test: List[A])(i: Int): List[A] = {
    val before = test.take(i)
    val after = test.drop(i + 1)
    before ::: after
  }
  /**
   * Builds a nested node list from the statement lines of a decision-tree debug string.
   *
   * Each line is classified with `getStatmentType`:
   *  - an "If" line becomes `Map("if-name" -> condition, "children" -> subtree)`;
   *  - the "Else" line that follows that subtree becomes
   *    `Map("else-name" -> condition, "children" -> subtree)`;
   *  - any other line becomes the leaf `Map("predict" -> text)`.
   *
   * Nesting is derived purely from statement order (an If is followed by its subtree,
   * then by its matching Else and that branch's subtree); indentation is not consulted.
   * Every parse step hands back the lines it did not consume, so a line is attached to
   * exactly one node and the input is never indexed past its end.
   *
   * @param tree statement lines in the order they appear in the debug string
   * @return the top-level nodes; parsing stops at an Else line that has no preceding If
   */
  def BuildJson(tree:List[String]):List[Map[String, Any]] = {
    // Parses one sibling group -- a leaf, or an If node plus its Else node when present --
    // and returns it together with the lines that were left unconsumed.
    def parseGroup(lines: List[String]): (List[Map[String, Any]], List[String]) =
      lines match {
        case Nil => (Nil, Nil)
        case head :: rest =>
          getStatmentType(head) match {
            case ("If", ifName) =>
              val (ifChildren, afterIf) = parseGroup(rest)
              val ifNode = Map[String, Any]("if-name" -> ifName, "children" -> ifChildren)
              afterIf match {
                case next :: afterElseLine =>
                  getStatmentType(next) match {
                    case ("Else", elseName) =>
                      val (elseChildren, remaining) = parseGroup(afterElseLine)
                      val elseNode =
                        Map[String, Any]("else-name" -> elseName, "children" -> elseChildren)
                      (List(ifNode, elseNode), remaining)
                    // No matching Else: leave the line for the caller to handle.
                    case _ => (List(ifNode), afterIf)
                  }
                // The If subtree ended the input; there is nothing left to inspect.
                case Nil => (List(ifNode), Nil)
              }
            case ("None", text) =>
              (List(Map[String, Any]("predict" -> text)), rest)
            // An Else seen here belongs to an enclosing If (or is stray): consume nothing.
            case _ => (Nil, lines)
          }
      }

    // Collects consecutive top-level groups until the input is exhausted or no progress
    // can be made (a stray Else). A non-empty group always consumes at least one line.
    def parseAll(lines: List[String]): List[Map[String, Any]] = {
      val (nodes, remaining) = parseGroup(lines)
      if (nodes.isEmpty) Nil else nodes ++ parseAll(remaining)
    }

    parseAll(tree)
  }

  /**
   * Parses a decision-tree debug string into nested nodes and prints their JSON rendering.
   *
   * @param str debug string with one statement per line, e.g.
   *            "If (feature 0 in {6.0})\n Predict: 17.0\nElse (feature 0 not in {6.0})\n Predict: 6.0"
   * @return `Json.toJson("")`; the rendered tree is only printed, not returned
   */
  def treeJson1(str: String):JsValue = {
    // Parse the caller's input rather than a hard-coded sample string.
    // Every space is stripped before splitting, so indentation is discarded and the
    // conditions lose their inner spaces as well ("feature 0 in" -> "feature0in").
    val statements = str.replace(" ", "").split("\n").toList
    val nodes = BuildJson(statements)
    println(MapReader.mapToJson(nodes))
    // NOTE(review): the rendered JSON is printed but the method returns an empty JSON
    // string; returning the rendered tree needs MapReader.mapToJson's result type,
    // which is not visible here -- confirm before changing.
    Json.toJson("")
  }

预期输出:

[
  {
    'name': 'Root',
    'children': [
      {
    'name': 'feature 0 in {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,10.0,11.0,12.0,13.0}',
    'children': [
      {
        'name': 'feature 0 in {6.0}',
        'children': [
          {
            'name': 'Predict: 17.0'
          }
        ]
      },
      {
        'name': 'feature 0 not in {6.0}',
        'children': [
          {
            'name': 'Predict: 6.0'
          }
        ]
      }
    ]
  },
  {
    'name': 'feature 0 not in {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,10.0,11.0,12.0,13.0}',
    'children': [
      {
        'name': 'Predict: 20.0'
      }
    ]
  }
    ]
  }
]

1 回答

  • 0

    您不需要解析调试字符串,而是可以直接从模型的根节点开始遍历解析 . 参考链接见原回答(此处的链接文本已丢失).

相关问题