
Spark gives NullPointerException during InputSplit for Hbase


I am using Spark 1.2.1, Hbase 0.98.10 and Hadoop 2.6.0. I got a null pointer exception while retrieving data from HBase. Find the stack trace below.

[sparkDriver-akka.actor.default-dispatcher-2] DEBUG NewHadoopRDD - Failed to use InputSplit#getLocationInfo.
java.lang.NullPointerException: null
    at scala.collection.mutable.ArrayOps$ofRef$.length$extension(ArrayOps.scala:114) ~[scala-library-2.10.4.jar:na]
    at scala.collection.mutable.ArrayOps$ofRef.length(ArrayOps.scala:114) ~[scala-library-2.10.4.jar:na]
    at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:32) ~[scala-library-2.10.4.jar:na]
    at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:108) ~[scala-library-2.10.4.jar:na]
    at org.apache.spark.rdd.HadoopRDD$.convertSplitLocationInfo(HadoopRDD.scala:401) ~[spark-core_2.10-1.2.1.jar:1.2.1]
    at org.apache.spark.rdd.NewHadoopRDD.getPreferredLocations(NewHadoopRDD.scala:215) ~[spark-core_2.10-1.2.1.jar:1.2.1]
    at org.apache.spark.rdd.RDD$$anonfun$preferredLocations$2.apply(RDD.scala:234) [spark-core_2.10-1.2.1.jar:1.2.1]
    at org.apache.spark.rdd.RDD$$anonfun$preferredLocations$2.apply(RDD.scala:234) [spark-core_2.10-1.2.1.jar:1.2.1]
    at scala.Option.getOrElse(Option.scala:120) [scala-library-2.10.4.jar:na]
    at org.apache.spark.rdd.RDD.preferredLocations(RDD.scala:233) [spark-core_2.10-1.2.1.jar:1.2.1]
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal(DAGScheduler.scala:1326) [spark-core_2.10-1.2.1.jar:1.2.1]
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2$$anonfun$apply$2.apply$mcVI$sp(DAGScheduler.scala:1336) [spark-core_2.10-1.2.1.jar:1.2.1]
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2$$anonfun$apply$2.apply(DAGScheduler.scala:1335) [spark-core_2.10-1.2.1.jar:1.2.1]
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2$$anonfun$apply$2.apply(DAGScheduler.scala:1335) [spark-core_2.10-1.2.1.jar:1.2.1]
    at scala.collection.immutable.List.foreach(List.scala:318) [scala-library-2.10.4.jar:na]
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2.apply(DAGScheduler.scala:1335) [spark-core_2.10-1.2.1.jar:1.2.1]
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2.apply(DAGScheduler.scala:1333) [spark-core_2.10-1.2.1.jar:1.2.1]
    at scala.collection.immutable.List.foreach(List.scala:318) [scala-library-2.10.4.jar:na]
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal(DAGScheduler.scala:1333) [spark-core_2.10-1.2.1.jar:1.2.1]
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2$$anonfun$apply$2.apply$mcVI$sp(DAGScheduler.scala:1336) [spark-core_2.10-1.2.1.jar:1.2.1]
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2$$anonfun$apply$2.apply(DAGScheduler.scala:1335) [spark-core_2.10-1.2.1.jar:1.2.1]
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2$$anonfun$apply$2.apply(DAGScheduler.scala:1335) [spark-core_2.10-1.2.1.jar:1.2.1]
    at scala.collection.immutable.List.foreach(List.scala:318) [scala-library-2.10.4.jar:na]
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2.apply(DAGScheduler.scala:1335) [spark-core_2.10-1.2.1.jar:1.2.1]
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2.apply(DAGScheduler.scala:1333) [spark-core_2.10-1.2.1.jar:1.2.1]
    at scala.collection.immutable.List.foreach(List.scala:318) [scala-library-2.10.4.jar:na]
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal(DAGScheduler.scala:1333) [spark-core_2.10-1.2.1.jar:1.2.1]
    at org.apache.spark.scheduler.DAGScheduler.getPreferredLocs(DAGScheduler.scala:1304) [spark-core_2.10-1.2.1.jar:1.2.1]
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$17.apply(DAGScheduler.scala:862) [spark-core_2.10-1.2.1.jar:1.2.1]
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$17.apply(DAGScheduler.scala:859) [spark-core_2.10-1.2.1.jar:1.2.1]
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) [scala-library-2.10.4.jar:na]
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) [scala-library-2.10.4.jar:na]
    at scala.collection.Iterator$class.foreach(Iterator.scala:727) [scala-library-2.10.4.jar:na]
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1157) [scala-library-2.10.4.jar:na]
    at scala.collection.IterableLike$class.foreach(IterableLike.scala:72) [scala-library-2.10.4.jar:na]
    at scala.collection.AbstractIterable.foreach(Iterable.scala:54) [scala-library-2.10.4.jar:na]
    at scala.collection.TraversableLike$class.map(TraversableLike.scala:244) [scala-library-2.10.4.jar:na]
    at scala.collection.AbstractTraversable.map(Traversable.scala:105) [scala-library-2.10.4.jar:na]
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:859) [spark-core_2.10-1.2.1.jar:1.2.1]
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778) [spark-core_2.10-1.2.1.jar:1.2.1]
    at org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762) [spark-core_2.10-1.2.1.jar:1.2.1]
    at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1389) [spark-core_2.10-1.2.1.jar:1.2.1]
    at akka.actor.Actor$class.aroundReceive(Actor.scala:465) [akka-actor_2.10-2.3.4-spark.jar:na]
    at org.apache.spark.scheduler.DAGSchedulerEventProcessActor.aroundReceive(DAGScheduler.scala:1375) [spark-core_2.10-1.2.1.jar:1.2.1]
    at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516) [akka-actor_2.10-2.3.4-spark.jar:na]
    at akka.actor.ActorCell.invoke(ActorCell.scala:487) [akka-actor_2.10-2.3.4-spark.jar:na]
    at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238) [akka-actor_2.10-2.3.4-spark.jar:na]
    at akka.dispatch.Mailbox.run(Mailbox.scala:220) [akka-actor_2.10-2.3.4-spark.jar:na]
    at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:393) [akka-actor_2.10-2.3.4-spark.jar:na]
    at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) [scala-library-2.10.4.jar:na]
    at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) [scala-library-2.10.4.jar:na]
    at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) [scala-library-2.10.4.jar:na]
    at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) [scala-library-2.10.4.jar:na]

Please provide me with a solution to this problem. The question does not show the read code; a minimal sketch of the typical read path that produces these splits follows.
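For context only: the frames NewHadoopRDD.getPreferredLocations / HadoopRDD$.convertSplitLocationInfo in the trace correspond to Spark computing preferred locations for the input splits of a newAPIHadoopRDD. A minimal sketch of such a read, assuming the stock TableInputFormat path and a hypothetical table name "my_table" (the asker's actual code is not shown):

    import org.apache.hadoop.hbase.HBaseConfiguration
    import org.apache.hadoop.hbase.client.Result
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable
    import org.apache.hadoop.hbase.mapreduce.TableInputFormat
    import org.apache.spark.{SparkConf, SparkContext}

    object HBaseReadSketch {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext(new SparkConf().setAppName("hbase-read"))

        // Standard HBase client configuration; TableInputFormat.INPUT_TABLE
        // ("hbase.mapreduce.inputtable") names the table to scan.
        val conf = HBaseConfiguration.create()
        conf.set(TableInputFormat.INPUT_TABLE, "my_table") // hypothetical table name

        // newAPIHadoopRDD creates the TableInputFormat splits whose
        // getLocationInfo the DEBUG message in the stack trace refers to.
        val rdd = sc.newAPIHadoopRDD(
          conf,
          classOf[TableInputFormat],
          classOf[ImmutableBytesWritable],
          classOf[Result])

        println(rdd.count())
        sc.stop()
      }
    }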

1 Answer

  • 1

    The exception is raised in the getPreferredLocations stage, so without more information about your HBase setup I would suggest you take a look at hbase.table.name and hbase.master (the latter I did not configure when the HMaster was correctly defined), I think. See the sketch below for checking those settings.
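    A minimal sketch of verifying the settings this answer names, assuming the read goes through the stock TableInputFormat (whose table-name key is TableInputFormat.INPUT_TABLE, i.e. hbase.mapreduce.inputtable; hbase.table.name is used by some other connectors). The hostnames and table name below are hypothetical placeholders:

        import org.apache.hadoop.hbase.HBaseConfiguration
        import org.apache.hadoop.hbase.mapreduce.TableInputFormat

        object HBaseConfCheck {
          def main(args: Array[String]): Unit = {
            val conf = HBaseConfiguration.create()

            // Table to scan; with the stock TableInputFormat the key is
            // TableInputFormat.INPUT_TABLE ("hbase.mapreduce.inputtable").
            conf.set(TableInputFormat.INPUT_TABLE, "my_table")    // hypothetical name

            // The two suspects from the answer. hbase.master is usually
            // unnecessary when clients locate the HMaster via ZooKeeper.
            conf.set("hbase.zookeeper.quorum", "zk-host")         // hypothetical host
            conf.set("hbase.master", "hmaster-host:60000")        // hypothetical host:port

            // Print what actually reached the configuration before
            // handing it to sc.newAPIHadoopRDD(...).
            println(conf.get(TableInputFormat.INPUT_TABLE))
            println(conf.get("hbase.master"))
            println(conf.get("hbase.zookeeper.quorum"))
          }
        }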
