首页 文章

无法使用Spark从Couchbase读取数据

提问于
浏览
0

我一直在尝试使用 Spark 从 Couchbase 读取数据，但由于身份验证失败，始终无法读取。

import com.couchbase.client.java.document.JsonDocument
import org.apache.spark.sql.SparkSession
import com.couchbase.spark._

/**
 * Minimal Spark job that fetches one Couchbase document by id and prints it.
 *
 * All connection settings are passed as Spark configuration entries on the
 * SparkSession builder; the session is always stopped before `main` returns.
 */
object SparkRead {

  /**
   * Builds a local SparkSession configured for Couchbase, loads a single
   * document by key, prints it, and shuts the session down.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {

    // The SparkSession is the main entry point into spark
    val spark = SparkSession
      .builder()
      .appName("KeyValueExample")
      .master("local[*]") // use the JVM as the master, great for testing
      .config("spark.couchbase.nodes", "***********") // connect to couchbase on hostname
      .config("spark.couchbase.bucket.beer-sample","") // open the beer-sample bucket with empty password
      .config("spark.couchbase.username", "couchdb")
      .config("spark.couchbase.password", "******")
      // NOTE(review): the CouchbaseEnvironment dump logged for this run still reports
      // connectTimeout=5000, kvTimeout=2500 and socketConnectTimeout=1000, so these three
      // keys do not appear to take effect - confirm the key names for the connector in use.
      .config("spark.couchbase.connectTimeout","30000")
      .config("spark.couchbase.kvTimeout","10000")
      .config("spark.couchbase.socketConnect","10000")
      .getOrCreate()

    try {
      // NOTE(review): "airline_10123" looks like a travel-sample document id while the
      // configured bucket is beer-sample - confirm the id exists in the opened bucket.
      spark.sparkContext
        .couchbaseGet[JsonDocument](Seq("airline_10123")) // Load documents from couchbase
        .collect() // collect all data from the spark workers
        .foreach(println) // print each document content
    } finally {
      // Release the Spark context (and the connections it holds) even when the read fails.
      spark.stop()
    }
  }
}

Below is the Build File

name := "KafkaSparkCouchReadWrite"

organization := "my.clairvoyant"

version := "1.0.0-SNAPSHOT"

scalaVersion := "2.11.11"

// One Spark version for every Spark module: mixing 2.1.0 core/sql/streaming with a
// 2.2.0 Kafka integration puts binary-incompatible Spark classes on the same classpath.
val sparkVersion = "2.2.0"

// Couchbase Spark connector release line that matches Spark 2.2.
// NOTE(review): username/password (RBAC) login is expected to need the 2.2.x connector
// rather than 2.1.0 - confirm against the connector release notes.
val couchbaseConnectorVersion = "2.2.0"

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % sparkVersion,
  "org.apache.spark" %% "spark-streaming" % sparkVersion,
  "org.apache.spark" %% "spark-sql" % sparkVersion,
  // %% appends the Scala binary suffix (_2.11) instead of hard-coding it.
  "org.apache.spark" %% "spark-streaming-kafka-0-10" % sparkVersion,
  "com.couchbase.client" %% "spark-connector" % couchbaseConnectorVersion,
  "org.glassfish.hk2" % "hk2-utils" % "2.2.0-b27",
  "org.glassfish.hk2" % "hk2-locator" % "2.2.0-b27",
  "javax.validation" % "validation-api" % "1.1.0.Final",
  "org.apache.kafka" %% "kafka" % "0.11.0.0",
  "com.googlecode.json-simple" % "json-simple" % "1.1"
  // Exclude transitive org.glassfish.hk2 / javax.validation artifacts from every module
  // above, so only the versions declared explicitly in this list are resolved.
).map(_.excludeAll(ExclusionRule("org.glassfish.hk2"), ExclusionRule("javax.validation")))

ERROR LOG

17/12/12 15:18:35 INFO BlockManager: 初始化 BlockManager: BlockManagerId(driver, 192.168.33.220, 52402, None)
17/12/12 15:18:35 INFO SharedState: 仓库路径为 'file:/Users/桑帕特/Desktop/GitClairvoyant/cpdl3-POC/KafkaSparkCouchReadWrite/spark-warehouse/'。
17/12/12 15:18:35 INFO CouchbaseCore: CouchbaseEnvironment: {sslEnabled=false, sslKeystoreFile='null', sslKeystorePassword=false, sslKeystore=null, bootstrapHttpEnabled=true, bootstrapCarrierEnabled=true, bootstrapHttpDirectPort=8091, bootstrapHttpSslPort=18091, bootstrapCarrierDirectPort=11210, bootstrapCarrierSslPort=11207, ioPoolSize=8, computationPoolSize=8, responseBufferSize=16384, requestBufferSize=16384, kvServiceEndpoints=1, viewServiceEndpoints=12, queryServiceEndpoints=12, searchServiceEndpoints=12, ioPool=NioEventLoopGroup, kvIoPool=null, viewIoPool=null, searchIoPool=null, queryIoPool=null, coreScheduler=CoreScheduler, memcachedHashingStrategy=DefaultMemcachedHashingStrategy, eventBus=DefaultEventBus, packageNameAndVersion=couchbase-java-client/2.4.2 (git: 2.4.2, core: 1.4.2), dcpEnabled=false, retryStrategy=BestEffort, maxRequestLifetime=75000, retryDelay=ExponentialDelay{growBy 1.0 MICROSECONDS, powers of 2; lower=100, upper=100000}, reconnectDelay=ExponentialDelay{growBy 1.0 MILLISECONDS, powers of 2; lower=32, upper=4096}, observeIntervalDelay=ExponentialDelay{growBy 1.0 MICROSECONDS, powers of 2; lower=10, upper=100000}, keepAliveInterval=30000, autoreleaseAfter=2000, bufferPoolingEnabled=true, tcpNodelayEnabled=true, mutationTokensEnabled=false, socketConnectTimeout=1000, dcpConnectionBufferSize=20971520, dcpConnectionBufferAckThreshold=0.2, dcpConnectionName=dcp/core-io, callbacksOnIoPool=false, disconnectTimeout=25000, requestBufferWaitStrategy=com.couchbase.client.core.env.DefaultCoreEnvironment$2@7b7b3edb, queryTimeout=75000, viewTimeout=75000, kvTimeout=2500, connectTimeout=5000, dnsSrvEnabled=false}
17/12/12 15:18:37 WARN Endpoint: [null][KeyValueEndpoint]: 身份验证失败。
17/12/12 15:18:37 INFO Endpoint: [null][KeyValueEndpoint]: 收到 Channel 变为非活动状态的通知，尝试重新连接。
17/12/12 15:18:37 WARN ResponseStatusConverter: 协议 HTTP 的未知 ResponseStatus: 401
17/12/12 15:18:37 WARN ResponseStatusConverter: 协议 HTTP 的未知 ResponseStatus: 401
线程 "main" 中的异常 com.couchbase.client.java.error.InvalidPasswordException: 存储桶 "beer-sample" 的密码不匹配。
    at com.couchbase.client.java.CouchbaseAsyncCluster$OpenBucketErrorHandler.call(CouchbaseAsyncCluster.java:601)
    at com.couchbase.client.java.CouchbaseAsyncCluster$OpenBucketErrorHandler.call(CouchbaseAsyncCluster.java:584)
    at rx.internal.operators.OperatorOnErrorResumeNextViaFunction$4.onError(OperatorOnErrorResumeNextViaFunction.java:140)
    at rx.internal.operators.OnSubscribeMap$MapSubscriber.onError(OnSubscribeMap.java:88)

1 回答

  • 0

    示例代码:

    import com.couchbase.client.java.document.JsonDocument
    import com.couchbase.spark._
    import org.apache.spark.sql.SparkSession
    
    /**
     * Example Spark job that reads one document from the Couchbase travel-sample
     * bucket by id and prints it between two separator lines.
     */
    object SparkReadCouchBase {

      /**
       * Builds a local SparkSession configured for Couchbase, fetches a single
       * document by key, prints it, and always stops the session afterwards.
       *
       * @param args command-line arguments (not used)
       */
      def main(args: Array[String]): Unit = {

        val spark = SparkSession
          .builder()
          .appName("KeyValueExample")
          .master("local[*]") // use the JVM as the master, great for testing
          .config("spark.couchbase.nodes", "127.0.0.1") // connect to couchbase on hostname
          .config("spark.couchbase.bucket.travel-sample","") // open the travel-sample bucket with empty password
          .config("com.couchbase.username", "*******")
          .config("com.couchbase.password", "*******")
          .config("com.couchbase.kvTimeout","10000")
          .config("com.couchbase.connectTimeout","30000")
          .config("com.couchbase.socketConnect","10000")
          .getOrCreate()

        try {
          println("=====================================================================================")
          spark.sparkContext
            .couchbaseGet[JsonDocument](Seq("airline_10123")) // Load documents from couchbase
            .collect() // collect all data from the spark workers
            .foreach(println) // print each document content
          println("=====================================================================================")
        } finally {
          // Shut the session down even if the Couchbase read throws.
          spark.stop()
        }
      }
    }
    

    POM.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
      http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    
    <groupId>com.*******.*****</groupId>
    <artifactId>KafkaSparkCouch</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>
    
    <!-- Build-wide version and naming properties referenced as ${...} below. -->
    <properties>
        <!-- Java source/target level passed to maven-compiler-plugin. -->
        <java.version>1.8</java.version>
        <!-- Version applied to the Spark artifacts (and, in this POM, to the Couchbase spark-connector too). -->
        <spark.version>2.2.0</spark.version>
        <!-- Full Scala version used for scala-library and scala-maven-plugin. -->
        <scala.version>2.11.8</scala.version>
        <!-- Scala binary suffix appended to artifactIds, e.g. spark-core_2.11. -->
        <scala.parent.version>2.11</scala.parent.version>
        <kafka.client.version>0.11.0.0</kafka.client.version>
        <!-- NOTE(review): fat.jar.name is not referenced anywhere in the visible part of this POM. -->
        <fat.jar.name>SparkCouch</fat.jar.name>
        <!-- Same value as scala.parent.version; read by scala-maven-plugin's scalaCompatVersion. -->
        <scala.binary.version>2.11</scala.binary.version>
        <!-- NOTE(review): main.class is not referenced in the visible part of this POM; confirm the class name. -->
        <main.class>com.*******.demo.spark.couchbase.SparkReadCouchBaseTest</main.class>
    </properties>
    
    <!-- Compile/runtime libraries: Spark modules, the Couchbase connector, Kafka client, JSON helper, Scala runtime. -->
    <dependencies>
        <!-- Spark core. -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_${scala.parent.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
    
        <!-- Spark Streaming; the only Spark module declared with provided scope here. -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_${scala.parent.version}</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>
    
        <!-- Spark Streaming integration for Kafka 0.10+. -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka-0-10_${scala.parent.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
    
        <!-- Spark SQL (provides SparkSession). -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_${scala.parent.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
    
        <!-- Couchbase Spark connector. -->
        <!-- NOTE(review): its version reuses spark.version (2.2.0); the connector is a separate
             artifact, so confirm the two version numbers are meant to stay in lockstep. -->
        <dependency>
            <groupId>com.couchbase.client</groupId>
            <artifactId>spark-connector_${scala.parent.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
    
        <!-- Kafka client library. -->
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>${kafka.client.version}</version>
        </dependency>
    
        <!-- json-simple JSON library. -->
        <dependency>
            <groupId>com.googlecode.json-simple</groupId>
            <artifactId>json-simple</artifactId>
            <version>1.1.1</version>
        </dependency>
    
        <!-- Scala standard library, pinned to scala.version. -->
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
        </dependency>
    
    </dependencies>
    
    <!-- Build plugins: Java compiler level, fat-jar assembly, and Scala compilation. -->
    <build>
        <plugins>
            <!-- Compile Java sources at the level given by java.version. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <source>${java.version}</source>
                    <target>${java.version}</target>
                </configuration>
            </plugin>
            <!--Create fat-jar file-->
            <!-- Runs assembly:single during the package phase with the jar-with-dependencies descriptor. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>2.4</version>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <!-- Scala compilation via org.scala-tools maven-scala-plugin: compile and testCompile,
                 plus an extra compile run bound to process-resources. -->
            <!-- NOTE(review): a second Scala plugin (scala-maven-plugin) is declared below; confirm
                 that both are really needed. -->
            <plugin>
                <groupId>org.scala-tools</groupId>
                <artifactId>maven-scala-plugin</artifactId>
                <version>2.15.2</version>
                <executions>
                    <execution>
                        <id>compile</id>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                        <phase>compile</phase>
                    </execution>
    
                    <execution>
                        <id>test-compile</id>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                        <phase>test-compile</phase>
                    </execution>
    
                    <execution>
                        <phase>process-resources</phase>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <!-- scala-maven-plugin configured with the Scala versions only; no executions are
                 declared for it in this block. -->
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.1.6</version>
                <configuration>
                    <scalaCompatVersion>${scala.binary.version}</scalaCompatVersion>
                    <scalaVersion>${scala.version}</scalaVersion>
                </configuration>
                <!-- other settings-->
            </plugin>
        </plugins>
    </build>
    <!-- Artifact repositories consulted when resolving dependencies. -->
    <repositories>
        <!-- Maven Central over HTTPS: Central rejects plain-HTTP requests, so the
             original http:// URL can no longer be used to download artifacts. -->
        <repository>
            <id>mavencentral</id>
            <url>https://repo1.maven.org/maven2/</url>
            <snapshots>
                <enabled>true</enabled>
            </snapshots>
        </repository>
        <!-- NOTE(review): scala-tools.org is believed to be retired; verify this repository
             still resolves, or drop it if every artifact is available from Maven Central. -->
        <repository>
            <id>scala</id>
            <name>Scala Tools</name>
            <url>http://scala-tools.org/repo-releases/</url>
            <releases>
                <enabled>true</enabled>
            </releases>
            <snapshots>
                <enabled>false</enabled>
            </snapshots>
        </repository>
    </repositories>
    
    <!-- Repository consulted when resolving build plugins (releases only, snapshots disabled). -->
    <!-- NOTE(review): same scala-tools.org URL as the dependency repository above; confirm the
         host is still reachable. -->
    <pluginRepositories>
        <pluginRepository>
            <id>scala</id>
            <name>Scala Tools</name>
            <url>http://scala-tools.org/repo-releases/</url>
            <releases>
                <enabled>true</enabled>
            </releases>
            <snapshots>
                <enabled>false</enabled>
            </snapshots>
        </pluginRepository>
    </pluginRepositories>
    <name>KafkaSparkCouch</name>
    </project>
    

相关问题