package com.test.spark

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010._
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe

/**
 * Minimal Spark Streaming job that reads string records from Kafka through the
 * kafka-0-10 direct stream and, for every 5-second batch, prints
 *
 *  - the raw `(key, value)` pairs, and
 *  - a word count over the record values (values are split on single spaces).
 *
 * Usage: `SparkKafKaTest [brokers] [groupId] [topic]`
 *
 * All three positional arguments are optional; when one is missing the value
 * that used to be hard-coded is used instead:
 *
 *  - `brokers` defaults to `localhost:9092`
 *  - `groupId` defaults to `use_a_separate_group_id_for_each_stream`
 *  - `topic`   defaults to `test`
 */
object SparkKafKaTest {

  private val DefaultBrokers = "localhost:9092"
  private val DefaultGroupId = "use_a_separate_group_id_for_each_stream"
  private val DefaultTopic = "test"

  /**
   * Entry point: builds the streaming context, wires up the Kafka direct
   * stream and blocks until the streaming job terminates.
   *
   * @param args optional positional arguments `brokers`, `groupId`, `topic`
   */
  def main(args: Array[String]): Unit = {
    // Positional arguments fall back to the previous hard-coded values, so
    // running without arguments behaves exactly as before.
    val brokers = args.lift(0).getOrElse(DefaultBrokers)
    val groupId = args.lift(1).getOrElse(DefaultGroupId)
    val topic = args.lift(2).getOrElse(DefaultTopic)

    // setIfMissing (rather than setMaster) keeps "local[2]" for IDE runs while
    // letting a master supplied by spark-submit take effect.
    val sparkConf = new SparkConf()
      .setAppName("SparkKafKaTest")
      .setIfMissing("spark.master", "local[2]")
    val ssc = new StreamingContext(sparkConf, Seconds(5))

    // Auto-commit is disabled and this job never commits offsets itself.
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> brokers,
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> groupId,
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )
    val topics = Array(topic)

    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )

    // Output 1: raw key/value pairs of each batch.
    stream.map(record => (record.key, record.value)).print()

    // Output 2: per-batch word count over the record values.
    stream
      .map(record => record.value)
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .print()

    ssc.start()
    ssc.awaitTermination()
  }
}
1. 依赖
<dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-streaming-kafka-0-10_2.11</artifactId> <version>${spark.version}</version> </dependency> <dependency> <groupId>com.fasterxml.jackson.module</groupId> <artifactId>jackson-module-scala_2.11</artifactId> <version>${jackson.version}</version> </dependency> <dependency> <groupId>com.fasterxml.jackson.core</groupId> <artifactId>jackson-core</artifactId> <version>${jackson.version}</version> </dependency> <dependency> <groupId>com.fasterxml.jackson.core</groupId> <artifactId>jackson-annotations</artifactId> <version>${jackson.version}</version> </dependency>
http://www.waitingfy.com/archives/4255
2. 用spark-submit 来提交
spark-submit \
  --class com.test.spark.SparkKafKaTest \
  --master "local[2]" \
  --packages org.apache.spark:spark-streaming-kafka-0-10_2.11:2.3.0 \
  ./target/spartktrain-1.0.jar \
  localhost:9092 group1 test4255