From 3085db7b3047f446a3039e9461c6bf13f0658beb Mon Sep 17 00:00:00 2001 From: Nikhil Khatwani Date: Mon, 23 Oct 2017 11:46:20 +0530 Subject: [PATCH] Bael 1166 intro apache spark (#2812) * Changes for BAEL-1166 * Changes for BAEL_1166 * Changes for BAEL 1166 * Changes for BAEL 1166 --- apache-spark/pom.xml | 44 +++++++++++++++ .../src/main/java/com/baeldung/WordCount.java | 53 +++++++++++++++++++ .../src/main/resources/spark_example.txt | 3 ++ 3 files changed, 100 insertions(+) create mode 100644 apache-spark/pom.xml create mode 100644 apache-spark/src/main/java/com/baeldung/WordCount.java create mode 100644 apache-spark/src/main/resources/spark_example.txt diff --git a/apache-spark/pom.xml b/apache-spark/pom.xml new file mode 100644 index 0000000000..1f95150ee7 --- /dev/null +++ b/apache-spark/pom.xml @@ -0,0 +1,44 @@ + + 4.0.0 + + com.baeldung + apache-spark + 1.0-SNAPSHOT + jar + + apache-spark + http://maven.apache.org + + + UTF-8 + + + + + + org.apache.spark + spark-core_2.10 + 2.2.0 + + + junit + junit + 3.8.1 + test + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.7.0 + + 1.8 + 1.8 + + + + + diff --git a/apache-spark/src/main/java/com/baeldung/WordCount.java b/apache-spark/src/main/java/com/baeldung/WordCount.java new file mode 100644 index 0000000000..ec1dedcb69 --- /dev/null +++ b/apache-spark/src/main/java/com/baeldung/WordCount.java @@ -0,0 +1,53 @@ +package com.baeldung; + +import java.util.Arrays; +import java.util.List; +import java.util.regex.Pattern; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.*; +import org.apache.spark.api.java.function.Function2; +import org.apache.spark.api.java.function.PairFunction; + +import scala.Tuple2; + +public class WordCount { + + private static final Pattern SPACE = Pattern.compile(" "); + + public static void main(String[] args) throws Exception { + if (args.length < 1) { + System.err.println("Usage: JavaWordCount "); + System.exit(1); + } + SparkConf sparkConf = new SparkConf().setAppName("JavaWordCount").setMaster("local"); + JavaSparkContext ctx = new JavaSparkContext(sparkConf); + JavaRDD lines = ctx.textFile(args[0], 1); + + JavaRDD words = lines.flatMap(s -> Arrays.asList(SPACE.split(s)).iterator()); + JavaPairRDD ones = words.mapToPair( + new PairFunction() { + @Override + public Tuple2 call(String s) { + return new Tuple2<>(s, 1); + } + }); + + JavaPairRDD counts = ones.reduceByKey( + new Function2() { + @Override + public Integer call(Integer i1, Integer i2) { + return i1 + i2; + } + }); + + List> output = counts.collect(); + for (Tuple2 tuple : output) { + System.out.println(tuple._1() + ": " + tuple._2()); + } + ctx.stop(); +} +} diff --git a/apache-spark/src/main/resources/spark_example.txt b/apache-spark/src/main/resources/spark_example.txt new file mode 100644 index 0000000000..10fd71dc31 --- /dev/null +++ b/apache-spark/src/main/resources/spark_example.txt @@ -0,0 +1,3 @@ +Hello from Baeldung +Keep Learning Spark +Bye from Baeldung \ No newline at end of file