Bael 766 flink (#1533)
* BAEL-756 code for flink article * reorder * simpler wordCount example * BAEL-766 changes according to PR * BAEL-766 change datasource to dataset * BAEL-766 add sorting example * BAEL-766 add simple streaming example * one missing change to dataSet * windowing example * add window example * add dependency explicitly * add plugin * add surefire plugin, change name of the test to *IntegrationTest
This commit is contained in:
committed by
Grzegorz Piwowarek
parent
60332bb563
commit
a5de78c2b2
@@ -0,0 +1,20 @@
|
||||
package com.baeldung.flink;
|
||||
|
||||
import org.apache.flink.api.common.functions.FlatMapFunction;
|
||||
import org.apache.flink.api.java.tuple.Tuple2;
|
||||
import org.apache.flink.util.Collector;
|
||||
|
||||
import java.util.stream.Stream;
|
||||
|
||||
@SuppressWarnings("serial")
|
||||
public class LineSplitter implements FlatMapFunction<String, Tuple2<String, Integer>> {
|
||||
|
||||
@Override
|
||||
public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
|
||||
|
||||
String[] tokens = value.toLowerCase().split("\\W+");
|
||||
Stream.of(tokens)
|
||||
.filter(t -> t.length() > 0)
|
||||
.forEach(token -> out.collect(new Tuple2<>(token, 1)));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
package com.baeldung.flink;
|
||||
|
||||
import org.apache.flink.api.java.DataSet;
|
||||
import org.apache.flink.api.java.ExecutionEnvironment;
|
||||
import org.apache.flink.api.java.aggregation.Aggregations;
|
||||
import org.apache.flink.api.java.tuple.Tuple2;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class WordCount {
|
||||
|
||||
public static DataSet<Tuple2<String, Integer>> startWordCount(ExecutionEnvironment env, List<String> lines) throws Exception {
|
||||
DataSet<String> text = env.fromCollection(lines);
|
||||
|
||||
return text.flatMap(new LineSplitter())
|
||||
.groupBy(0)
|
||||
.aggregate(Aggregations.SUM, 1);
|
||||
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user