Moved Flink related code from libraries to libraries-data module. (#5253)

This commit is contained in:
Wosin
2018-09-15 13:14:57 +02:00
committed by Grzegorz Piwowarek
parent 33aa52ac94
commit 05e1700fb7
17 changed files with 71 additions and 49 deletions
@@ -1,103 +0,0 @@
package com.baeldung.flink;
import com.baeldung.flink.model.Backup;
import com.baeldung.flink.model.InputMessage;
import com.baeldung.flink.operator.BackupAggregator;
import com.baeldung.flink.operator.InputMessageTimestampAssigner;
import com.baeldung.flink.schema.BackupSerializationSchema;
import com.baeldung.flink.schema.InputMessageDeserializationSchema;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
import org.apache.commons.collections.ListUtils;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.serialization.SerializationSchema;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.awaitility.Awaitility;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static org.junit.Assert.assertEquals;
public class BackupCreatorIntegrationTest {
public static ObjectMapper mapper;
@Before
public void setup() {
mapper = new ObjectMapper().registerModule(new JavaTimeModule());
}
@Test
public void givenProperJson_whenDeserializeIsInvoked_thenProperObjectIsReturned() throws IOException {
InputMessage message = new InputMessage("Me", "User", LocalDateTime.now(), "Test Message");
byte[] messageSerialized = mapper.writeValueAsBytes(message);
DeserializationSchema<InputMessage> deserializationSchema = new InputMessageDeserializationSchema();
InputMessage messageDeserialized = deserializationSchema.deserialize(messageSerialized);
assertEquals(message, messageDeserialized);
}
@Test
public void givenMultipleInputMessagesFromDifferentDays_whenBackupCreatorIsUser_thenMessagesAreGroupedProperly() throws Exception {
LocalDateTime currentTime = LocalDateTime.now();
InputMessage message = new InputMessage("Me", "User", currentTime, "First TestMessage");
InputMessage secondMessage = new InputMessage("Me", "User", currentTime.plusHours(1), "First TestMessage");
InputMessage thirdMessage = new InputMessage("Me", "User", currentTime.plusHours(2), "First TestMessage");
InputMessage fourthMessage = new InputMessage("Me", "User", currentTime.plusHours(3), "First TestMessage");
InputMessage fifthMessage = new InputMessage("Me", "User", currentTime.plusHours(25), "First TestMessage");
InputMessage sixthMessage = new InputMessage("Me", "User", currentTime.plusHours(26), "First TestMessage");
List<InputMessage> firstBackupMessages = Arrays.asList(message, secondMessage, thirdMessage, fourthMessage);
List<InputMessage> secondBackupMessages = Arrays.asList(fifthMessage, sixthMessage);
List<InputMessage> inputMessages = ListUtils.union(firstBackupMessages, secondBackupMessages);
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
env.setParallelism(1);
DataStreamSource<InputMessage> testDataSet = env.fromCollection(inputMessages);
CollectingSink sink = new CollectingSink();
testDataSet.assignTimestampsAndWatermarks(new InputMessageTimestampAssigner())
.timeWindowAll(Time.hours(24))
.aggregate(new BackupAggregator())
.addSink(sink);
env.execute();
Awaitility.await().until(() -> sink.backups.size() == 2);
assertEquals(2, sink.backups.size());
assertEquals(firstBackupMessages, sink.backups.get(0).getInputMessages());
assertEquals(secondBackupMessages, sink.backups.get(1).getInputMessages());
}
@Test
public void givenProperBackupObject_whenSerializeIsInvoked_thenObjectIsProperlySerialized() throws IOException {
InputMessage message = new InputMessage("Me", "User", LocalDateTime.now(), "Test Message");
List<InputMessage> messages = Arrays.asList(message);
Backup backup = new Backup(messages, LocalDateTime.now());
byte[] backupSerialized = mapper.writeValueAsBytes(backup);
SerializationSchema<Backup> serializationSchema = new BackupSerializationSchema();
byte[] backupProcessed = serializationSchema.serialize(backup);
assertEquals(backupSerialized, backupProcessed);
}
private static class CollectingSink implements SinkFunction<Backup> {
public static List<Backup> backups = new ArrayList<>();
@Override
public synchronized void invoke(Backup value, Context context) throws Exception {
backups.add(value);
}
}
}
@@ -1,34 +0,0 @@
package com.baeldung.flink;
import com.baeldung.flink.operator.WordsCapitalizer;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.junit.Assert;
import org.junit.Test;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
public class WordCapitalizerIntegrationTest {
@Test
public void givenDataSet_whenExecuteWordCapitalizer_thenReturnCapitalizedWords() throws Exception {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
List<String> data = Arrays.asList("dog", "cat", "wolf", "pig");
DataSet<String> testDataSet = env.fromCollection(data);
List<String> dataProcessed = testDataSet
.map(new WordsCapitalizer())
.collect();
List<String> testDataCapitalized = data.stream()
.map(String::toUpperCase)
.collect(Collectors.toList());
Assert.assertEquals(testDataCapitalized, dataProcessed);
}
}
@@ -1,161 +0,0 @@
package com.baeldung.flink;
import org.apache.flink.api.common.operators.Order;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.junit.Test;
import java.time.ZonedDateTime;
import java.util.Arrays;
import java.util.List;
import static org.assertj.core.api.Assertions.assertThat;
public class WordCountIntegrationTest {
private final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
@Test
public void givenDataSet_whenExecuteWordCount_thenReturnWordCount() throws Exception {
// given
List<String> lines = Arrays.asList("This is a first sentence", "This is a second sentence with a one word");
// when
DataSet<Tuple2<String, Integer>> result = WordCount.startWordCount(env, lines);
// then
List<Tuple2<String, Integer>> collect = result.collect();
assertThat(collect).containsExactlyInAnyOrder(new Tuple2<>("a", 3), new Tuple2<>("sentence", 2), new Tuple2<>("word", 1), new Tuple2<>("is", 2), new Tuple2<>("this", 2), new Tuple2<>("second", 1), new Tuple2<>("first", 1), new Tuple2<>("with", 1),
new Tuple2<>("one", 1));
}
@Test
public void givenListOfAmounts_whenUseMapReduce_thenSumAmountsThatAreOnlyAboveThreshold() throws Exception {
// given
DataSet<Integer> amounts = env.fromElements(1, 29, 40, 50);
int threshold = 30;
// when
List<Integer> collect = amounts.filter(a -> a > threshold).reduce((integer, t1) -> integer + t1).collect();
// then
assertThat(collect.get(0)).isEqualTo(90);
}
@Test
public void givenDataSetOfComplexObjects_whenMapToGetOneField_thenReturnedListHaveProperElements() throws Exception {
// given
DataSet<Person> personDataSource = env.fromCollection(Arrays.asList(new Person(23, "Tom"), new Person(75, "Michael")));
// when
List<Integer> ages = personDataSource.map(p -> p.age).collect();
// then
assertThat(ages).hasSize(2);
assertThat(ages).contains(23, 75);
}
@Test
public void givenDataSet_whenSortItByOneField_thenShouldReturnSortedDataSet() throws Exception {
// given
Tuple2<Integer, String> secondPerson = new Tuple2<>(4, "Tom");
Tuple2<Integer, String> thirdPerson = new Tuple2<>(5, "Scott");
Tuple2<Integer, String> fourthPerson = new Tuple2<>(200, "Michael");
Tuple2<Integer, String> firstPerson = new Tuple2<>(1, "Jack");
DataSet<Tuple2<Integer, String>> transactions = env.fromElements(fourthPerson, secondPerson, thirdPerson, firstPerson);
// when
List<Tuple2<Integer, String>> sorted = transactions.sortPartition(new IdKeySelectorTransaction(), Order.ASCENDING).collect();
// then
assertThat(sorted).containsExactly(firstPerson, secondPerson, thirdPerson, fourthPerson);
}
@Test
public void giveTwoDataSets_whenJoinUsingId_thenProduceJoinedData() throws Exception {
// given
Tuple3<Integer, String, String> address = new Tuple3<>(1, "5th Avenue", "London");
DataSet<Tuple3<Integer, String, String>> addresses = env.fromElements(address);
Tuple2<Integer, String> firstTransaction = new Tuple2<>(1, "Transaction_1");
DataSet<Tuple2<Integer, String>> transactions = env.fromElements(firstTransaction, new Tuple2<>(12, "Transaction_2"));
// when
List<Tuple2<Tuple2<Integer, String>, Tuple3<Integer, String, String>>> joined = transactions.join(addresses).where(new IdKeySelectorTransaction()).equalTo(new IdKeySelectorAddress()).collect();
// then
assertThat(joined).hasSize(1);
assertThat(joined).contains(new Tuple2<>(firstTransaction, address));
}
@Test
public void givenStreamOfEvents_whenProcessEvents_thenShouldPrintResultsOnSinkOperation() throws Exception {
// given
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<String> text = env.fromElements("This is a first sentence", "This is a second sentence with a one word");
SingleOutputStreamOperator<String> upperCase = text.map(String::toUpperCase);
upperCase.print();
// when
env.execute();
}
@Test
public void givenStreamOfEvents_whenProcessEvents_thenShouldApplyWindowingOnTransformation() throws Exception {
// given
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
SingleOutputStreamOperator<Tuple2<Integer, Long>> windowed = env.fromElements(new Tuple2<>(16, ZonedDateTime.now().plusMinutes(25).toInstant().getEpochSecond()), new Tuple2<>(15, ZonedDateTime.now().plusMinutes(2).toInstant().getEpochSecond()))
.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<Tuple2<Integer, Long>>(Time.seconds(20)) {
@Override
public long extractTimestamp(Tuple2<Integer, Long> element) {
return element.f1 * 1000;
}
});
SingleOutputStreamOperator<Tuple2<Integer, Long>> reduced = windowed.windowAll(TumblingEventTimeWindows.of(Time.seconds(5))).maxBy(0, true);
reduced.print();
// when
env.execute();
}
private static class IdKeySelectorTransaction implements KeySelector<Tuple2<Integer, String>, Integer> {
@Override
public Integer getKey(Tuple2<Integer, String> value) {
return value.f0;
}
}
private static class IdKeySelectorAddress implements KeySelector<Tuple3<Integer, String, String>, Integer> {
@Override
public Integer getKey(Tuple3<Integer, String, String> value) {
return value.f0;
}
}
private static class Person {
private final int age;
private final String name;
private Person(int age, String name) {
this.age = age;
this.name = name;
}
}
}