JAVA-6390: Move kafka articles from libraries-data-3 to new module

apache-kafka
This commit is contained in:
sampadawagde
2021-08-15 17:09:04 +05:30
parent 3f01b5fb0a
commit 0d280b54b9
20 changed files with 128 additions and 148 deletions
@@ -0,0 +1,70 @@
package com.baeldung.flink;
import com.baeldung.flink.model.Backup;
import com.baeldung.flink.model.InputMessage;
import com.baeldung.flink.operator.BackupAggregator;
import com.baeldung.flink.operator.InputMessageTimestampAssigner;
import com.baeldung.flink.operator.WordsCapitalizer;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011;
import static com.baeldung.flink.connector.Consumers.*;
import static com.baeldung.flink.connector.Producers.*;
/**
 * Two small Flink jobs that read from the {@code flink_input} Kafka topic and write
 * their results to the {@code flink_output} topic on {@code localhost:9092}.
 */
public class FlinkDataPipeline {

    private static final String INPUT_TOPIC = "flink_input";
    private static final String OUTPUT_TOPIC = "flink_output";
    private static final String CONSUMER_GROUP = "baeldung";
    private static final String KAFKA_ADDRESS = "localhost:9092";

    /**
     * Runs a job that reads strings from the input topic (starting from the earliest
     * offset), passes each one through {@link WordsCapitalizer} and writes the result
     * to the output topic.
     *
     * @throws Exception if the Flink job execution fails
     */
    public static void capitalize() throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        FlinkKafkaConsumer011<String> source = createStringConsumerForTopic(INPUT_TOPIC, KAFKA_ADDRESS, CONSUMER_GROUP);
        source.setStartFromEarliest();
        DataStream<String> lines = env.addSource(source);

        FlinkKafkaProducer011<String> sink = createStringProducer(OUTPUT_TOPIC, KAFKA_ADDRESS);
        DataStream<String> capitalized = lines.map(new WordsCapitalizer());
        capitalized.addSink(sink);

        env.execute();
    }

    /**
     * Runs a job that reads {@link InputMessage}s from the input topic (starting from the
     * earliest offset), groups them into 24-hour event-time windows, folds every window
     * into a {@link Backup} via {@link BackupAggregator} and writes it to the output topic.
     *
     * @throws Exception if the Flink job execution fails
     */
    public static void createBackup() throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Windows below are driven by the timestamps assigned on the consumer, not by wall-clock time.
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        FlinkKafkaConsumer011<InputMessage> source = createInputMessageConsumer(INPUT_TOPIC, KAFKA_ADDRESS, CONSUMER_GROUP);
        source.setStartFromEarliest();
        source.assignTimestampsAndWatermarks(new InputMessageTimestampAssigner());

        FlinkKafkaProducer011<Backup> sink = createBackupProducer(OUTPUT_TOPIC, KAFKA_ADDRESS);

        DataStream<InputMessage> messages = env.addSource(source);
        DataStream<Backup> backups = messages.timeWindowAll(Time.hours(24))
            .aggregate(new BackupAggregator());
        backups.addSink(sink);

        env.execute();
    }

    /**
     * Launches the backup job.
     *
     * @param args ignored
     * @throws Exception if the Flink job execution fails
     */
    public static void main(String[] args) throws Exception {
        createBackup();
    }
}
@@ -0,0 +1,29 @@
package com.baeldung.flink.connector;
import com.baeldung.flink.model.InputMessage;
import com.baeldung.flink.schema.InputMessageDeserializationSchema;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import java.util.Properties;
/**
 * Factory methods for the Flink Kafka consumers used by the pipeline.
 */
public class Consumers {

    /**
     * Creates a consumer that reads the records of a topic as plain strings.
     *
     * @param topic        Kafka topic to read from
     * @param kafkaAddress value used for the {@code bootstrap.servers} property
     * @param kafkaGroup   value used for the {@code group.id} property
     * @return a consumer deserializing with {@link SimpleStringSchema}
     */
    public static FlinkKafkaConsumer011<String> createStringConsumerForTopic(String topic, String kafkaAddress, String kafkaGroup) {
        Properties config = connectionProperties(kafkaAddress, kafkaGroup);
        return new FlinkKafkaConsumer011<>(topic, new SimpleStringSchema(), config);
    }

    /**
     * Creates a consumer that reads the records of a topic as {@link InputMessage}s.
     *
     * @param topic        Kafka topic to read from
     * @param kafkaAddress value used for the {@code bootstrap.servers} property
     * @param kafkaGroup   value used for the {@code group.id} property
     * @return a consumer deserializing with {@link InputMessageDeserializationSchema}
     */
    public static FlinkKafkaConsumer011<InputMessage> createInputMessageConsumer(String topic, String kafkaAddress, String kafkaGroup) {
        Properties config = connectionProperties(kafkaAddress, kafkaGroup);
        return new FlinkKafkaConsumer011<>(topic, new InputMessageDeserializationSchema(), config);
    }

    /** Builds the two connection properties every consumer created here needs. */
    private static Properties connectionProperties(String bootstrapServers, String groupId) {
        Properties config = new Properties();
        config.setProperty("bootstrap.servers", bootstrapServers);
        config.setProperty("group.id", groupId);
        return config;
    }
}
@@ -0,0 +1,17 @@
package com.baeldung.flink.connector;
import com.baeldung.flink.model.Backup;
import com.baeldung.flink.schema.BackupSerializationSchema;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011;
/**
 * Factory methods for the Flink Kafka producers used by the pipeline.
 */
public class Producers {

    /**
     * Creates a producer that writes plain strings to a topic.
     *
     * @param topic        Kafka topic to write to
     * @param kafkaAddress broker list handed to the Flink producer
     * @return a producer serializing with {@link SimpleStringSchema}
     */
    public static FlinkKafkaProducer011<String> createStringProducer(String topic, String kafkaAddress) {
        SimpleStringSchema schema = new SimpleStringSchema();
        // The Flink constructor takes the broker address first, then the topic.
        return new FlinkKafkaProducer011<>(kafkaAddress, topic, schema);
    }

    /**
     * Creates a producer that writes {@link Backup} objects to a topic.
     *
     * @param topic        Kafka topic to write to
     * @param kafkaAddress broker list handed to the Flink producer
     * @return a producer serializing with {@link BackupSerializationSchema}
     */
    public static FlinkKafkaProducer011<Backup> createBackupProducer(String topic, String kafkaAddress) {
        BackupSerializationSchema schema = new BackupSerializationSchema();
        // The Flink constructor takes the broker address first, then the topic.
        return new FlinkKafkaProducer011<>(kafkaAddress, topic, schema);
    }
}
@@ -0,0 +1,27 @@
package com.baeldung.flink.model;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.time.LocalDateTime;
import java.util.List;
import java.util.UUID;
/**
 * A batch of {@link InputMessage}s together with the time the batch was created
 * and a randomly generated identifier.
 */
public class Backup {

    /** Messages contained in this backup; the list passed to the constructor is stored as-is. */
    @JsonProperty("inputMessages")
    List<InputMessage> inputMessages;

    /** Timestamp supplied by the creator of the backup. */
    @JsonProperty("backupTimestamp")
    LocalDateTime backupTimestamp;

    /** Random identifier generated when the backup is constructed. */
    @JsonProperty("uuid")
    UUID uuid;

    /**
     * Creates a backup and assigns it a fresh random UUID.
     *
     * @param inputMessages   messages to include
     * @param backupTimestamp time to record for this backup
     */
    public Backup(List<InputMessage> inputMessages, LocalDateTime backupTimestamp) {
        this.uuid = UUID.randomUUID();
        this.backupTimestamp = backupTimestamp;
        this.inputMessages = inputMessages;
    }

    /**
     * @return the messages contained in this backup
     */
    public List<InputMessage> getInputMessages() {
        return this.inputMessages;
    }
}
@@ -0,0 +1,71 @@
package com.baeldung.flink.model;
import com.fasterxml.jackson.databind.annotation.JsonSerialize;
import com.google.common.base.Objects;
import java.time.LocalDateTime;
/**
 * A message exchanged between a sender and a recipient at a given time.
 * Two messages are equal when all four fields are equal.
 */
@JsonSerialize
public class InputMessage {

    String sender;
    String recipient;
    LocalDateTime sentAt;
    String message;

    /** Creates an empty message; fields are populated through the setters. */
    public InputMessage() {
    }

    /**
     * Creates a fully populated message.
     *
     * @param sender    who sent the message
     * @param recipient who the message is addressed to
     * @param sentAt    when the message was sent
     * @param message   the message text
     */
    public InputMessage(String sender, String recipient, LocalDateTime sentAt, String message) {
        this.sender = sender;
        this.recipient = recipient;
        this.sentAt = sentAt;
        this.message = message;
    }

    public String getSender() {
        return this.sender;
    }

    public void setSender(String sender) {
        this.sender = sender;
    }

    public String getRecipient() {
        return this.recipient;
    }

    public void setRecipient(String recipient) {
        this.recipient = recipient;
    }

    public LocalDateTime getSentAt() {
        return this.sentAt;
    }

    public void setSentAt(LocalDateTime sentAt) {
        this.sentAt = sentAt;
    }

    public String getMessage() {
        return this.message;
    }

    public void setMessage(String message) {
        this.message = message;
    }

    /**
     * Compares sender, recipient, sentAt and message, in that order; instances of
     * other classes (including subclasses) are never equal to this one.
     */
    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (o == null || o.getClass() != getClass()) {
            return false;
        }
        InputMessage other = (InputMessage) o;
        if (!Objects.equal(sender, other.sender)) {
            return false;
        }
        if (!Objects.equal(recipient, other.recipient)) {
            return false;
        }
        if (!Objects.equal(sentAt, other.sentAt)) {
            return false;
        }
        return Objects.equal(message, other.message);
    }

    /** Hash over the same four fields that {@link #equals(Object)} compares. */
    @Override
    public int hashCode() {
        return Objects.hashCode(sender, recipient, sentAt, message);
    }
}
@@ -0,0 +1,34 @@
package com.baeldung.flink.operator;
import com.baeldung.flink.model.Backup;
import com.baeldung.flink.model.InputMessage;
import org.apache.flink.api.common.functions.AggregateFunction;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
/**
 * Aggregate function that collects {@link InputMessage}s into a list and turns the
 * collected list into a {@link Backup} stamped with the current local time.
 */
public class BackupAggregator implements AggregateFunction<InputMessage, List<InputMessage>, Backup> {

    /** Starts every aggregation with an empty, mutable list. */
    @Override
    public List<InputMessage> createAccumulator() {
        List<InputMessage> empty = new ArrayList<>();
        return empty;
    }

    /** Appends the message to the accumulator in place and returns that same accumulator. */
    @Override
    public List<InputMessage> add(InputMessage inputMessage, List<InputMessage> inputMessages) {
        inputMessages.add(inputMessage);
        return inputMessages;
    }

    /** Wraps the accumulated list (not a copy) in a {@link Backup} timestamped with {@code LocalDateTime.now()}. */
    @Override
    public Backup getResult(List<InputMessage> inputMessages) {
        return new Backup(inputMessages, LocalDateTime.now());
    }

    /** Appends all of {@code acc1} to the first accumulator in place and returns the first accumulator. */
    @Override
    public List<InputMessage> merge(List<InputMessage> inputMessages, List<InputMessage> acc1) {
        inputMessages.addAll(acc1);
        return inputMessages;
    }
}
@@ -0,0 +1,25 @@
package com.baeldung.flink.operator;
import com.baeldung.flink.model.InputMessage;
import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks;
import org.apache.flink.streaming.api.watermark.Watermark;
import javax.annotation.Nullable;
import java.time.ZoneId;
/**
 * Derives event timestamps for {@link InputMessage}s from their {@code sentAt} field and
 * emits a watermark after every element.
 */
public class InputMessageTimestampAssigner implements AssignerWithPunctuatedWatermarks<InputMessage> {

    /**
     * Converts the message's {@code sentAt} value, interpreted in the JVM's default time zone,
     * to epoch milliseconds.
     *
     * <p>The conversion goes through {@code toInstant().toEpochMilli()} so that the sub-second
     * part of {@code sentAt} is kept; multiplying epoch seconds by 1000 would silently truncate it.
     *
     * @param element                  message whose send time is used as the event time
     * @param previousElementTimestamp not used
     * @return the send time in milliseconds since the epoch
     */
    @Override
    public long extractTimestamp(InputMessage element, long previousElementTimestamp) {
        ZoneId zoneId = ZoneId.systemDefault();
        return element.getSentAt()
            .atZone(zoneId)
            .toInstant()
            .toEpochMilli();
    }

    /**
     * Emits a watermark that trails the given timestamp by 15 (same unit as the timestamp).
     *
     * @param lastElement        not used
     * @param extractedTimestamp timestamp the watermark is derived from
     * @return a watermark at {@code extractedTimestamp - 15}; never {@code null}
     */
    @Nullable
    @Override
    public Watermark checkAndGetNextWatermark(InputMessage lastElement, long extractedTimestamp) {
        return new Watermark(extractedTimestamp - 15);
    }
}
@@ -0,0 +1,11 @@
package com.baeldung.flink.operator;
import org.apache.flink.api.common.functions.MapFunction;
/**
 * Map function that upper-cases every incoming string.
 */
public class WordsCapitalizer implements MapFunction<String, String> {

    /**
     * Upper-cases the input using locale-independent rules, so the result does not change
     * with the default locale of the JVM that happens to run the operator.
     *
     * @param s the string to convert
     * @return {@code s} in upper case
     */
    @Override
    public String map(String s) {
        return s.toUpperCase(java.util.Locale.ROOT);
    }
}
@@ -0,0 +1,32 @@
package com.baeldung.flink.schema;
import com.baeldung.flink.model.Backup;
import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.PropertyAccessor;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
import org.apache.flink.api.common.serialization.SerializationSchema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Serializes {@link Backup} objects to UTF-8 encoded JSON.
 */
public class BackupSerializationSchema implements SerializationSchema<Backup> {

    /** Shared JSON mapper with {@code java.time} support. */
    static ObjectMapper objectMapper = new ObjectMapper().registerModule(new JavaTimeModule());

    /** Static so the logger is not part of the schema instance's state. */
    private static final Logger logger = LoggerFactory.getLogger(BackupSerializationSchema.class);

    /**
     * Converts a backup to its JSON representation.
     *
     * @param backupMessage the backup to serialize
     * @return the UTF-8 bytes of the JSON document, or an empty array if serialization
     *         failed (the failure is logged rather than propagated)
     */
    @Override
    public byte[] serialize(Backup backupMessage) {
        if (objectMapper == null) {
            // Create the mapper before configuring it; configuring first would
            // dereference the null field and throw a NullPointerException.
            objectMapper = new ObjectMapper().registerModule(new JavaTimeModule());
            objectMapper.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY);
        }
        try {
            String json = objectMapper.writeValueAsString(backupMessage);
            // Encode explicitly as UTF-8 instead of relying on the platform default charset.
            return json.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        } catch (com.fasterxml.jackson.core.JsonProcessingException e) {
            logger.error("Failed to serialize Backup to JSON", e);
        }
        return new byte[0];
    }
}
@@ -0,0 +1,30 @@
package com.baeldung.flink.schema;
import com.baeldung.flink.model.InputMessage;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import java.io.IOException;
/**
 * Deserializes JSON-encoded Kafka records into {@link InputMessage} objects.
 */
public class InputMessageDeserializationSchema implements DeserializationSchema<InputMessage> {

    /** Shared JSON mapper with {@code java.time} support. */
    static ObjectMapper objectMapper = new ObjectMapper().registerModule(new JavaTimeModule());

    /**
     * Parses the raw record bytes as a JSON {@link InputMessage}.
     *
     * @param bytes the record payload
     * @return the parsed message
     * @throws IOException if the payload cannot be read as an {@code InputMessage}
     */
    @Override
    public InputMessage deserialize(byte[] bytes) throws IOException {
        InputMessage parsed = objectMapper.readValue(bytes, InputMessage.class);
        return parsed;
    }

    /** Always {@code false}: no message marks the end of the stream. */
    @Override
    public boolean isEndOfStream(InputMessage inputMessage) {
        return false;
    }

    /** Reports {@link InputMessage} as the type this schema produces. */
    @Override
    public TypeInformation<InputMessage> getProducedType() {
        Class<InputMessage> producedClass = InputMessage.class;
        return TypeInformation.of(producedClass);
    }
}
@@ -0,0 +1,28 @@
package com.baeldung.kafka.consumer;
/**
 * Mutable holder pairing a country name with its population.
 */
class CountryPopulation {

    private String country;
    private Integer population;

    /**
     * @param country    name of the country
     * @param population population of that country
     */
    public CountryPopulation(String country, Integer population) {
        setCountry(country);
        setPopulation(population);
    }

    /** @return the country name */
    public String getCountry() {
        return this.country;
    }

    /** @return the population */
    public Integer getPopulation() {
        return this.population;
    }

    /** @param country the new country name */
    public final void setCountry(String country) {
        this.country = country;
    }

    /** @param population the new population */
    public final void setPopulation(Integer population) {
        this.population = population;
    }
}
@@ -0,0 +1,58 @@
package com.baeldung.kafka.consumer;
import java.time.Duration;
import java.util.Collections;
import java.util.stream.StreamSupport;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.WakeupException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Polls a Kafka consumer in a loop, converts every record into a {@link CountryPopulation}
 * and hands it to a callback. The loop ends when {@link #stop()} wakes the consumer up or
 * when a runtime exception occurs; the Kafka consumer is closed in either case.
 */
public class CountryPopulationConsumer {

    private static final Logger logger = LoggerFactory.getLogger(CountryPopulationConsumer.class);

    /** How long a single poll waits for records. */
    private static final Duration POLL_TIMEOUT = Duration.ofSeconds(1);

    private final Consumer<String, Integer> consumer;
    private final java.util.function.Consumer<Throwable> exceptionConsumer;
    private final java.util.function.Consumer<CountryPopulation> countryPopulationConsumer;

    /**
     * @param consumer                  Kafka consumer to poll; record keys are country names, values populations
     * @param exceptionConsumer         receives any runtime exception (other than a wakeup) that ends the loop
     * @param countryPopulationConsumer receives one {@link CountryPopulation} per polled record
     */
    public CountryPopulationConsumer(Consumer<String, Integer> consumer, java.util.function.Consumer<Throwable> exceptionConsumer, java.util.function.Consumer<CountryPopulation> countryPopulationConsumer) {
        this.consumer = consumer;
        this.exceptionConsumer = exceptionConsumer;
        this.countryPopulationConsumer = countryPopulationConsumer;
    }

    /** Subscribes to the topic and then polls until stopped. */
    void startBySubscribing(String topic) {
        consume(() -> consumer.subscribe(Collections.singleton(topic)));
    }

    /** Assigns a single partition of the topic and then polls until stopped. */
    void startByAssigning(String topic, int partition) {
        TopicPartition target = new TopicPartition(topic, partition);
        consume(() -> consumer.assign(Collections.singleton(target)));
    }

    /**
     * Runs the given setup task and then the poll loop, translating the way the loop ends:
     * a wakeup is logged, any other runtime exception goes to the exception callback.
     */
    private void consume(Runnable beforePollingTask) {
        try {
            beforePollingTask.run();
            pollForever();
        } catch (WakeupException e) {
            logger.info("Shutting down...");
        } catch (RuntimeException ex) {
            exceptionConsumer.accept(ex);
        } finally {
            consumer.close();
        }
    }

    /** Polls, dispatches every record of the batch and commits; only exits by throwing. */
    private void pollForever() {
        for (;;) {
            ConsumerRecords<String, Integer> batch = consumer.poll(POLL_TIMEOUT);
            StreamSupport.stream(batch.spliterator(), false)
                .map(entry -> new CountryPopulation(entry.key(), entry.value()))
                .forEach(countryPopulationConsumer);
            consumer.commitSync();
        }
    }

    /** Wakes the Kafka consumer up, which makes the poll loop terminate. */
    public void stop() {
        consumer.wakeup();
    }
}
@@ -0,0 +1,17 @@
package com.baeldung.kafka.producer;
import org.apache.kafka.clients.producer.internals.DefaultPartitioner;
import org.apache.kafka.common.Cluster;
/**
 * Partitioner that routes records by the length of their string key:
 * even-length keys go to partition 0, odd-length keys to partition 1.
 */
public class EvenOddPartitioner extends DefaultPartitioner {

    /**
     * Picks the partition for a record.
     *
     * <p>Records whose key is {@code null} or not a {@link String} cannot be classified by key
     * length; instead of failing with a {@code NullPointerException} or
     * {@code ClassCastException} they are handed to the default partitioning strategy.
     *
     * @return 0 for an even-length string key, 1 for an odd-length string key, otherwise
     *         whatever {@link DefaultPartitioner} chooses
     */
    @Override
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        if (!(key instanceof String)) {
            return super.partition(topic, key, keyBytes, value, valueBytes, cluster);
        }
        boolean evenLength = ((String) key).length() % 2 == 0;
        return evenLength ? 0 : 1;
    }
}
@@ -0,0 +1,38 @@
package com.baeldung.kafka.producer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import java.util.concurrent.Future;
public class KafkaProducer {
private final Producer<String, String> producer;
public KafkaProducer(Producer<String, String> producer) {
this.producer = producer;
}
public Future<RecordMetadata> send(String key, String value) {
ProducerRecord record = new ProducerRecord("topic_sports_news", key, value);
return producer.send(record);
}
public void flush() {
producer.flush();
}
public void beginTransaction() {
producer.beginTransaction();
}
public void initTransaction() {
producer.initTransactions();
}
public void commitTransaction() {
producer.commitTransaction();
}
}