[BAEL-3348] Moved code to algorithm-4

This commit is contained in:
dupirefr
2019-11-01 00:35:30 +01:00
parent db85c8f275
commit fee1da6091
20514 changed files with 1642355 additions and 0 deletions
@@ -0,0 +1,89 @@
package com.baeldung.crunch;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Calendar;
import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.Source;
import org.apache.crunch.Target;
import org.apache.crunch.impl.mem.MemPipeline;
import org.apache.crunch.io.From;
import org.apache.crunch.io.To;
import org.junit.Ignore;
import org.junit.Test;
/**
 * Tests reading from and writing to text files through Crunch's in-memory
 * {@link MemPipeline}.
 *
 * <p>The two write tests are disabled via {@code @Ignore} with the reason
 * "Requires Hadoop binaries".
 */
public class MemPipelineUnitTest {

    /** Text fixture read by the read tests; they expect it to yield 21 records. */
    private static final String INPUT_FILE_PATH = "src/test/resources/crunch/input.txt";

    /** Reads the fixture through an explicit {@link Source} and checks the record count. */
    @Test
    public void givenPipeLineAndSource_whenSourceRead_thenExpectedNumberOfRecordsRead() {
        Pipeline memPipeline = MemPipeline.getInstance();
        Source<String> textSource = From.textFile(INPUT_FILE_PATH);

        PCollection<String> records = memPipeline.read(textSource);
        int recordCount = records.asCollection().getValue().size();

        assertEquals(21, recordCount);
    }

    /** Reads the fixture through the {@code readTextFile} shortcut and checks the record count. */
    @Test
    public void givenPipeLine_whenTextFileRead_thenExpectedNumberOfRecordsRead() {
        PCollection<String> records = MemPipeline.getInstance().readTextFile(INPUT_FILE_PATH);
        int recordCount = records.asCollection().getValue().size();

        assertEquals(21, recordCount);
    }

    /**
     * Creates a new temporary directory and returns the path of an
     * {@code output.text} entry inside it. The entry itself is not created here.
     *
     * @return the output path as a string
     * @throws IOException if the temporary directory cannot be created
     */
    private String createOutputPath() throws IOException {
        Path tempDir = Files.createTempDirectory("test");
        return tempDir.toString() + File.separatorChar + "output.text";
    }

    /** Writes a collection to a text-file {@link Target}, reads it back and compares the contents. */
    @Test
    @Ignore("Requires Hadoop binaries")
    public void givenCollection_whenWriteCalled_fileWrittenSuccessfully() throws IOException {
        // The Calendar's string form gives the collection a run-specific element.
        String calendarText = Calendar.getInstance().toString();
        PCollection<String> expected = MemPipeline.collectionOf("Hello", "Apache", "Crunch", calendarText);

        String outputLocation = createOutputPath();
        Target textTarget = To.textFile(outputLocation);
        expected.write(textTarget);

        PCollection<String> actual = MemPipeline.getInstance().readTextFile(outputLocation);
        assertIterableEquals(expected.materialize(), actual.materialize());
    }

    /** Writes a collection via {@code Pipeline.writeTextFile}, reads it back and compares the contents. */
    @Test
    @Ignore("Requires Hadoop binaries")
    public void givenPipeLine_whenWriteTextFileCalled_fileWrittenSuccessfully() throws IOException {
        Pipeline memPipeline = MemPipeline.getInstance();
        // The Calendar's string form gives the collection a run-specific element.
        String calendarText = Calendar.getInstance().toString();
        PCollection<String> expected = MemPipeline.collectionOf("Hello", "Apache", "Crunch", calendarText);

        String outputLocation = createOutputPath();
        memPipeline.writeTextFile(expected, outputLocation);

        PCollection<String> actual = memPipeline.readTextFile(outputLocation);
        assertIterableEquals(expected.materialize(), actual.materialize());
    }
}
@@ -0,0 +1,41 @@
package com.baeldung.crunch;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import org.apache.crunch.FilterFn;
import org.apache.crunch.PCollection;
import org.apache.crunch.impl.mem.MemPipeline;
import org.junit.Test;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
/**
 * Tests {@code StopWordFilter} both directly through {@link FilterFn#accept}
 * and when applied to an in-memory {@link PCollection}.
 */
public class StopWordFilterUnitTest {

    /** The word "the" is expected to be rejected by the filter. */
    @Test
    public void givenFilter_whenStopWordPassed_thenFalseReturned() {
        FilterFn<String> stopWordFilter = new StopWordFilter();

        boolean accepted = stopWordFilter.accept("the");

        assertFalse(accepted);
    }

    /** The word "Hello" is expected to be accepted by the filter. */
    @Test
    public void givenFilter_whenNonStopWordPassed_thenTrueReturned() {
        FilterFn<String> stopWordFilter = new StopWordFilter();

        boolean accepted = stopWordFilter.accept("Hello");

        assertTrue(accepted);
    }

    /** Filtering a five-word collection is expected to drop "is" and "a" and keep the rest in order. */
    @Test
    public void givenWordCollection_whenFiltered_thenStopWordsRemoved() {
        PCollection<String> sentence = MemPipeline.collectionOf("This", "is", "a", "test", "sentence");

        Iterable<String> remaining = sentence.filter(new StopWordFilter()).materialize();

        ImmutableList<String> expected = ImmutableList.of("This", "test", "sentence");
        assertEquals(expected, Lists.newArrayList(remaining));
    }
}
@@ -0,0 +1,21 @@
package com.baeldung.crunch;
import static org.junit.Assert.assertEquals;
import org.apache.crunch.impl.mem.emit.InMemoryEmitter;
import org.junit.Test;
import com.google.common.collect.ImmutableList;
/**
 * Tests {@code ToUpperCaseFn} in isolation by capturing what it emits in an
 * {@link InMemoryEmitter}.
 */
public class ToUpperCaseFnUnitTest {

    /** Processing "input" is expected to emit exactly one value, "INPUT". */
    @Test
    public void givenString_whenToUpperCaseFnCalled_UpperCaseStringReturned() {
        InMemoryEmitter<String> collector = new InMemoryEmitter<>();
        ToUpperCaseFn upperCaseFn = new ToUpperCaseFn();

        upperCaseFn.process("input", collector);

        ImmutableList<String> expected = ImmutableList.of("INPUT");
        assertEquals(expected, collector.getOutput());
    }
}
@@ -0,0 +1,31 @@
package com.baeldung.crunch;
import static org.junit.Assert.assertEquals;
import org.apache.crunch.PCollection;
import org.apache.crunch.impl.mem.MemPipeline;
import org.apache.crunch.types.writable.Writables;
import org.junit.Before;
import org.junit.Test;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
/**
 * Tests {@code ToUpperCaseWithCounterFn}: both the upper-cased output and the
 * value of the "UpperCase"/"modified" counter it is expected to maintain.
 */
public class ToUpperCaseWithCounterFnUnitTest {

    /** Clears the {@link MemPipeline} counters before each test. */
    @Before
    public void setUp() throws Exception {
        MemPipeline.clearCounters();
    }

    /**
     * Runs the function over five words, one of which ("TEST") is already
     * upper case, and expects the counter to read 4 afterwards.
     */
    @Test
    public void whenFunctionCalled_counterIncementendForChangedValues() {
        PCollection<String> mixedCase = MemPipeline.collectionOf("This", "is", "a", "TEST", "string");

        Iterable<String> upperCased = mixedCase
            .parallelDo(new ToUpperCaseWithCounterFn(), Writables.strings())
            .materialize();

        ImmutableList<String> expected = ImmutableList.of("THIS", "IS", "A", "TEST", "STRING");
        assertEquals(expected, Lists.newArrayList(upperCased));

        long modifiedCount = MemPipeline.getCounters()
            .findCounter("UpperCase", "modified")
            .getValue();
        assertEquals(4L, modifiedCount);
    }
}
@@ -0,0 +1,27 @@
package com.baeldung.crunch;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.verifyNoMoreInteractions;
import org.apache.crunch.Emitter;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.junit.MockitoJUnitRunner;
/**
 * Tests {@code Tokenizer} against a Mockito-mocked {@link Emitter}, checking
 * exactly which words are emitted for a line.
 */
@RunWith(MockitoJUnitRunner.class)
public class TokenizerUnitTest {

    /** Mock emitter used to observe the tokenizer's output. */
    @Mock
    private Emitter<String> emitter;

    /**
     * A line with leading and trailing spaces is expected to produce only the
     * two words it contains and no other interaction with the emitter.
     */
    @Test
    public void givenTokenizer_whenLineProcessed_thenOnlyExpectedWordsEmitted() {
        new Tokenizer().process(" hello world ", emitter);

        String[] expectedWords = { "hello", "world" };
        for (String word : expectedWords) {
            verify(emitter).emit(word);
        }
        verifyNoMoreInteractions(emitter);
    }
}