diff --git a/libraries-apache-commons-2/pom.xml b/libraries-apache-commons-2/pom.xml index d771aac9ab..ee9b51e6cc 100644 --- a/libraries-apache-commons-2/pom.xml +++ b/libraries-apache-commons-2/pom.xml @@ -28,12 +28,18 @@ commons-vfs2 ${commons-vfs2.version} + + org.apache.commons + commons-text + ${apache-commons-text.version} + 1.23.0 1.10.13 2.9.0 + 1.10.0 \ No newline at end of file diff --git a/libraries-apache-commons-2/src/main/java/com/baeldung/commons/convertunicode/UnicodeConverterUtil.java b/libraries-apache-commons-2/src/main/java/com/baeldung/commons/convertunicode/UnicodeConverterUtil.java new file mode 100644 index 0000000000..c788f6ee61 --- /dev/null +++ b/libraries-apache-commons-2/src/main/java/com/baeldung/commons/convertunicode/UnicodeConverterUtil.java @@ -0,0 +1,29 @@ +package com.baeldung.commons.convertunicode; + +import org.apache.commons.text.StringEscapeUtils; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class UnicodeConverterUtil { + + public static String decodeWithApacheCommons(String input) { + return StringEscapeUtils.unescapeJava(input); + } + + public static String decodeWithPlainJava(String input) { + Pattern pattern = Pattern.compile("\\\\u[0-9a-fA-F]{4}"); + Matcher matcher = pattern.matcher(input); + + StringBuilder decodedString = new StringBuilder(); + + while (matcher.find()) { + String unicodeSequence = matcher.group(); + char unicodeChar = (char) Integer.parseInt(unicodeSequence.substring(2), 16); + matcher.appendReplacement(decodedString, Character.toString(unicodeChar)); + } + + matcher.appendTail(decodedString); + return decodedString.toString(); + } +} diff --git a/libraries-apache-commons-2/src/test/java/com/baeldung/commons/convertunicode/UnicodeConverterUnitTest.java b/libraries-apache-commons-2/src/test/java/com/baeldung/commons/convertunicode/UnicodeConverterUnitTest.java new file mode 100644 index 0000000000..f4a9bbcb77 --- /dev/null +++ b/libraries-apache-commons-2/src/test/java/com/baeldung/commons/convertunicode/UnicodeConverterUnitTest.java @@ -0,0 +1,39 @@ +package com.baeldung.commons.convertunicode; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class UnicodeConverterUnitTest { + + @Test + public void whenInputHaveUnicodeSequences_ThenDecode() { + String encodedString = "\\u0048\\u0065\\u006C\\u006C\\u006F World"; + String expectedDecodedString = "Hello World"; + assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithApacheCommons(encodedString)); + assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithPlainJava(encodedString)); + } + + @Test + public void whenInputHaveNoUnicodeSequences_ThenDoNothing() { + String inputString = "Hello World"; + assertEquals(inputString, UnicodeConverterUtil.decodeWithApacheCommons(inputString)); + assertEquals(inputString, UnicodeConverterUtil.decodeWithPlainJava(inputString)); + } + + @Test + public void whenInputHaveUnicodeSequencesInMiddle_ThenDecode() { + String encodedString = "This is a test \\u0069\\u006E the middle."; + String expectedDecodedString = "This is a test in the middle."; + assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithApacheCommons(encodedString)); + assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithPlainJava(encodedString)); + } + + @Test + public void whenInputHaveMultipleUnicodeSequences_ThenDecode() { + String encodedString = "Unicode: \\u0048\\u0065\\u006C\\u006C\\u006F \\u0057\\u006F\\u0072\\u006C\\u0064"; + String expectedDecodedString = "Unicode: Hello World"; + assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithApacheCommons(encodedString)); + assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithPlainJava(encodedString)); + } +}