BAEL-6967-decode-unicode-in-string (#14901)

* BAEL-6967-decode-unicode-in-string

* update unit test

---------

Co-authored-by: tienvn <tienvn@>
This commit is contained in:
vunamtien
2023-10-07 20:04:59 +07:00
committed by GitHub
parent e62aba145b
commit 3a816d6b3d
3 changed files with 74 additions and 0 deletions
@@ -0,0 +1,29 @@
package com.baeldung.commons.convertunicode;
import org.apache.commons.text.StringEscapeUtils;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Helpers that turn textual Unicode escape sequences (a backslash, the letter
 * {@code u} and four hexadecimal digits) back into the characters they denote.
 *
 * <p>Two alternative implementations are offered: one delegating to Apache
 * Commons Text and one relying only on {@code java.util.regex}.
 */
public class UnicodeConverterUtil {

    /**
     * Matches one escape sequence: a literal backslash, {@code u}, then exactly
     * four hex digits. Compiled once and reused, since the expression never changes.
     */
    private static final Pattern UNICODE_ESCAPE = Pattern.compile("\\\\u[0-9a-fA-F]{4}");

    /**
     * Decodes the input by delegating to {@link StringEscapeUtils#unescapeJava(String)}.
     *
     * @param input text that may contain escape sequences
     * @return whatever {@code StringEscapeUtils.unescapeJava} returns for {@code input}
     */
    public static String decodeWithApacheCommons(String input) {
        return StringEscapeUtils.unescapeJava(input);
    }

    /**
     * Decodes every four-hex-digit Unicode escape in the input using only the JDK.
     *
     * <p>Each match is converted to the single {@code char} with that code unit;
     * all other text is copied through unchanged. Input without any escape
     * sequence is returned with identical content.
     *
     * @param input text that may contain escape sequences
     * @return the input with each recognised escape replaced by its character
     */
    public static String decodeWithPlainJava(String input) {
        Matcher matcher = UNICODE_ESCAPE.matcher(input);
        StringBuilder decodedString = new StringBuilder();
        while (matcher.find()) {
            String unicodeSequence = matcher.group();
            // Skip the two-character prefix (backslash + 'u') and parse the hex digits.
            char unicodeChar = (char) Integer.parseInt(unicodeSequence.substring(2), 16);
            // appendReplacement interprets '$' and '\' in the replacement as group
            // references / escapes. A decoded U+0024 or U+005C would otherwise make it
            // throw IllegalArgumentException, so the replacement must be quoted.
            matcher.appendReplacement(decodedString, Matcher.quoteReplacement(String.valueOf(unicodeChar)));
        }
        matcher.appendTail(decodedString);
        return decodedString.toString();
    }
}
@@ -0,0 +1,39 @@
package com.baeldung.commons.convertunicode;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Verifies that both decoders exposed by {@code UnicodeConverterUtil} turn
 * escaped input into the expected plain text.
 */
public class UnicodeConverterUnitTest {

    /** A string made of escapes followed by plain text is fully decoded. */
    @Test
    public void whenInputHaveUnicodeSequences_ThenDecode() {
        assertDecodedByBoth("Hello World", "\\u0048\\u0065\\u006C\\u006C\\u006F World");
    }

    /** Text without any escape sequence comes back unchanged. */
    @Test
    public void whenInputHaveNoUnicodeSequences_ThenDoNothing() {
        final String plainText = "Hello World";
        assertDecodedByBoth(plainText, plainText);
    }

    /** Escapes surrounded by plain text on both sides are decoded in place. */
    @Test
    public void whenInputHaveUnicodeSequencesInMiddle_ThenDecode() {
        assertDecodedByBoth("This is a test in the middle.", "This is a test \\u0069\\u006E the middle.");
    }

    /** Several runs of escapes separated by plain characters are all decoded. */
    @Test
    public void whenInputHaveMultipleUnicodeSequences_ThenDecode() {
        assertDecodedByBoth("Unicode: Hello World",
            "Unicode: \\u0048\\u0065\\u006C\\u006C\\u006F \\u0057\\u006F\\u0072\\u006C\\u0064");
    }

    /**
     * Runs the input through the Apache Commons decoder first and the plain-Java
     * decoder second, asserting that each yields the expected text.
     */
    private static void assertDecodedByBoth(String expected, String input) {
        assertEquals(expected, UnicodeConverterUtil.decodeWithApacheCommons(input));
        assertEquals(expected, UnicodeConverterUtil.decodeWithPlainJava(input));
    }
}