BAEL-6967-decode-unicode-in-string (#14901)
* BAEL-6967-decode-unicode-in-string * update unit test --------- Co-authored-by: tienvn <tienvn@>
This commit is contained in:
+29
@@ -0,0 +1,29 @@
|
||||
package com.baeldung.commons.convertunicode;
|
||||
|
||||
import org.apache.commons.text.StringEscapeUtils;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class UnicodeConverterUtil {
|
||||
|
||||
public static String decodeWithApacheCommons(String input) {
|
||||
return StringEscapeUtils.unescapeJava(input);
|
||||
}
|
||||
|
||||
public static String decodeWithPlainJava(String input) {
|
||||
Pattern pattern = Pattern.compile("\\\\u[0-9a-fA-F]{4}");
|
||||
Matcher matcher = pattern.matcher(input);
|
||||
|
||||
StringBuilder decodedString = new StringBuilder();
|
||||
|
||||
while (matcher.find()) {
|
||||
String unicodeSequence = matcher.group();
|
||||
char unicodeChar = (char) Integer.parseInt(unicodeSequence.substring(2), 16);
|
||||
matcher.appendReplacement(decodedString, Character.toString(unicodeChar));
|
||||
}
|
||||
|
||||
matcher.appendTail(decodedString);
|
||||
return decodedString.toString();
|
||||
}
|
||||
}
|
||||
+39
@@ -0,0 +1,39 @@
|
||||
package com.baeldung.commons.convertunicode;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
public class UnicodeConverterUnitTest {
|
||||
|
||||
@Test
|
||||
public void whenInputHaveUnicodeSequences_ThenDecode() {
|
||||
String encodedString = "\\u0048\\u0065\\u006C\\u006C\\u006F World";
|
||||
String expectedDecodedString = "Hello World";
|
||||
assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithApacheCommons(encodedString));
|
||||
assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithPlainJava(encodedString));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenInputHaveNoUnicodeSequences_ThenDoNothing() {
|
||||
String inputString = "Hello World";
|
||||
assertEquals(inputString, UnicodeConverterUtil.decodeWithApacheCommons(inputString));
|
||||
assertEquals(inputString, UnicodeConverterUtil.decodeWithPlainJava(inputString));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenInputHaveUnicodeSequencesInMiddle_ThenDecode() {
|
||||
String encodedString = "This is a test \\u0069\\u006E the middle.";
|
||||
String expectedDecodedString = "This is a test in the middle.";
|
||||
assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithApacheCommons(encodedString));
|
||||
assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithPlainJava(encodedString));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenInputHaveMultipleUnicodeSequences_ThenDecode() {
|
||||
String encodedString = "Unicode: \\u0048\\u0065\\u006C\\u006C\\u006F \\u0057\\u006F\\u0072\\u006C\\u0064";
|
||||
String expectedDecodedString = "Unicode: Hello World";
|
||||
assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithApacheCommons(encodedString));
|
||||
assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithPlainJava(encodedString));
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user