Unicode To UTF-8
Ever wondered what the actual Unicode-to-UTF-8 algorithm is? I was debugging an issue and wrote it up to verify that the built-in Java converter was working correctly. Here it is:
/**
 * Prints the UTF-8 encoding of a single Unicode code point to standard output.
 *
 * <p>The output is one line of the form {@code "UTF dump: b1 b2 b3 b4"}, where each
 * value is a Java {@code byte} printed as a signed decimal (so lead and continuation
 * bytes appear as negative numbers). Encodings shorter than four bytes are padded
 * with trailing zeros.
 *
 * <p>Values in the surrogate range (U+D800..U+DFFF) are encoded with the ordinary
 * three-byte form; note that such a sequence is not well-formed UTF-8.
 *
 * @param c the code point to encode, in the range {@code 0..0x10FFFF}
 * @throws IllegalArgumentException if {@code c} is negative or greater than
 *         {@code 0x10FFFF}
 */
public static void DumpUTF8(int c) {
    // Without this guard a negative value would be masked into a bogus one-byte
    // encoding, and a value above 0x10FFFF would print all zeros (same as U+0000).
    if (!Character.isValidCodePoint(c)) {
        throw new IllegalArgumentException(
                String.format("Not a Unicode code point: 0x%X", c));
    }

    // Bytes beyond the encoded length keep their initial value of 0.
    byte b1;
    byte b2 = 0;
    byte b3 = 0;
    byte b4 = 0;

    if (c < 0x80) {
        // 1 byte: 0xxxxxxx
        b1 = (byte) (c & 0x7F);
    } else if (c < 0x0800) {
        // 2 bytes: 110xxxxx 10xxxxxx
        b1 = (byte) (c >> 6 & 0x1F | 0xC0);
        b2 = (byte) (c & 0x3F | 0x80);
    } else if (c < 0x010000) {
        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        b1 = (byte) (c >> 12 & 0x0F | 0xE0);
        b2 = (byte) (c >> 6 & 0x3F | 0x80);
        b3 = (byte) (c & 0x3F | 0x80);
    } else {
        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        b1 = (byte) (c >> 18 & 0x07 | 0xF0);
        b2 = (byte) (c >> 12 & 0x3F | 0x80);
        b3 = (byte) (c >> 6 & 0x3F | 0x80);
        b4 = (byte) (c & 0x3F | 0x80);
    }

    System.out.println("UTF dump: " + b1 + " " + b2 + " " + b3 + " " + b4);
}

Leave a Reply
You must be logged in to post a comment.