Unicode To UTF-8

Ever wondered what the actual Unicode-to-UTF-8 encoding algorithm looks like? I was debugging an issue and wrote it out by hand to verify that the built-in Java converter was working correctly. Here it is:

/**
 * Prints the UTF-8 encoding of a single Unicode code point to standard output.
 *
 * <p>The output is one line of the form {@code "UTF dump: b1 b2 b3 b4"}, where each
 * value is a Java {@code byte} printed as a signed decimal (so lead and continuation
 * bytes, which have the high bit set, appear as negative numbers). Positions that the
 * encoding does not use are printed as {@code 0}.
 *
 * <p>Surrogate code points (U+D800..U+DFFF) are encoded mechanically as three bytes,
 * even though they are not permitted in well-formed UTF-8.
 *
 * @param c the Unicode code point to encode, in the range U+0000..U+10FFFF
 * @throws IllegalArgumentException if {@code c} is negative or greater than 0x10FFFF
 */
public static void DumpUTF8(int c) {
    // Reject out-of-range input up front: a negative value would otherwise satisfy
    // "c < 0x80" and be masked into a bogus ASCII byte, and a value above 0x10FFFF
    // would fall through every branch and print all zeros (same as U+0000).
    if (!Character.isValidCodePoint(c)) {
        throw new IllegalArgumentException(
                "Not a valid Unicode code point: 0x" + Integer.toHexString(c));
    }

    byte b1;
    byte b2 = 0;
    byte b3 = 0;
    byte b4 = 0;

    if (c < 0x80) {
        // 1 byte: 0xxxxxxx
        b1 = (byte) (c & 0x7F);
    } else if (c < 0x0800) {
        // 2 bytes: 110xxxxx 10xxxxxx
        b1 = (byte) ((c >> 6) & 0x1F | 0xC0);
        b2 = (byte) (c & 0x3F | 0x80);
    } else if (c < 0x010000) {
        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        b1 = (byte) ((c >> 12) & 0x0F | 0xE0);
        b2 = (byte) ((c >> 6) & 0x3F | 0x80);
        b3 = (byte) (c & 0x3F | 0x80);
    } else {
        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        b1 = (byte) ((c >> 18) & 0x07 | 0xF0);
        b2 = (byte) ((c >> 12) & 0x3F | 0x80);
        b3 = (byte) ((c >> 6) & 0x3F | 0x80);
        b4 = (byte) (c & 0x3F | 0x80);
    }

    System.out.println("UTF dump: " + b1 + " " + b2 + " " + b3 + " " + b4);
}
Advertisements

Leave a Reply

Please log in using one of these methods to post your comment:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s