#include <iostream>
using namespace std;
int utf8_to_unicode(string utf8_code);
string unicode_to_utf8(int unicode);
string unicode_table(int min, int max, string separator, int elements_in_row);
int main()
{
cout << unicode_table(32, 10240, "", 32) << endl;
cout << unicode_to_utf8(36) << '\t';
cout << unicode_to_utf8(162) << '\t';
cout << unicode_to_utf8(8364) << '\t';
cout << unicode_to_utf8(128578) << endl;
cout << unicode_to_utf8(0x24) << '\t';
cout << unicode_to_utf8(0xa2) << '\t';
cout << unicode_to_utf8(0x20ac) << '\t';
cout << unicode_to_utf8(0x1f642) << endl;
cout << utf8_to_unicode("$") << '\t';
cout << utf8_to_unicode("¢") << '\t';
cout << utf8_to_unicode("€") << '\t';
cout << utf8_to_unicode("🙂") << endl;
cout << utf8_to_unicode("\x24") << '\t';
cout << utf8_to_unicode("\xc2\xa2") << '\t';
cout << utf8_to_unicode("\xe2\x82\xac") << '\t';
cout << utf8_to_unicode("\xf0\x9f\x99\x82") << endl;
return 0;
}
int utf8_to_unicode(string utf8_code)
{
unsigned utf8_size = utf8_code.length();
int unicode = 0;
for (unsigned p=0; p<utf8_size; ++p)
{
int bit_count = (p? 6: 8 - utf8_size - (utf8_size == 1? 0: 1)),
shift = (p < utf8_size - 1? (6*(utf8_size - p - 1)): 0);
for (int k=0; k<bit_count; ++k)
unicode += ((utf8_code[p] & (1 << k)) << shift);
}
return unicode;
}
string unicode_to_utf8(int unicode)
{
string s;
if (unicode>=0 and unicode <= 0x7f) // 7F(16) = 127(10)
{
s = static_cast<char>(unicode);
return s;
}
else if (unicode <= 0x7ff) // 7FF(16) = 2047(10)
{
unsigned char c1 = 192, c2 = 128;
for (int k=0; k<11; ++k)
{
if (k < 6) c2 |= (unicode % 64) & (1 << k);
else c1 |= (unicode >> 6) & (1 << (k - 6));
}
s = c1; s += c2;
return s;
}
else if (unicode <= 0xffff) // FFFF(16) = 65535(10)
{
unsigned char c1 = 224, c2 = 128, c3 = 128;
for (int k=0; k<16; ++k)
{
if (k < 6) c3 |= (unicode % 64) & (1 << k);
else if (k < 12) c2 |= (unicode >> 6) & (1 << (k - 6));
else c1 |= (unicode >> 12) & (1 << (k - 12));
}
s = c1; s += c2; s += c3;
return s;
}
else if (unicode <= 0x1fffff) // 1FFFFF(16) = 2097151(10)
{
unsigned char c1 = 240, c2 = 128, c3 = 128, c4 = 128;
for (int k=0; k<21; ++k)
{
if (k < 6) c4 |= (unicode % 64) & (1 << k);
else if (k < 12) c3 |= (unicode >> 6) & (1 << (k - 6));
else if (k < 18) c2 |= (unicode >> 12) & (1 << (k - 12));
else c1 |= (unicode >> 18) & (1 << (k - 18));
}
s = c1; s += c2; s += c3; s += c4;
return s;
}
else if (unicode <= 0x3ffffff) // 3FFFFFF(16) = 67108863(10)
{
; // i tak nie ma jeszcze kodów 5-bajtowych
}
else if (unicode <= 0x7fffffff) // 7FFFFFFF(16) = 2147483647(10)
{
; // i tak nie ma jeszcze kodów 6-bajtowych
}
else ; // błędny kod (< 0 albo > 2147483647)
return "";
}
string unicode_table(int min, int max, string separator, int elements_in_row)
{
string s;
cout << string(min%elements_in_row, ' ');
for (int k=min; k<=max; ++k)
s += unicode_to_utf8(k)
+ ((k + 1) % elements_in_row > 0? separator: "\n");
if ((max - min) % elements_in_row > 0)
s += '\n';
return s;
}
출처 - https://sites.google.com/view/technik-informatyk-nysa/porady/porady-c-cpp#h.p_u68nLns9HPff