/*
 * UTF-8 conversion functions designed for use in PHP-GTK apps
 * Adam Rambousek - rambousek@volny.cz
 *
 * version history:
 *   1.03 --- 12/02/2002
 *     * added Win1257 support
 *   1.02 --- 30/11/2001
 *     * added ISO8859-1 support
 *   1.01
 *     * added Win1250 support
 *   1.00
 *
 * string to_utf8(string string [, string charset])
 * string from_utf8(string string [, string charset])
 *
 * NOTE(review): the class below exposes these two conversions as the methods
 * Encoding::ToUTF8string() and Encoding::FromUTF8(); free functions named
 * to_utf8()/from_utf8() are not defined in the part of the file shown here.
 *
 * supported charsets:
 *   name of charset you must use in script
 *   ISO8859-2: iso2 (this is the default charset, you don't have to specify it)
 *   Windows1250: win1250
 *   ISO8859-1: iso1
 *   Windows1257: win1257
 *
 * example:
 *   $new_string=to_utf8($some_string,"win1250");
 */

/**
 * Table-driven converter between UTF-8 and four single-byte charsets
 * ('iso2', 'win1250', 'iso1', 'win1257').
 *
 * Bytes below 0x80 are treated as ASCII and copied through unchanged in both
 * directions; every other byte is translated through the per-charset table.
 */
class Encoding
{
    /**
     * Translation tables: charset name => (byte value 0x80-0xFF => UTF-8 byte
     * sequence for that character; two or three bytes long).
     *
     * Declared explicitly (and kept public) because it used to be created as a
     * dynamic property, which is deprecated since PHP 8.2.
     *
     * @var array<string, array<int, string>>
     */
    public $CharTable = [];

    /**
     * Fills in the translation tables for all supported charsets.
     */
    public function __construct()
    {
        $this->CharTable = [
            'iso2' => [
                0x80=>"\xc2\x80", 0x81=>"\xc2\x81", 0x82=>"\xc2\x82", 0x83=>"\xc2\x83",
                0x84=>"\xc2\x84", 0x85=>"\xc2\x85", 0x86=>"\xc2\x86", 0x87=>"\xc2\x87",
                0x88=>"\xc2\x88", 0x89=>"\xc2\x89", 0x8A=>"\xc2\x8a", 0x8B=>"\xc2\x8b",
                0x8C=>"\xc2\x8c", 0x8D=>"\xc2\x8d", 0x8E=>"\xc2\x8e", 0x8F=>"\xc2\x8f",
                0x90=>"\xc2\x90", 0x91=>"\xc2\x91", 0x92=>"\xc2\x92", 0x93=>"\xc2\x93",
                0x94=>"\xc2\x94", 0x95=>"\xc2\x95", 0x96=>"\xc2\x96", 0x97=>"\xc2\x97",
                0x98=>"\xc2\x98", 0x99=>"\xc2\x99", 0x9A=>"\xc2\x9a", 0x9B=>"\xc2\x9b",
                0x9C=>"\xc2\x9c", 0x9D=>"\xc2\x9d", 0x9E=>"\xc2\x9e", 0x9F=>"\xc2\x9f",
                0xA0=>"\xc2\xa0", 0xA1=>"\xc4\x84", 0xA2=>"\xcb\x98", 0xA3=>"\xc5\x81",
                0xA4=>"\xc2\xa4", 0xA5=>"\xc4\xbd", 0xA6=>"\xc5\x9a", 0xA7=>"\xc2\xa7",
                0xA8=>"\xc2\xa8", 0xA9=>"\xc5\xa0", 0xAA=>"\xc5\x9e", 0xAB=>"\xc5\xa4",
                0xAC=>"\xc5\xb9", 0xAD=>"\xc2\xad", 0xAE=>"\xc5\xbd", 0xAF=>"\xc5\xbb",
                0xB0=>"\xc2\xb0", 0xB1=>"\xc4\x85", 0xB2=>"\xcb\x9b", 0xB3=>"\xc5\x82",
                0xB4=>"\xc2\xb4", 0xB5=>"\xc4\xbe", 0xB6=>"\xc5\x9b", 0xB7=>"\xcb\x87",
                0xB8=>"\xc2\xb8", 0xB9=>"\xc5\xa1", 0xBA=>"\xc5\x9f", 0xBB=>"\xc5\xa5",
                0xBC=>"\xc5\xba", 0xBD=>"\xcb\x9d", 0xBE=>"\xc5\xbe", 0xBF=>"\xc5\xbc",
                0xC0=>"\xc5\x94", 0xC1=>"\xc3\x81", 0xC2=>"\xc3\x82", 0xC3=>"\xc4\x82",
                0xC4=>"\xc3\x84", 0xC5=>"\xc4\xb9", 0xC6=>"\xc4\x86", 0xC7=>"\xc3\x87",
                0xC8=>"\xc4\x8c", 0xC9=>"\xc3\x89", 0xCA=>"\xc4\x98", 0xCB=>"\xc3\x8b",
                0xCC=>"\xc4\x9a", 0xCD=>"\xc3\x8d", 0xCE=>"\xc3\x8e", 0xCF=>"\xc4\x8e",
                0xD0=>"\xc4\x90", 0xD1=>"\xc5\x83", 0xD2=>"\xc5\x87", 0xD3=>"\xc3\x93",
                0xD4=>"\xc3\x94", 0xD5=>"\xc5\x90", 0xD6=>"\xc3\x96", 0xD7=>"\xc3\x97",
                0xD8=>"\xc5\x98", 0xD9=>"\xc5\xae", 0xDA=>"\xc3\x9a", 0xDB=>"\xc5\xb0",
                0xDC=>"\xc3\x9c", 0xDD=>"\xc3\x9d", 0xDE=>"\xc5\xa2", 0xDF=>"\xc3\x9f",
                0xE0=>"\xc5\x95", 0xE1=>"\xc3\xa1", 0xE2=>"\xc3\xa2", 0xE3=>"\xc4\x83",
                0xE4=>"\xc3\xa4", 0xE5=>"\xc4\xba", 0xE6=>"\xc4\x87", 0xE7=>"\xc3\xa7",
                0xE8=>"\xc4\x8d", 0xE9=>"\xc3\xa9", 0xEA=>"\xc4\x99", 0xEB=>"\xc3\xab",
                0xEC=>"\xc4\x9b", 0xED=>"\xc3\xad", 0xEE=>"\xc3\xae", 0xEF=>"\xc4\x8f",
                0xF0=>"\xc4\x91", 0xF1=>"\xc5\x84", 0xF2=>"\xc5\x88", 0xF3=>"\xc3\xb3",
                0xF4=>"\xc3\xb4", 0xF5=>"\xc5\x91", 0xF6=>"\xc3\xb6", 0xF7=>"\xc3\xb7",
                0xF8=>"\xc5\x99", 0xF9=>"\xc5\xaf", 0xFA=>"\xc3\xba", 0xFB=>"\xc5\xb1",
                0xFC=>"\xc3\xbc", 0xFD=>"\xc3\xbd", 0xFE=>"\xc5\xa3", 0xFF=>"\xcb\x99",
            ],
            // NOTE(review): 0x98 carries the same sequence as 0x91, so decoding
            // that sequence always yields 0x91. Left as found; verify against
            // the official Windows-1250 mapping before changing.
            'win1250' => [
                0x80=>"\xc2\x80", 0x81=>"\xc2\x81", 0x82=>"\xe2\x80\x9a", 0x83=>"\xc2\x83",
                0x84=>"\xe2\x80\x9e", 0x85=>"\xe2\x80\xa6", 0x86=>"\xe2\x80\xa0", 0x87=>"\xe2\x80\xa1",
                0x88=>"\xc2\x88", 0x89=>"\xe2\x80\xb0", 0x8a=>"\xc5\xa0", 0x8b=>"\xe2\x80\xb9",
                0x8c=>"\xc5\x9a", 0x8d=>"\xc5\xa4", 0x8e=>"\xc5\xbd", 0x8f=>"\xc5\xb9",
                0x90=>"\xc2\x90", 0x91=>"\xe2\x80\x98", 0x92=>"\xe2\x80\x99", 0x93=>"\xe2\x80\x9c",
                0x94=>"\xe2\x80\x9d", 0x95=>"\xe2\x80\xa2", 0x96=>"\xe2\x80\x93", 0x97=>"\xe2\x80\x94",
                0x98=>"\xe2\x80\x98", 0x99=>"\xe2\x84\xa2", 0x9a=>"\xc5\xa1", 0x9b=>"\xe2\x80\xba",
                0x9c=>"\xc5\x9b", 0x9d=>"\xc5\xa5", 0x9e=>"\xc5\xbe", 0x9f=>"\xc5\xba",
                0xa0=>"\xc2\xa0", 0xa1=>"\xcb\x87", 0xa2=>"\xcb\x98", 0xa3=>"\xc5\x81",
                0xa4=>"\xc2\xa4", 0xa5=>"\xc4\x84", 0xa6=>"\xc2\xa6", 0xa7=>"\xc2\xa7",
                0xa8=>"\xc2\xa8", 0xa9=>"\xc2\xa9", 0xaa=>"\xc5\x9e", 0xab=>"\xc2\xab",
                0xac=>"\xc2\xac", 0xad=>"\xc2\xad", 0xae=>"\xc2\xae", 0xaf=>"\xc5\xbb",
                0xb0=>"\xc2\xb0", 0xb1=>"\xc2\xb1", 0xb2=>"\xcb\x9b", 0xb3=>"\xc5\x82",
                0xb4=>"\xc2\xb4", 0xb5=>"\xc2\xb5", 0xb6=>"\xc2\xb6", 0xb7=>"\xc2\xb7",
                0xb8=>"\xc2\xb8", 0xb9=>"\xc4\x85", 0xba=>"\xc5\x9f", 0xbb=>"\xc2\xbb",
                0xbc=>"\xc4\xbd", 0xbd=>"\xcb\x9d", 0xbe=>"\xc4\xbe", 0xbf=>"\xc5\xbc",
                0xc0=>"\xc5\x94", 0xc1=>"\xc3\x81", 0xc2=>"\xc3\x82", 0xc3=>"\xc4\x82",
                0xc4=>"\xc3\x84", 0xc5=>"\xc4\xb9", 0xc6=>"\xc4\x86", 0xc7=>"\xc3\x87",
                0xc8=>"\xc4\x8c", 0xc9=>"\xc3\x89", 0xca=>"\xc4\x98", 0xcb=>"\xc3\x8b",
                0xcc=>"\xc4\x9a", 0xcd=>"\xc3\x8d", 0xce=>"\xc3\x8e", 0xcf=>"\xc4\x8e",
                0xd0=>"\xc4\x90", 0xd1=>"\xc5\x83", 0xd2=>"\xc5\x87", 0xd3=>"\xc3\x93",
                0xd4=>"\xc3\x94", 0xd5=>"\xc5\x90", 0xd6=>"\xc3\x96", 0xd7=>"\xc3\x97",
                0xd8=>"\xc5\x98", 0xd9=>"\xc5\xae", 0xda=>"\xc3\x9a", 0xdb=>"\xc5\xb0",
                0xdc=>"\xc3\x9c", 0xdd=>"\xc3\x9d", 0xde=>"\xc5\xa2", 0xdf=>"\xc3\x9f",
                0xe0=>"\xc5\x95", 0xe1=>"\xc3\xa1", 0xe2=>"\xc3\xa2", 0xe3=>"\xc4\x83",
                0xe4=>"\xc3\xa4", 0xe5=>"\xc4\xba", 0xe6=>"\xc4\x87", 0xe7=>"\xc3\xa7",
                0xe8=>"\xc4\x8d", 0xe9=>"\xc3\xa9", 0xea=>"\xc4\x99", 0xeb=>"\xc3\xab",
                0xec=>"\xc4\x9b", 0xed=>"\xc3\xad", 0xee=>"\xc3\xae", 0xef=>"\xc4\x8f",
                0xf0=>"\xc4\x91", 0xf1=>"\xc5\x84", 0xf2=>"\xc5\x88", 0xf3=>"\xc3\xb3",
                0xf4=>"\xc3\xb4", 0xf5=>"\xc5\x91", 0xf6=>"\xc3\xb6", 0xf7=>"\xc3\xb7",
                0xf8=>"\xc5\x99", 0xf9=>"\xc5\xaf", 0xfa=>"\xc3\xba", 0xfb=>"\xc5\xb1",
                0xfc=>"\xc3\xbc", 0xfd=>"\xc3\xbd", 0xfe=>"\xc5\xa3", 0xff=>"\xcb\x99",
            ],
            // ISO-8859-1 bytes map to the Unicode code points of the same value.
            // The 0x80-0x9F and 0xFF rows were previously missing, which made
            // ToUTF8string() silently drop those bytes.
            'iso1' => [
                0x80=>"\xc2\x80", 0x81=>"\xc2\x81", 0x82=>"\xc2\x82", 0x83=>"\xc2\x83",
                0x84=>"\xc2\x84", 0x85=>"\xc2\x85", 0x86=>"\xc2\x86", 0x87=>"\xc2\x87",
                0x88=>"\xc2\x88", 0x89=>"\xc2\x89", 0x8A=>"\xc2\x8a", 0x8B=>"\xc2\x8b",
                0x8C=>"\xc2\x8c", 0x8D=>"\xc2\x8d", 0x8E=>"\xc2\x8e", 0x8F=>"\xc2\x8f",
                0x90=>"\xc2\x90", 0x91=>"\xc2\x91", 0x92=>"\xc2\x92", 0x93=>"\xc2\x93",
                0x94=>"\xc2\x94", 0x95=>"\xc2\x95", 0x96=>"\xc2\x96", 0x97=>"\xc2\x97",
                0x98=>"\xc2\x98", 0x99=>"\xc2\x99", 0x9A=>"\xc2\x9a", 0x9B=>"\xc2\x9b",
                0x9C=>"\xc2\x9c", 0x9D=>"\xc2\x9d", 0x9E=>"\xc2\x9e", 0x9F=>"\xc2\x9f",
                0xA0=>"\xc2\xa0", 0xA1=>"\xc2\xa1", 0xA2=>"\xc2\xa2", 0xA3=>"\xc2\xa3",
                0xA4=>"\xc2\xa4", 0xA5=>"\xc2\xa5", 0xA6=>"\xc2\xa6", 0xA7=>"\xc2\xa7",
                0xA8=>"\xc2\xa8", 0xA9=>"\xc2\xa9", 0xAA=>"\xc2\xaa", 0xAB=>"\xc2\xab",
                0xAC=>"\xc2\xac", 0xAD=>"\xc2\xad", 0xAE=>"\xc2\xae", 0xAF=>"\xc2\xaf",
                0xB0=>"\xc2\xb0", 0xB1=>"\xc2\xb1", 0xB2=>"\xc2\xb2", 0xB3=>"\xc2\xb3",
                0xB4=>"\xc2\xb4", 0xB5=>"\xc2\xb5", 0xB6=>"\xc2\xb6", 0xB7=>"\xc2\xb7",
                0xB8=>"\xc2\xb8", 0xB9=>"\xc2\xb9", 0xBA=>"\xc2\xba", 0xBB=>"\xc2\xbb",
                0xBC=>"\xc2\xbc", 0xBD=>"\xc2\xbd", 0xBE=>"\xc2\xbe", 0xBF=>"\xc2\xbf",
                0xC0=>"\xc3\x80", 0xC1=>"\xc3\x81", 0xC2=>"\xc3\x82", 0xC3=>"\xc3\x83",
                0xC4=>"\xc3\x84", 0xC5=>"\xc3\x85", 0xC6=>"\xc3\x86", 0xC7=>"\xc3\x87",
                0xC8=>"\xc3\x88", 0xC9=>"\xc3\x89", 0xCA=>"\xc3\x8a", 0xCB=>"\xc3\x8b",
                0xCC=>"\xc3\x8c", 0xCD=>"\xc3\x8d", 0xCE=>"\xc3\x8e", 0xCF=>"\xc3\x8f",
                0xD0=>"\xc3\x90", 0xD1=>"\xc3\x91", 0xD2=>"\xc3\x92", 0xD3=>"\xc3\x93",
                0xD4=>"\xc3\x94", 0xD5=>"\xc3\x95", 0xD6=>"\xc3\x96", 0xD7=>"\xc3\x97",
                0xD8=>"\xc3\x98", 0xD9=>"\xc3\x99", 0xDA=>"\xc3\x9a", 0xDB=>"\xc3\x9b",
                0xDC=>"\xc3\x9c", 0xDD=>"\xc3\x9d", 0xDE=>"\xc3\x9e", 0xDF=>"\xc3\x9f",
                0xE0=>"\xc3\xa0", 0xE1=>"\xc3\xa1", 0xE2=>"\xc3\xa2", 0xE3=>"\xc3\xa3",
                0xE4=>"\xc3\xa4", 0xE5=>"\xc3\xa5", 0xE6=>"\xc3\xa6", 0xE7=>"\xc3\xa7",
                0xE8=>"\xc3\xa8", 0xE9=>"\xc3\xa9", 0xEA=>"\xc3\xaa", 0xEB=>"\xc3\xab",
                0xEC=>"\xc3\xac", 0xED=>"\xc3\xad", 0xEE=>"\xc3\xae", 0xEF=>"\xc3\xaf",
                0xF0=>"\xc3\xb0", 0xF1=>"\xc3\xb1", 0xF2=>"\xc3\xb2", 0xF3=>"\xc3\xb3",
                0xF4=>"\xc3\xb4", 0xF5=>"\xc3\xb5", 0xF6=>"\xc3\xb6", 0xF7=>"\xc3\xb7",
                0xF8=>"\xc3\xb8", 0xF9=>"\xc3\xb9", 0xFA=>"\xc3\xba", 0xFB=>"\xc3\xbb",
                0xFC=>"\xc3\xbc", 0xFD=>"\xc3\xbd", 0xFE=>"\xc3\xbe", 0xFF=>"\xc3\xbf",
            ],
            // NOTE(review): "\xc0\x4" is the two bytes 0xC0 0x04, which is not a
            // valid UTF-8 sequence. It looks like a placeholder for positions
            // that have no character in Windows-1257 - TODO confirm and decide
            // whether to replace it. Left byte-for-byte as found.
            'win1257' => [
                0x80=>"\xe2\x82\xac", 0x81=>"\xc0\x4", 0x82=>"\xe2\x80\x9a", 0x83=>"\xc0\x4",
                0x84=>"\xe2\x80\x9e", 0x85=>"\xe2\x80\xa6", 0x86=>"\xe2\x80\xa0", 0x87=>"\xe2\x80\xa1",
                0x88=>"\xc0\x4", 0x89=>"\xe2\x80\xb0", 0x8A=>"\xc0\x4", 0x8B=>"\xe2\x80\xb9",
                0x8C=>"\xc0\x4", 0x8D=>"\xc2\xa8", 0x8E=>"\xcb\x87", 0x8F=>"\xc2\xb8",
                0x90=>"\xc0\x4", 0x91=>"\xe2\x80\x98", 0x92=>"\xe2\x80\x99", 0x93=>"\xe2\x80\x9c",
                0x94=>"\xe2\x80\x9d", 0x95=>"\xe2\x80\xa2", 0x96=>"\xe2\x80\x93", 0x97=>"\xe2\x80\x94",
                0x98=>"\xc0\x4", 0x99=>"\xe2\x84\xa2", 0x9A=>"\xc0\x4", 0x9B=>"\xe2\x80\xba",
                0x9C=>"\xc0\x4", 0x9D=>"\xc2\xaf", 0x9E=>"\xcb\x9b", 0x9F=>"\xc0\x4",
                0xA0=>"\xc2\xa0", 0xA1=>"\xc0\x4", 0xA2=>"\xc2\xa2", 0xA3=>"\xc2\xa3",
                0xA4=>"\xc2\xa4", 0xA5=>"\xc0\x4", 0xA6=>"\xc2\xa6", 0xA7=>"\xc2\xa7",
                0xA8=>"\xc3\x98", 0xA9=>"\xc2\xa9", 0xAA=>"\xc5\x96", 0xAB=>"\xc2\xab",
                0xAC=>"\xc2\xac", 0xAD=>"\xc2\xad", 0xAE=>"\xc2\xae", 0xAF=>"\xc3\x86",
                0xB0=>"\xc2\xb0", 0xB1=>"\xc2\xb1", 0xB2=>"\xc2\xb2", 0xB3=>"\xc2\xb3",
                0xB4=>"\xc2\xb4", 0xB5=>"\xc2\xb5", 0xB6=>"\xc2\xb6", 0xB7=>"\xc2\xb7",
                0xB8=>"\xc3\xb8", 0xB9=>"\xc2\xb9", 0xBA=>"\xc5\x97", 0xBB=>"\xc2\xbb",
                0xBC=>"\xc2\xbc", 0xBD=>"\xc2\xbd", 0xBE=>"\xc2\xbe", 0xBF=>"\xc3\xa6",
                0xC0=>"\xc4\x84", 0xC1=>"\xc4\xae", 0xC2=>"\xc4\x80", 0xC3=>"\xc4\x86",
                0xC4=>"\xc3\x84", 0xC5=>"\xc3\x85", 0xC6=>"\xc4\x98", 0xC7=>"\xc4\x92",
                0xC8=>"\xc4\x8c", 0xC9=>"\xc3\x89", 0xCA=>"\xc5\xb9", 0xCB=>"\xc4\x96",
                0xCC=>"\xc4\xa2", 0xCD=>"\xc4\xb6", 0xCE=>"\xc4\xaa", 0xCF=>"\xc4\xbb",
                0xD0=>"\xc5\xa0", 0xD1=>"\xc5\x83", 0xD2=>"\xc5\x85", 0xD3=>"\xc3\x93",
                0xD4=>"\xc5\x8c", 0xD5=>"\xc3\x95", 0xD6=>"\xc3\x96", 0xD7=>"\xc3\x97",
                0xD8=>"\xc5\xb2", 0xD9=>"\xc5\x81", 0xDA=>"\xc5\x9a", 0xDB=>"\xc5\xaa",
                0xDC=>"\xc3\x9c", 0xDD=>"\xc5\xbb", 0xDE=>"\xc5\xbd", 0xDF=>"\xc3\x9f",
                0xE0=>"\xc4\x85", 0xE1=>"\xc4\xaf", 0xE2=>"\xc4\x81", 0xE3=>"\xc4\x87",
                0xE4=>"\xc3\xa4", 0xE5=>"\xc3\xa5", 0xE6=>"\xc4\x99", 0xE7=>"\xc4\x93",
                0xE8=>"\xc4\x8d", 0xE9=>"\xc3\xa9", 0xEA=>"\xc5\xba", 0xEB=>"\xc4\x97",
                0xEC=>"\xc4\xa3", 0xED=>"\xc4\xb7", 0xEE=>"\xc4\xab", 0xEF=>"\xc4\xbc",
                0xF0=>"\xc5\xa1", 0xF1=>"\xc5\x84", 0xF2=>"\xc5\x86", 0xF3=>"\xc3\xb3",
                0xF4=>"\xc5\x8d", 0xF5=>"\xc3\xb5", 0xF6=>"\xc3\xb6", 0xF7=>"\xc3\xb7",
                0xF8=>"\xc5\xb3", 0xF9=>"\xc5\x82", 0xFA=>"\xc5\x9b", 0xFB=>"\xc5\xab",
                0xFC=>"\xc3\xbc", 0xFD=>"\xc5\xbc", 0xFE=>"\xc5\xbe", 0xFF=>"\xcb\x99",
            ],
        ];
    }

    /**
     * Converts a string in one of the supported single-byte charsets to UTF-8.
     *
     * Bytes below 0x80 are copied unchanged. Any other byte is replaced by its
     * table entry; a byte with no entry is skipped (the previous behaviour,
     * now without an "undefined array key" warning).
     *
     * @param string $String  Text encoded in $Charset.
     * @param string $Charset Key of $CharTable ('iso2', 'win1250', 'iso1', 'win1257').
     *
     * @return string The UTF-8 encoded text.
     *
     * @throws \InvalidArgumentException If $Charset has no translation table.
     */
    public function ToUTF8string(string $String, string $Charset = 'iso2'): string
    {
        $Table = $this->tableFor($Charset);
        $Result = '';
        $Length = strlen($String);

        for ($I = 0; $I < $Length; $I++) {
            $Byte = ord($String[$I]);
            if ($Byte < 0x80) {
                $Result .= $String[$I];
            } else {
                $Result .= $Table[$Byte] ?? '';
            }
        }

        return $Result;
    }

    /**
     * Converts a UTF-8 string to one of the supported single-byte charsets.
     *
     * The input is split into ASCII bytes and multi-byte sequences (a lead
     * byte with bits 0x80 and 0x40 set, followed by bytes with only 0x80 set).
     * Each sequence is looked up in the charset table; a sequence that is not
     * in the table is written as '?'.
     *
     * @param string $String  UTF-8 encoded text.
     * @param string $Charset Key of $CharTable ('iso2', 'win1250', 'iso1', 'win1257').
     *
     * @return string The text encoded in $Charset.
     *
     * @throws \InvalidArgumentException If $Charset has no translation table.
     */
    public function FromUTF8(string $String, string $Charset = 'iso2'): string
    {
        // Build the sequence => byte map once per call instead of scanning the
        // table for every character. The first byte listed for a sequence
        // wins, which is what array_search() used to return.
        $Reverse = [];
        foreach ($this->tableFor($Charset) as $Byte => $Sequence) {
            if (!isset($Reverse[$Sequence])) {
                $Reverse[$Sequence] = $Byte;
            }
        }

        $Result = '';
        $Pending = ''; // bytes of the multi-byte sequence collected so far
        $Length = strlen($String);

        for ($I = 0; $I < $Length; $I++) {
            $Char = $String[$I];
            $Code = ord($Char);

            if ($Code < 0x80) {
                // ASCII byte: ends any sequence in progress and is copied as is.
                $Result .= $this->decodeSequence($Pending, $Reverse) . $Char;
                $Pending = '';
            } elseif ($Code & 0x40) {
                // Lead byte: ends the previous sequence and starts a new one.
                $Result .= $this->decodeSequence($Pending, $Reverse);
                $Pending = $Char;
            } else {
                // Continuation byte.
                $Pending .= $Char;
            }
        }

        // Flush the sequence still pending when the input ends on a
        // non-ASCII character; it used to be lost.
        return $Result . $this->decodeSequence($Pending, $Reverse);
    }

    /**
     * Returns the translation table for a charset, rejecting unknown names.
     *
     * @param string $Charset Key of $CharTable.
     *
     * @return array<int, string>
     *
     * @throws \InvalidArgumentException If $Charset has no translation table.
     */
    private function tableFor(string $Charset): array
    {
        if (!isset($this->CharTable[$Charset]) || !is_array($this->CharTable[$Charset])) {
            throw new \InvalidArgumentException("Unsupported charset: {$Charset}");
        }

        return $this->CharTable[$Charset];
    }

    /**
     * Maps one collected UTF-8 sequence to its single-byte character.
     *
     * @param string             $Sequence Collected bytes; may be empty.
     * @param array<string, int> $Reverse  UTF-8 sequence => byte value.
     *
     * @return string '' for an empty sequence, the mapped byte, or '?' when
     *                the sequence has no mapping (previously a NUL byte).
     */
    private function decodeSequence(string $Sequence, array $Reverse): string
    {
        if ($Sequence === '') {
            return '';
        }

        return isset($Reverse[$Sequence]) ? chr($Reverse[$Sequence]) : '?';
    }
}