/**
 * Replace decimal HTML numeric character references ("&#NNN;") in a string
 * with the UTF-8 byte sequence of the referenced code point.
 *
 * UTF-8 layout by code point range (x = payload bit):
 *
 *   0000-007F    | 0xxxxxxx
 *   0080-07FF    | 110xxxxx 10xxxxxx
 *   0800-FFFF    | 1110xxxx 10xxxxxx 10xxxxxx
 *   10000-10FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * Every decimal reference in the input is converted in place; all other
 * text is copied through unchanged. A reference is left exactly as written
 * when its value is above 0x10FFFF or falls in the surrogate range
 * D800-DFFF, because neither can be represented in valid UTF-8.
 * Hexadecimal ("&#x...;") and named ("&amp;") entities are not matched by
 * the pattern and are therefore left untouched.
 *
 * @param string $string Text that may contain "&#NNN;" references.
 * @return string The text with convertible references replaced by UTF-8
 *                bytes; the original argument if it contains no reference.
 */
function htmlentity2utf8($string) {
    // Fast path: nothing to convert, hand the argument back untouched.
    if (!preg_match('@&#\d+;@', $string)) {
        return $string;
    }

    // With PREG_SPLIT_DELIM_CAPTURE the result alternates strictly:
    // even indexes hold literal text, odd indexes hold the captured digits.
    $pieces = preg_split('@&#(\d+);@', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($pieces === false) {
        return $string;
    }

    $utf8 = '';
    foreach ($pieces as $index => $piece) {
        if ($index % 2 == 0) {
            // Literal text between references.
            $utf8 .= $piece;
            continue;
        }

        // 0x10FFFF is 1114111 (7 digits); anything with more significant
        // digits is out of range, and rejecting it here avoids relying on
        // integer-overflow behaviour of the (int) cast.
        $digits = ltrim($piece, '0');
        $he = (strlen($digits) <= 7) ? (int)$digits : -1;

        if ($he < 0 || $he > 0x10ffff || ($he >= 0xd800 && $he <= 0xdfff)) {
            // Not encodable: put the reference back exactly as it was.
            $utf8 .= '&#' . $piece . ';';
        } elseif ($he <= 0x007f) {
            $utf8 .= chr($he);
        } elseif ($he <= 0x07ff) {
            $utf8 .= chr(0xc0 | ($he >> 6))
                   . chr(0x80 | ($he & 0x3f));
        } elseif ($he <= 0xffff) {
            $utf8 .= chr(0xe0 | ($he >> 12))
                   . chr(0x80 | (($he >> 6) & 0x3f))
                   . chr(0x80 | ($he & 0x3f));
        } else {
            $utf8 .= chr(0xf0 | ($he >> 18))
                   . chr(0x80 | (($he >> 12) & 0x3f))
                   . chr(0x80 | (($he >> 6) & 0x3f))
                   . chr(0x80 | ($he & 0x3f));
        }
    }
    return $utf8;
}