sele 发表于 2018-12-21 12:50:30

PHP Big5 Utf-8 GB2312 相互转码解决办法

/**
 * Big5 => GB.
 *
 * Converts a Big5 byte string to GB in place, using the lookup table
 * language/big5-gb.tab (path is relative to the current working directory).
 * The table is addressed as 3-byte records of which the first two bytes
 * are the replacement pair.
 *
 * Bytes below 160 are copied through unchanged. Any byte >= 160 is treated
 * as the lead byte of a two-byte character, so the output always has the
 * same length as the input.
 *
 * @param string $instr Big5-encoded input.
 * @return string The converted string.
 * @throws RuntimeException If the lookup table cannot be opened.
 */
function b2g( $instr )
{
    $fp = fopen('language/big5-gb.tab', 'rb');
    if ($fp === false) {
        throw new RuntimeException('b2g: cannot open language/big5-gb.tab');
    }

    $len = strlen($instr);

    for ($i = 0; $i < $len; $i++) {
        $h = ord($instr[$i]);
        if ($h < 160) {
            continue;
        }

        // A lead byte with nothing after it cannot be looked up; reading or
        // writing $instr[$i + 1] here would run past the end of the string.
        if ($i + 1 >= $len) {
            $instr[$i] = '?';
            break;
        }

        $l = ord($instr[$i + 1]);

        if ($h === 0xA1 && $l === 0x40) {
            // Big5 0xA140 is the ideographic space; its GB counterpart is
            // 0xA1A1. (Assigning '' to a string offset, as the code used to
            // do, throws an Error on PHP >= 7.1.)
            $gb = "\xA1\xA1";
        } else {
            fseek($fp, (($h - 160) * 255 + $l - 1) * 3);
            $gb = fread($fp, 2);
        }

        // Write both table bytes; assigning the whole 2-byte string to a
        // single offset would store only its first byte. If the table has
        // no full entry at that offset the pair is left unchanged.
        if (is_string($gb) && strlen($gb) === 2) {
            $instr[$i] = $gb[0];
            $instr[$i + 1] = $gb[1];
        }

        $i++; // skip the trail byte
    }

    fclose($fp);

    return $instr;
}
  
/**
 * GB => BIG5.
 *
 * Converts a GB byte string to Big5 in place, using the lookup table
 * language/gb-big5.tab (path is relative to the current working directory).
 * The table is addressed as 3-byte records of which the first two bytes
 * are the replacement pair.
 *
 * A byte in 161..247 followed by a byte in 161..254 is treated as one
 * two-byte character. Every other byte is copied through unchanged, except
 * a lead byte that is not followed by a valid trail byte, which is replaced
 * by '?'. The output always has the same length as the input.
 *
 * @param string $instr GB-encoded input.
 * @return string The converted string.
 * @throws RuntimeException If the lookup table cannot be opened.
 */
function g2b( $instr )
{
    $fp = fopen('language/gb-big5.tab', 'rb');
    if ($fp === false) {
        throw new RuntimeException('g2b: cannot open language/gb-big5.tab');
    }

    $len = strlen($instr);

    for ($i = 0; $i < $len; $i++) {
        $h = ord($instr[$i]);
        if ($h <= 160 || $h >= 248) {
            continue;
        }

        // 0 stands in for "no trail byte" when the lead byte ends the input.
        $l = ($i + 1 < $len) ? ord($instr[$i + 1]) : 0;

        if ($l <= 160 || $l >= 255) {
            // Malformed pair: mark the lead byte and let the next byte be
            // processed on its own rather than overwriting it. (The code
            // used to assign '' to both offsets, which throws an Error on
            // PHP >= 7.1.)
            $instr[$i] = '?';
            continue;
        }

        fseek($fp, (($h - 161) * 94 + $l - 161) * 3);
        $bg = fread($fp, 2);

        // Write both table bytes; assigning the whole 2-byte string to a
        // single offset would store only its first byte. If the table has
        // no full entry at that offset the pair is left unchanged.
        if (is_string($bg) && strlen($bg) === 2) {
            $instr[$i] = $bg[0];
            $instr[$i + 1] = $bg[1];
        }

        $i++; // skip the trail byte
    }

    fclose($fp);

    return $instr;
}
  
/**
 * Big5 => Unicode (UTF-8).
 *
 * Converts a Big5 byte string to UTF-8 using the lookup table
 * language/big5-unicode.tab (path is relative to the current working
 * directory). Each table entry is read as a 2-byte big-endian code point.
 *
 * Bytes below 160 are copied through unchanged. Any byte >= 160 is treated
 * as the lead byte of a two-byte character. A lead byte at the very end of
 * the input, or a pair with no table entry, is emitted as '?'.
 *
 * @param string $instr Big5-encoded input.
 * @return string UTF-8 output; an empty string for empty input.
 * @throws RuntimeException If the lookup table cannot be opened.
 */
function b2u( $instr )
{
    $fp = fopen('language/big5-unicode.tab', 'rb');
    if ($fp === false) {
        throw new RuntimeException('b2u: cannot open language/big5-unicode.tab');
    }

    $len = strlen($instr);

    // Built by concatenation: filling a '' by offset and then join()ing it
    // only worked while PHP silently turned the empty string into an array.
    $outstr = '';

    for ($i = 0; $i < $len; $i++) {
        $h = ord($instr[$i]);
        if ($h < 160) {
            $outstr .= $instr[$i];
            continue;
        }

        if ($i + 1 >= $len) {
            $outstr .= '?'; // truncated two-byte character
            break;
        }

        $l = ord($instr[$i + 1]);
        $i++; // the trail byte is consumed whatever happens below

        if ($h === 0xA1 && $l === 0x40) {
            // Big5 0xA140 is the ideographic space, U+3000.
            $codenum = 0x3000;
        } else {
            fseek($fp, ($h - 160) * 510 + ($l - 1) * 2);
            $uni = fread($fp, 2);
            if (!is_string($uni) || strlen($uni) !== 2) {
                $outstr .= '?'; // offset lies outside the table
                continue;
            }
            // High byte first, then low byte.
            $codenum = (ord($uni[0]) << 8) | ord($uni[1]);
        }

        // Integer shifts/masks instead of "/" so chr() never receives a float.
        if ($codenum < 0x80) {
            $outstr .= chr($codenum);
        } elseif ($codenum < 0x800) {
            $outstr .= chr(0xC0 | ($codenum >> 6))
                . chr(0x80 | ($codenum & 0x3F));
        } else {
            $outstr .= chr(0xE0 | ($codenum >> 12))
                . chr(0x80 | (($codenum >> 6) & 0x3F))
                . chr(0x80 | ($codenum & 0x3F));
        }
    }

    fclose($fp);

    return $outstr;
}
  
/**
 * Unicode (UTF-8) => BIG5.
 *
 * Converts a UTF-8 byte string to Big5 using the lookup table
 * language/unicode-big5.tab (path is relative to the current working
 * directory). The table is indexed by code point, two bytes per entry.
 *
 * Bytes below 0x80 are copied through unchanged. Lead bytes >= 0xC0 start a
 * two-byte sequence and lead bytes >= 0xE0 a three-byte sequence. A sequence
 * that is truncated, has an invalid continuation byte, or decodes to an
 * offset outside the table is emitted as '?'. Continuation bytes that do
 * not belong to a sequence are dropped.
 *
 * @param string $instr UTF-8 input.
 * @return string Big5 output; an empty string for empty input.
 * @throws RuntimeException If the lookup table cannot be opened.
 */
function u2b( $instr )
{
    $fp = fopen('language/unicode-big5.tab', 'rb');
    if ($fp === false) {
        throw new RuntimeException('u2b: cannot open language/unicode-big5.tab');
    }

    $len = strlen($instr);

    // Built by concatenation: filling a '' by offset and then join()ing it
    // only worked while PHP silently turned the empty string into an array.
    $outstr = '';

    for ($i = 0; $i < $len; $i++) {
        $b1 = ord($instr[$i]);

        if ($b1 < 0x80) {
            $outstr .= $instr[$i];
            continue;
        }
        if ($b1 < 0xC0) {
            continue; // continuation byte outside a sequence
        }

        // Number of continuation bytes that must follow this lead byte.
        $need = ($b1 >= 0xE0) ? 2 : 1;
        $uc = $b1 - (($need === 2) ? 0xE0 : 0xC0);

        $valid = ($i + $need < $len);
        for ($k = 1; $valid && $k <= $need; $k++) {
            $c = ord($instr[$i + $k]);
            if (($c & 0xC0) !== 0x80) {
                $valid = false;
            } else {
                $uc = ($uc << 6) | ($c & 0x3F);
            }
        }

        if (!$valid) {
            // Do not consume the following bytes; they are handled on
            // their own by the next iterations.
            $outstr .= '?';
            continue;
        }

        $i += $need;

        fseek($fp, $uc * 2);
        $bg = fread($fp, 2);

        // Append both table bytes exactly once.
        $outstr .= (is_string($bg) && strlen($bg) === 2) ? $bg : '?';
    }

    fclose($fp);

    return $outstr;
}
  
/**
 * GB => Unicode (UTF-8).
 *
 * Converts a GB byte string to UTF-8 using the lookup table
 * language/gb-unicode.tab (path is relative to the current working
 * directory). Each table entry is read as a 2-byte big-endian code point.
 *
 * Bytes up to 160 are copied through unchanged. Any byte above 160 is
 * treated as the lead byte of a two-byte character. A lead byte at the very
 * end of the input, or a pair with no table entry, is emitted as '?'.
 *
 * @param string $instr GB-encoded input.
 * @return string UTF-8 output; an empty string for empty input.
 * @throws RuntimeException If the lookup table cannot be opened.
 */
function g2u( $instr )
{
    $fp = fopen('language/gb-unicode.tab', 'rb');
    if ($fp === false) {
        throw new RuntimeException('g2u: cannot open language/gb-unicode.tab');
    }

    $len = strlen($instr);

    // Built by concatenation: filling a '' by offset and then join()ing it
    // only worked while PHP silently turned the empty string into an array.
    $outstr = '';

    for ($i = 0; $i < $len; $i++) {
        $h = ord($instr[$i]);
        if ($h <= 160) {
            $outstr .= $instr[$i];
            continue;
        }

        if ($i + 1 >= $len) {
            $outstr .= '?'; // truncated two-byte character
            break;
        }

        $l = ord($instr[$i + 1]);
        $i++; // the trail byte is consumed whatever happens below

        fseek($fp, ($h - 161) * 188 + ($l - 161) * 2);
        $uni = fread($fp, 2);
        if (!is_string($uni) || strlen($uni) !== 2) {
            $outstr .= '?'; // offset lies outside the table
            continue;
        }

        // High byte first, then low byte.
        $codenum = (ord($uni[0]) << 8) | ord($uni[1]);

        // Integer shifts/masks instead of "/" so chr() never receives a float.
        if ($codenum < 0x80) {
            $outstr .= chr($codenum);
        } elseif ($codenum < 0x800) {
            $outstr .= chr(0xC0 | ($codenum >> 6))
                . chr(0x80 | ($codenum & 0x3F));
        } else {
            $outstr .= chr(0xE0 | ($codenum >> 12))
                . chr(0x80 | (($codenum >> 6) & 0x3F))
                . chr(0x80 | ($codenum & 0x3F));
        }
    }

    fclose($fp);

    return $outstr;
}
  
/**
 * Unicode (UTF-8) => GB.
 *
 * Converts a UTF-8 byte string to GB using the lookup table
 * language/unicode-gb.tab (path is relative to the current working
 * directory). The table is indexed by code point, two bytes per entry.
 *
 * Bytes below 0x80 are copied through unchanged. Lead bytes >= 0xC0 start a
 * two-byte sequence and lead bytes >= 0xE0 a three-byte sequence. A sequence
 * that is truncated, has an invalid continuation byte, or decodes to an
 * offset outside the table is emitted as '?'. Continuation bytes that do
 * not belong to a sequence are dropped.
 *
 * @param string $instr UTF-8 input.
 * @return string GB output; an empty string for empty input.
 * @throws RuntimeException If the lookup table cannot be opened.
 */
function u2g( $instr )
{
    $fp = fopen('language/unicode-gb.tab', 'rb');
    if ($fp === false) {
        throw new RuntimeException('u2g: cannot open language/unicode-gb.tab');
    }

    $len = strlen($instr);

    // Built by concatenation: filling a '' by offset and then join()ing it
    // only worked while PHP silently turned the empty string into an array.
    $outstr = '';

    for ($i = 0; $i < $len; $i++) {
        $b1 = ord($instr[$i]);

        if ($b1 < 0x80) {
            $outstr .= $instr[$i];
            continue;
        }
        if ($b1 < 0xC0) {
            continue; // continuation byte outside a sequence
        }

        // Number of continuation bytes that must follow this lead byte.
        $need = ($b1 >= 0xE0) ? 2 : 1;
        $uc = $b1 - (($need === 2) ? 0xE0 : 0xC0);

        $valid = ($i + $need < $len);
        for ($k = 1; $valid && $k <= $need; $k++) {
            $c = ord($instr[$i + $k]);
            if (($c & 0xC0) !== 0x80) {
                $valid = false;
            } else {
                $uc = ($uc << 6) | ($c & 0x3F);
            }
        }

        if (!$valid) {
            // Do not consume the following bytes; they are handled on
            // their own by the next iterations.
            $outstr .= '?';
            continue;
        }

        $i += $need;

        fseek($fp, $uc * 2);
        $gb = fread($fp, 2);

        // Append both table bytes exactly once.
        $outstr .= (is_string($gb) && strlen($gb) === 2) ? $gb : '?';
    }

    fclose($fp);

    return $outstr;
}


页: [1]
查看完整版本: PHP Big5 Utf-8 GB2312 相互转码解决办法