php_字符串编码ASCII_GBK_UTF-8检测_中文字符串_求子串
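<?php
// Check whether a single character is ASCII.
// Example: 'a'
//
// ord('a') = 97
// Letters (and the other ASCII characters) all have codes below 128.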
/**
 * Reports whether the first byte of $s is a 7-bit ASCII code.
 *
 * Only the first byte is examined, because ord() ignores everything after it.
 *
 * @param string $s Character (or string) to inspect.
 * @return bool TRUE when the first byte is in the range 0..127.
 */
function isCharASCII($s){
    $firstByte = ord($s);

    // 0x7F is the highest ASCII code; anything above has the high bit set.
    return $firstByte <= 0x7F;
}
//
//echo '<hr/>';
//var_dump(isCharASCII('a'));
//var_dump(isCharASCII('A'));
//var_dump(isCharASCII('0'));
//var_dump(isCharASCII('#'));
// echo ord('z');//122 a 97
//echo '<br>';
//$str = '国';
//var_dump($str);
//echo (chr(192).chr(128));
/**
 * Bit arithmetic for the lead-byte mask of a three-byte UTF-8 sequence:
 *   11100000 (binary)
 *   64 + 32 = 96
 *   128 + 64 + 32 = 224 (0xE0)
 */
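// Notes on telling GBK and UTF-8 apart by byte pattern.
//
// GBK (GB encodings):
// one Chinese character such as "中" occupies two bytes; the heuristic used here is
//   first byte   110*****  (mask 192)
//   second byte  10******  (mask 128)
// (there are exceptions, for example the character "联")
//
// UTF-8: the first byte is 1110****, the second is 10****** and the third is 10******.
// After a bitwise AND with the mask, the result must equal the non-star bits shown above.
// ANDing a UTF-8 lead byte (111.....) with the GB mask also yields 110.....,
// so the UTF-8 test has to be performed first.
//
// UTF-8 is the general-purpose encoding;
// it stores one Chinese character in three bytes:
//   11100000  224
//   10000000  128
//   10000000  128
// For English text, GB and UTF-8 are both single-byte.
// ASCII codes are values below 128, e.g. a f d z A.
// Three cases to distinguish: ASCII, GBK, UTF-8.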
/**
 * Guesses whether the multi-byte character at the start of $s is encoded as
 * UTF-8 or as GBK, by looking at its byte pattern.
 *
 * UTF-8 is tested first: a three-byte (1110xxxx) or four-byte (11110xxx) lead
 * byte must be followed by the matching number of 10xxxxxx continuation bytes.
 * Otherwise the first two bytes are checked against the GBK double-byte ranges
 * (lead 0x81-0xFE, trail 0x40-0xFE except 0x7F).
 *
 * This is a heuristic: some byte sequences are valid in both encodings. In
 * particular a two-byte UTF-8 sequence (110xxxxx 10xxxxxx) also matches the
 * GBK ranges and is reported as 'GBK'.
 *
 * @param string $s Bytes of the character to inspect; bytes after the first
 *                  character are ignored.
 * @return string|null 'UTF-8', 'GBK', or NULL for empty input, ASCII, or a
 *                     byte pattern that matches neither encoding.
 */
function isUTF8OrGBK($s){
    $s = (string) $s;
    $length = strlen($s);
    if ($length === 0) {
        return NULL;
    }

    $lead = ord($s[0]);
    if ($lead < 0x80) {
        // Plain ASCII is identical in both encodings, so there is nothing to tell apart.
        return NULL;
    }

    // Number of continuation bytes a UTF-8 lead byte of this shape requires.
    $continuationCount = 0;
    if (($lead & 0xF0) === 0xE0) {
        $continuationCount = 2;
    } elseif (($lead & 0xF8) === 0xF0) {
        $continuationCount = 3;
    }

    if ($continuationCount > 0 && $length > $continuationCount) {
        $looksLikeUtf8 = true;
        for ($i = 1; $i <= $continuationCount; $i++) {
            // Every continuation byte must have the form 10xxxxxx.
            if ((ord($s[$i]) & 0xC0) !== 0x80) {
                $looksLikeUtf8 = false;
                break;
            }
        }
        if ($looksLikeUtf8) {
            return 'UTF-8';
        }
    }

    // GBK double-byte character: inspect the real second byte, not the first again.
    if ($length >= 2 && $lead >= 0x81 && $lead <= 0xFE) {
        $trail = ord($s[1]);
        if ($trail >= 0x40 && $trail <= 0xFE && $trail !== 0x7F) {
            return "GBK";
        }
    }

    return NULL;
}
//=========================
//1100 0000
//c 0
//echo '<hr/>';
//var_dump(isUTF8OrGBK('a'));
//echo '<hr/>';
//var_dump(isUTF8OrGBK('中'));
//echo '<br />';
//$s = iconv("GBK", "UTF-8", '中');
//echo $s;
//var_dump(isUTF8OrGBK($s));
//$i = 1;
//$j = 1;
//$i 00000000 00000000 00000000 00000001
//$j 00000000 00000000 00000000 00000001
//---------------------------------------
// 00000000 00000000 00000000 00000001
//
//1 & 1 = 1
//0 & 1 = 0
//1 & 0 = 0;
//0 & 0 = 0
//192 128 + 64 = 192
// 193
// 224
//00000000 00000000 00000000 11000000
//00000000 00000000 00000000 11000001
//------------------------------------
//00000000 00000000 00000000 11000000
//128 + 64 + 32 = 224
//00000000 00000000 00000000 11000000
//00000000 00000000 00000000 11100000
//---------------------------------------
//00000000 00000000 00000000 11000000
//192 128
//00000000 00000000 00000000 11000000
//00000000 00000000 00000000 10000000
//=====================================
//00000000 00000000 00000000 10000000 128
//echo $i & $j;
// Write an HTML horizontal rule to the output.
print '<hr/>';
/**
 * Extracts a substring from a GBK-encoded string, counting in characters
 * instead of bytes: a double-byte GBK character counts as one position and is
 * never split in the middle.
 *
 * A byte in the range 0x81-0xFE is treated as the lead byte of a double-byte
 * character and consumes the byte that follows it. The function does not
 * handle UTF-8 input, where Chinese characters occupy three bytes.
 *
 * @param string $str   GBK-encoded input.
 * @param int    $start Zero-based character offset of the first character to return.
 * @param int    $len   Number of characters to return.
 * @return string The selected characters; an empty string when $start lies
 *                beyond the end of $str.
 */
function msubstr($str, $start, $len) {
    $str = (string) $str;
    $byteLen = strlen($str);
    $tmpstr = '';
    $end = $start + $len; // exclusive end, adjusted to a byte offset as double-byte characters are found

    // Pass 1: turn the character offset into a byte offset. Each double-byte
    // character before the start pushes both boundaries one byte further.
    // The loop is bounded by the byte length so it never reads past the string.
    for ($i = 0; $i < $start && $i < $byteLen; $i++) {
        $byte = ord($str[$i]);
        if ($byte >= 0x81 && $byte <= 0xfe) {
            $start++;
            $end++;
            $i++; // skip the trail byte
        }
    }

    // Pass 2: copy characters until $len of them were taken or the string ends.
    for ($i = $start; $i < $end && $i < $byteLen; $i++) {
        $byte = ord(substr($str, $i, 1));
        if ($byte >= 0x81 && $byte <= 0xfe) {
            $end++; // a double-byte character uses one extra byte
            $tmpstr .= substr($str, $i, 2);
            $i++;
        } else {
            $tmpstr .= substr($str, $i, 1);
        }
    }

    return $tmpstr;
}
/**
 * Reports whether the first byte of $s can be the lead byte of a double-byte
 * GBK character (0x81-0xFE).
 *
 * Only the first byte is examined; the trail byte is not validated.
 *
 * @param string $s Character (or string) to inspect.
 * @return bool TRUE when the first byte is a GBK lead byte; FALSE for ASCII,
 *              for 0x80 / 0xFF, and for empty input.
 */
function isGBK($s){
    $s = (string) $s;
    if ($s === '') {
        return false;
    }

    $lead = ord($s[0]);

    return $lead >= 0x81 && $lead <= 0xfe;
}
//echo msubstr("西瓜cd", 0, 2).'<br/>'; //西瓜
//echo msubstr("a中cd", 0, 2).'<br/>'; //a中
//echo msubstr("中ac国人民cd", 3, 3).'<br/>'; //国人民
//echo msubstr("中中国人民cd", 3, 3).'<br/>'; //人民c
//echo msubstr("中中ac国人民cd", 3, 3).'<br/>';//c国人
//echo msubstr("中中ac国人民cd", 3, 1).'<br/>';//c
//echo msubstr("中中ac国人民cd", 3, 2).'<br/>';//c国
//echo msubstr("中ab国df人gdf民共和国cd",5,3).'<br/>';
//var_dump(isGBK('a'));
?>
页:
[1]