Location: PHPKode > scripts > Chinese Encode Convert > chinese-encode-convert/Convert.class.php
<?php
/**
 * 名称:Chinese
 * 版本:0.2 Beta
 * 版权:无,爱怎么用就怎么用.
 * 苦力:Lazy
 * 邮件:base64=>'bzBsYXp5MG9AZ21haWwuY29t'
 * 猪页:http://www.ourmind.cn/(计划中,虽然计划了1年了...)
 * 广告:欢迎广播剧爱好者访问->http://www.52radio.net/,分享交流你喜爱的广播剧.
 * 致谢:	类的代码均来自互联网,所以我不敢贴上作者的名号,其中=>
 * 		简繁转换的作者未知,Google得来的,网站我忘记了.原代码也没帖作者是谁.抱歉.
 *		GBK转拼音来自于hightman,详见:http://cws.twomice.net/py/getpy.php?source
 * 		GBK,BIG转UTF,UTF转GBK,BIG的代码来自于Wen,详见:http://www.wensh.net/archive.php/topic/287.html
 * 		GBK,BIG,UTF码表来自于http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/
 * 		UTF转UNI来自于织梦(我也不知道,Google得来上面是这样标明的)
 * 使用:
 * 		$Conv=new Chinese;
 * 		$String='GB2312转BIG(简转繁体)';
 * 		$String=$Conv->Convert('GB','BIG',$String);
 * 		$String='BIG转GB2312(繁转简体)';
 * 		$String=$Conv->Convert('BIG','GB',$String);
 * 		$String='GBK转UTF';
 * 		$String=$Conv->Convert('GBK','UTF',$String);
 * 		$String='BIG转UTF';
 * 		$String=$Conv->Convert('BIG','UTF',$String);
 * 		$String='UTF转GBK';
 * 		$String=$Conv->Convert('UTF','GBK',$String);
 * 		$String='UTF转BIG';
 * 		$String=$Conv->Convert('UTF','BIG',$String);
 */
class Chinese{
	/*
	 * Table-driven converter between GB2312/GBK, Big5 and UTF-8 text, plus a
	 * GBK-to-pinyin transliterator. Every conversion reads a binary lookup
	 * table named "<FROM>2<TO>.Table" from $TableDir.
	 *
	 * Tables are accessed in one of two modes, selected by a $Type argument:
	 *   'MEM'         - the whole table is loaded with file_get_contents();
	 *   anything else - the table is opened with fopen() and read by seeking.
	 * The public XXX2YYY() wrappers pick the mode from the input length
	 * (see $Large).
	 */

	/** @var bool Legacy flag kept for backward compatibility; nothing in this class reads it. */
	public $ICONV=false;

	/** @var int Inputs shorter than this many bytes use 'File' mode, longer ones use 'MEM' mode. */
	public $Large=1040;

	/** @var string Directory (including the trailing slash) that holds the *.Table files. */
	public $TableDir='./Tables/';

	/**
	 * Constructor. Intentionally empty; declared so that PHP 7 does not treat
	 * Chinese() below as a deprecated PHP4-style constructor.
	 */
	public function __construct(){
	}

	/**
	 * Former PHP4-style constructor, retained as a no-op so that code calling
	 * it explicitly keeps working.
	 */
	public function Chinese(){
	}

	/**
	 * Dispatches to the method named "<SOURCE>2<TARGET>" (upper-cased), e.g.
	 * Convert('gb','big',$s) calls GB2BIG($s).
	 *
	 * @param string $Source Source encoding name ('GB', 'BIG', 'GBK', 'UTF', ...).
	 * @param string $Target Target encoding name ('GB', 'BIG', 'GBK', 'UTF', 'PY', ...).
	 * @param string $String Bytes to convert.
	 * @return string Converted bytes.
	 */
	public function Convert($Source,$Target,$String){
		$Func=strtoupper($Source).'2'.strtoupper($Target);
		return $this->$Func($String);
	}

	/**
	 * Converts GB (Simplified) double-byte text to Big5 (Traditional).
	 *
	 * @param string $String GB-encoded bytes.
	 * @return string Big5-encoded bytes.
	 */
	public function GB2BIG($String){
		return $this->CHS2CHS($String,'BIG',$this->TableMode($String));
	}

	/**
	 * Converts Big5 (Traditional) double-byte text to GB (Simplified).
	 *
	 * @param string $String Big5-encoded bytes.
	 * @return string GB-encoded bytes.
	 */
	public function BIG2GB($String){
		return $this->CHS2CHS($String,'GB',$this->TableMode($String));
	}

	/**
	 * Transliterates GBK text to pinyin using GBK2PY.Table.
	 *
	 * @param string $String GBK-encoded bytes.
	 * @return string ASCII bytes copied through, each double-byte character replaced by its table entry.
	 */
	public function GBK2PY($String){
		return $this->GBK2PINYIN($String,$this->TableMode($String));
	}

	/**
	 * Converts GBK text to UTF-8.
	 *
	 * @param string $String GBK-encoded bytes.
	 * @return string UTF-8 bytes; unmapped characters become '__'.
	 */
	public function GBK2UTF($String){
		return $this->CHS2UNI($String,'GBK','UTF',$this->TableMode($String));
	}

	/**
	 * Converts Big5 text to UTF-8.
	 *
	 * @param string $String Big5-encoded bytes.
	 * @return string UTF-8 bytes; unmapped characters become '__'.
	 */
	public function BIG2UTF($String){
		return $this->CHS2UNI($String,'BIG','UTF',$this->TableMode($String));
	}

	/**
	 * Converts UTF-8 text to GBK.
	 *
	 * @param string $String UTF-8 bytes.
	 * @return string GBK-encoded bytes; unmapped characters become '__'.
	 */
	public function UTF2GBK($String){
		// Both table modes must target GBK; the 'MEM' path used to request Big5.
		return $this->UTF2CHS($String,'GBK',$this->TableMode($String));
	}

	/**
	 * Converts UTF-8 text to Big5.
	 *
	 * @param string $String UTF-8 bytes.
	 * @return string Big5-encoded bytes; unmapped characters become '__'.
	 */
	public function UTF2BIG($String){
		return $this->UTF2CHS($String,'BIG',$this->TableMode($String));
	}

	/**
	 * Encodes one Unicode code point as UTF-8.
	 *
	 * @param int $Char Code point.
	 * @return string 1-4 UTF-8 bytes, or '' when $Char is negative or >= 0x200000.
	 */
	public function UNI2UTF($Char){
		if($Char<0){
			return '';
		}
		if($Char<0x80){
			// chr() is required here: appending the integer would yield its decimal digits.
			return chr($Char);
		}
		if($Char<0x800){
			return chr(0xC0|($Char>>6))
				.chr(0x80|($Char&0x3F));
		}
		if($Char<0x10000){
			return chr(0xE0|($Char>>12))
				.chr(0x80|(($Char>>6)&0x3F))
				.chr(0x80|($Char&0x3F));
		}
		if($Char<0x200000){
			return chr(0xF0|($Char>>18))
				.chr(0x80|(($Char>>12)&0x3F))
				.chr(0x80|(($Char>>6)&0x3F))
				.chr(0x80|($Char&0x3F));
		}
		return '';
	}

	/**
	 * Decodes a single UTF-8 sequence to its Unicode code point. The sequence
	 * length is taken from strlen($Char); the bytes are not validated.
	 *
	 * @param string $Char Exactly one UTF-8 sequence of 1 to 4 bytes.
	 * @return int|null Code point, or null when $Char is not 1-4 bytes long.
	 */
	public function UTF2UNI($Char){
		switch(strlen($Char)){
			case 1:
				return ord($Char);
			case 2:
				return ((ord($Char[0])&0x1F)<<6)
					+(ord($Char[1])&0x3F);
			case 3:
				return ((ord($Char[0])&0x0F)<<12)
					+((ord($Char[1])&0x3F)<<6)
					+(ord($Char[2])&0x3F);
			case 4:
				return ((ord($Char[0])&0x07)<<18)
					+((ord($Char[1])&0x3F)<<12)
					+((ord($Char[2])&0x3F)<<6)
					+(ord($Char[3])&0x3F);
		}
		return null;
	}

	/**
	 * Converts between the two double-byte Chinese encodings through a direct
	 * index table (BIG2GB.Table or GB2BIG.Table). The table offset of a
	 * character is (lead-160)*510+(trail-1)*2 and holds its two output bytes.
	 *
	 * Double-byte characters whose offset falls outside the table, and a lone
	 * lead byte at the end of the input, are copied through unchanged.
	 *
	 * @param string $String Input bytes.
	 * @param string $Target 'GB' to use BIG2GB.Table; any other value uses GB2BIG.Table.
	 * @param string $Type   'MEM' to load the table into memory; otherwise it is read from the file.
	 * @return string Converted bytes.
	 * @throws RuntimeException When the table cannot be opened or read.
	 */
	public function CHS2CHS($String,$Target,$Type){
		$TableFile=$this->TableDir.($Target=='GB'?'BIG2GB.Table':'GB2BIG.Table');
		$MapTable=$this->OpenTable($TableFile,$Type);
		$Length=strlen($String);
		$ReturnStr='';
		for($Foo=0;$Foo<$Length;$Foo++){
			$Lead=ord($String[$Foo]);
			if($Lead<=127){
				$ReturnStr.=$String[$Foo];
				continue;
			}
			if($Foo+1>=$Length){
				// Lone lead byte at the end: there is no trail byte to look up.
				$ReturnStr.=$String[$Foo];
				break;
			}
			$Trail=ord($String[$Foo+1]);
			$MapAddr=(($Lead-160)*510)+($Trail-1)*2;
			$Mapped=$this->ReadTable($MapTable,$Type,$MapAddr,2);
			// Outside the table (e.g. lead byte below 0xA0): keep the original pair.
			$ReturnStr.=$Mapped===false?$String[$Foo].$String[$Foo+1]:$Mapped;
			$Foo++;
		}
		$this->CloseTable($MapTable,$Type);
		return $ReturnStr;
	}

	/**
	 * Replaces every GBK double-byte character with the 8-byte entry stored
	 * for it in GBK2PY.Table; ASCII bytes are copied through.
	 *
	 * The entry index is (lead-129)*192+(trail-64). A negative index emits '_'
	 * and rescans from the following byte. An entry that cannot be read, or a
	 * lone lead byte at the end of the input, also emits '_'.
	 *
	 * NOTE(review): entries are returned exactly as unpack('a8') yields them,
	 * so any padding stored in the table is part of the output - confirm
	 * against the table format.
	 *
	 * @param string $String GBK-encoded bytes.
	 * @param string $Type   'MEM' to load the table into memory; otherwise it is read from the file.
	 * @return string Transliterated text.
	 * @throws RuntimeException When the table cannot be opened or read.
	 */
	public function GBK2PINYIN($String,$Type='File'){
		$MapTable=$this->OpenTable($this->TableDir.'GBK2PY.Table',$Type);
		$Length=strlen($String);
		$ReturnStr='';
		for($Foo=0;$Foo<$Length;$Foo++){
			$Lead=ord($String[$Foo]);
			if($Lead<=127){
				$ReturnStr.=$String[$Foo];
				continue;
			}
			if($Foo+1>=$Length){
				$ReturnStr.='_';
				break;
			}
			$High=$Lead-129;
			$Low=ord($String[$Foo+1])-64;
			// Same value as ($High<<8)+$Low-($High*64): 192 entries per lead byte.
			$Addr=$High*192+$Low;
			if($Addr<0){
				// The trail byte is deliberately not consumed; it is rescanned next.
				$ReturnStr.='_';
				continue;
			}
			$Entry=$this->ReadTable($MapTable,$Type,$Addr*8,8);
			if($Entry===false){
				$ReturnStr.='_';
			}else{
				$BinStr=unpack('a8py',$Entry);
				$ReturnStr.=$BinStr['py'];
			}
			$Foo++;
		}
		$this->CloseTable($MapTable,$Type);
		return $ReturnStr;
	}

	/**
	 * Converts GBK or Big5 double-byte text to Unicode via GBK2UNI.Table or
	 * BIG2UNI.Table. The table starts with a 2-byte little-endian size,
	 * followed by 4-byte records (2-byte key, 2-byte value, both
	 * little-endian) that are binary-searched by the big-endian value of the
	 * two input bytes.
	 *
	 * @param string $String Input bytes.
	 * @param string $Source 'GBK' to use GBK2UNI.Table; any other value uses BIG2UNI.Table.
	 * @param string $Target 'UTF' to emit UTF-8; any other value appends each code point as a decimal number.
	 * @param string $Type   'MEM' to load the table into memory; otherwise it is read from the file.
	 * @return string Converted text; characters without a table entry become '__'.
	 * @throws RuntimeException When the table cannot be opened or read.
	 */
	public function CHS2UNI($String,$Source='GBK',$Target='UTF',$Type='File'){
		$MapFile=$this->TableDir.($Source=='GBK'?'GBK2UNI.Table':'BIG2UNI.Table');
		$MapTable=$this->OpenTable($MapFile,$Type);
		$MapSize=$this->WordAt($MapTable,$Type,0);
		$Length=strlen($String);
		$ReturnStr='';
		for($Foo=0;$Foo<$Length;$Foo++){
			$Lead=ord($String[$Foo]);
			if($Lead<=127){
				$ReturnStr.=$String[$Foo];
				continue;
			}
			if($Foo+1>=$Length){
				// Lone lead byte at the end cannot form a double-byte key.
				$ReturnStr.='__';
				break;
			}
			$StrEncode=($Lead<<8)|ord($String[$Foo+1]);
			$Value=$MapSize===false?false:$this->SearchTable($MapTable,$Type,$MapSize,$StrEncode);
			if($Value===false){
				$ReturnStr.='__';
			}else{
				$StrUni=ord($Value[0])+256*ord($Value[1]);
				$ReturnStr.=$Target=='UTF'?$this->UNI2UTF($StrUni):$StrUni;
			}
			$Foo++;
		}
		$this->CloseTable($MapTable,$Type);
		return $ReturnStr;
	}

	/**
	 * Converts UTF-8 text to GBK or Big5 via UNI2GBK.Table or UNI2BIG.Table
	 * (same record layout as the tables used by CHS2UNI, keyed by code point).
	 *
	 * Each UTF-8 sequence is decoded using its real length (2, 3 or 4 bytes).
	 * A byte that does not start a well-formed sequence, and any character
	 * without a table entry, becomes '__'.
	 *
	 * @param string $String UTF-8 bytes.
	 * @param string $Target 'GBK' to use UNI2GBK.Table; any other value uses UNI2BIG.Table.
	 * @param string $Type   'MEM' to load the table into memory; otherwise it is read from the file.
	 * @return string Converted bytes.
	 * @throws RuntimeException When the table cannot be opened or read.
	 */
	public function UTF2CHS($String,$Target='GBK',$Type='File'){
		$MapFile=$this->TableDir.($Target=='GBK'?'UNI2GBK.Table':'UNI2BIG.Table');
		$MapTable=$this->OpenTable($MapFile,$Type);
		$MapSize=$this->WordAt($MapTable,$Type,0);
		$Length=strlen($String);
		$ReturnStr='';
		for($Foo=0;$Foo<$Length;$Foo++){
			if(ord($String[$Foo])<=127){
				$ReturnStr.=$String[$Foo];
				continue;
			}
			$SeqLen=$this->Utf8SequenceLength($String,$Foo);
			if($SeqLen==0){
				// Stray or truncated byte: emit the placeholder and resynchronise on the next byte.
				$ReturnStr.='__';
				continue;
			}
			$StrEncode=$this->UTF2UNI(substr($String,$Foo,$SeqLen));
			$Value=$MapSize===false?false:$this->SearchTable($MapTable,$Type,$MapSize,$StrEncode);
			// The value is stored low byte first; the output wants the high byte first.
			$ReturnStr.=$Value===false?'__':$Value[1].$Value[0];
			$Foo+=$SeqLen-1;
		}
		$this->CloseTable($MapTable,$Type);
		return $ReturnStr;
	}

	/**
	 * Picks the table access mode for an input: 'File' below $Large bytes, 'MEM' otherwise.
	 */
	private function TableMode($String){
		return strlen($String)<$this->Large?'File':'MEM';
	}

	/**
	 * Loads a table as a string ('MEM') or opens it for binary reading (any other $Type).
	 *
	 * @throws RuntimeException When the file cannot be opened or read.
	 */
	private function OpenTable($File,$Type){
		$MapTable=$Type=='MEM'?file_get_contents($File):fopen($File,'rb');
		if($MapTable===false){
			throw new RuntimeException('Unable to load mapping table: '.$File);
		}
		return $MapTable;
	}

	/**
	 * Releases a table obtained from OpenTable(); only file handles need closing.
	 */
	private function CloseTable($MapTable,$Type){
		if($Type!='MEM'){
			fclose($MapTable);
		}
	}

	/**
	 * Reads exactly $Len bytes at byte offset $Addr of a table.
	 *
	 * @return string|false The bytes, or false when the range is not fully inside the table.
	 */
	private function ReadTable($MapTable,$Type,$Addr,$Len){
		if($Addr<0){
			return false;
		}
		if($Type=='MEM'){
			$Bytes=substr($MapTable,$Addr,$Len);
		}else{
			if(fseek($MapTable,$Addr)!==0){
				return false;
			}
			$Bytes=fread($MapTable,$Len);
		}
		return (is_string($Bytes)&&strlen($Bytes)==$Len)?$Bytes:false;
	}

	/**
	 * Reads the 2-byte little-endian integer at byte offset $Addr of a table.
	 *
	 * @return int|false The value, or false when it cannot be read.
	 */
	private function WordAt($MapTable,$Type,$Addr){
		$Bytes=$this->ReadTable($MapTable,$Type,$Addr,2);
		return $Bytes===false?false:ord($Bytes[0])+256*ord($Bytes[1]);
	}

	/**
	 * Binary-searches the 4-byte records that follow the 2-byte table header
	 * for $Key and returns the record's 2 value bytes (as stored, low byte first).
	 *
	 * NOTE(review): as in the original search, the loop never examines record
	 * number $MapSize itself - confirm whether the stored size already
	 * accounts for that.
	 *
	 * @return string|false The 2 value bytes, or false when $Key has no record.
	 */
	private function SearchTable($MapTable,$Type,$MapSize,$Key){
		$Start=1;
		$End=$MapSize;
		while($Start<$End-1){
			// Integer midpoint keeps the byte offsets integral.
			$Mid=($Start+$End)>>1;
			$MapEncode=$this->WordAt($MapTable,$Type,4*($Mid-1)+2);
			if($MapEncode===false){
				return false;
			}
			if($Key==$MapEncode){
				$Start=$Mid;
				break;
			}
			if($Key>$MapEncode){
				$Start=$Mid;
			}else{
				$End=$Mid;
			}
		}
		$Record=$this->ReadTable($MapTable,$Type,2+4*($Start-1),4);
		if($Record===false){
			return false;
		}
		$Encode=ord($Record[0])+256*ord($Record[1]);
		return $Key==$Encode?substr($Record,2,2):false;
	}

	/**
	 * Returns the length (2, 3 or 4) of the multi-byte UTF-8 sequence starting
	 * at $Pos, or 0 when the byte there is not a lead byte, the sequence is
	 * cut off by the end of the string, or a continuation byte is malformed.
	 */
	private function Utf8SequenceLength($String,$Pos){
		$Lead=ord($String[$Pos]);
		if(($Lead&0xE0)==0xC0){
			$Len=2;
		}elseif(($Lead&0xF0)==0xE0){
			$Len=3;
		}elseif(($Lead&0xF8)==0xF0){
			$Len=4;
		}else{
			return 0;
		}
		if($Pos+$Len>strlen($String)){
			return 0;
		}
		for($Idx=1;$Idx<$Len;$Idx++){
			if((ord($String[$Pos+$Idx])&0xC0)!=0x80){
				return 0;
			}
		}
		return $Len;
	}
}
// Demo: converts a sample sentence and dumps the result.
// NOTE(review): these statements sit at file scope, so they run (and print) every
// time this file is executed or included, and they leave $String and $Conv in the
// global scope - confirm that is intended for a file that also declares the class.
// NOTE(review): the conversion treats the literal below as UTF-8 bytes, which
// presumably requires this file to be saved as UTF-8 - TODO confirm.
$String='歡迎廣播劇愛好者訪問->http://www.52radio.net/,分享交流你喜愛的廣播劇.';
// Print the original text first.
echo $String;
$Conv=new Chinese;
// Convert() upper-cases both names and calls the method "UTF2GBK".
$String=$Conv->Convert('UTF','GBK',$String);
// Dump the converted byte string together with its length.
var_dump($String);
?>
Return current item: Chinese Encode Convert