www.gusucode.com > 08CMS空白站群系统 3.3 繁体 UTF-8 > upload/include/chinese.cls.php

    <?php
!defined('M_COM') && exit('No Permission');
define('CODETABLEDIR', M_ROOT.'/include/encoding/');
class Chinese{
    var $PINYIN_table = array();
    var $unicode_table = array();
    var $ctf;
    var $SourceText = "";
    var $config  =  array(
        'codetable_dir'         => CODETABLEDIR,
        'source_lang'           => '', 
        'target_lang'           => '', 
        'GBtoBIG5_table'        => 'gb-big5.table',
        'BIG5toGB_table'        => 'big5-gb.table',
        'GBtoPINYIN_table'      => 'gb-pinyin.table',
        'GBtoUnicode_table'     => 'gb-unicode.table',
        'BIG5toUnicode_table'   => 'big5-unicode.table'
    );
    function Chinese(){
    }

    function Convert( $source_lang , $target_lang , $source_string='' ){
        if ($source_lang == $target_lang || $source_string == '')
        {
            return $source_string;
        }
        
        if ($source_lang != '') {
            $this->config['source_lang'] = $source_lang;
        }

        if ($target_lang != '') {
            $this->config['target_lang'] = $target_lang;
        }

        $this->SourceText = $source_string;
        
        $this->OpenTable();
        if ( ($this->config['source_lang']=="GB2312" || $this->config['source_lang']=="BIG5") && ($this->config['target_lang']=="GB2312" || $this->config['target_lang']=="BIG5") ) {
            return $this->GB2312toBIG5();
        }
        if ( ($this->config['source_lang']=="GB2312" || $this->config['source_lang']=="BIG5") && $this->config['target_lang']=="PINYIN" ) {
            return $this->CHStoPINYIN();
        }
        if ( ($this->config['source_lang']=="GB2312" || $this->config['source_lang']=="BIG5" || $this->config['source_lang']=="UTF8") && ($this->config['target_lang']=="UTF8" || $this->config['target_lang']=="GB2312" || $this->config['target_lang']=="BIG5") ) {
            return $this->CHStoUTF8();
        }
        if ( ($this->config['source_lang']=="GB2312" || $this->config['source_lang']=="BIG5") && $this->config['target_lang']=="UNICODE" ) {
            return $this->CHStoUNICODE();
        }
    }
    function _hex2bin( $hexdata )
    {
        $bindata = '';
        for ( $i=0; $i<strlen($hexdata); $i+=2 )
        $bindata.=chr(hexdec(substr($hexdata,$i,2)));

        return $bindata;
    }
    function OpenTable()
    {
        if ($this->config['source_lang']=="GB2312") {
            if ($this->config['target_lang'] == "BIG5") {
                $this->ctf = fopen($this->config['codetable_dir'].$this->config['GBtoBIG5_table'], "r");
                if (is_null($this->ctf)) {
                    echo 'Fail to open coverting table!';
                    exit;
                }
            }

            if ($this->config['target_lang'] == "PINYIN") {
                $tmp = @file($this->config['codetable_dir'].$this->config['GBtoPINYIN_table']);
                if (!$tmp) {
                    echo 'Fail to open coverting table!';
                    exit;
                }
                $i = 0;
                for ($i=0; $i<count($tmp); $i++) {
                    $tmp1 = explode("	", $tmp[$i]);
                    $this->PINYIN_table[$i]=array($tmp1[0],$tmp1[1]);
                }
            }

            if ($this->config['target_lang'] == "UTF8") {
                $tmp = @file($this->config['codetable_dir'].$this->config['GBtoUnicode_table']);
                if (!$tmp) {
                    echo 'Fail to convert encoding!';
                    exit;
                }
                $this->unicode_table = array();
                while(list($key,$value)=each($tmp))
                $this->unicode_table[hexdec(substr($value,0,6))]=substr($value,7,6);
            }

            if ($this->config['target_lang'] == "UNICODE") {
                $tmp = @file($this->config['codetable_dir'].$this->config['GBtoUnicode_table']);
                if (!$tmp) {
                    echo 'Fail to open coverting table!';
                    exit;
                }
                $this->unicode_table = array();
                while(list($key,$value)=each($tmp))
                $this->unicode_table[hexdec(substr($value,0,6))]=substr($value,9,4);
            }
        }

        if ($this->config['source_lang']=="BIG5") {
            if ($this->config['target_lang'] == "GB2312") {
                $this->ctf = fopen($this->config['codetable_dir'].$this->config['BIG5toGB_table'], "r");
                if (is_null($this->ctf)) {
                    echo 'Fail to open coverting table!';
                    exit;
                }
            }
            if ($this->config['target_lang'] == "UTF8") {
                $tmp = @file($this->config['codetable_dir'].$this->config['BIG5toUnicode_table']);
                if (!$tmp) {
                    echo 'Fail to open coverting table!';
                    exit;
                }
                $this->unicode_table = array();
                while(list($key,$value)=each($tmp))
                $this->unicode_table[hexdec(substr($value,0,6))]=substr($value,7,6);
            }

            if ($this->config['target_lang'] == "UNICODE") {
                $tmp = @file($this->config['codetable_dir'].$this->config['BIG5toUnicode_table']);
                if (!$tmp) {
                    echo 'Fail to open coverting table!';
                    exit;
                }
                $this->unicode_table = array();
                while(list($key,$value)=each($tmp))
                $this->unicode_table[hexdec(substr($value,0,6))]=substr($value,9,4);
            }

            if ($this->config['target_lang'] == "PINYIN") {
                $tmp = @file($this->config['codetable_dir'].$this->config['GBtoPINYIN_table']);
                if (!$tmp) {
                    echo 'Fail to open coverting table!';
                    exit;
                }
                $i = 0;
                for ($i=0; $i<count($tmp); $i++) {
                    $tmp1 = explode("	", $tmp[$i]);
                    $this->PINYIN_table[$i]=array($tmp1[0],$tmp1[1]);
                }
            }
        }

        if ($this->config['source_lang']=="UTF8") {

            if ($this->config['target_lang'] == "GB2312") {
                $tmp = @file($this->config['codetable_dir'].$this->config['GBtoUnicode_table']);
                if (!$tmp) {
                    echo 'Fail to open coverting table!';
                    exit;
                }
                $this->unicode_table = array();
                while(list($key,$value)=each($tmp))
                $this->unicode_table[hexdec(substr($value,7,6))]=substr($value,0,6);
            }

            if ($this->config['target_lang'] == "BIG5") {
                $tmp = @file($this->config['codetable_dir'].$this->config['BIG5toUnicode_table']);
                if (!$tmp) {
                    echo 'Fail to open coverting table!';
                    exit;
                }
                $this->unicode_table = array();
                while(list($key,$value)=each($tmp))
                $this->unicode_table[hexdec(substr($value,7,6))]=substr($value,0,6);
            }
        }

    } 
    function OpenFile( $position , $isHTML=false )
    {
        $tempcontent = @file($position);

        if (!$tempcontent) {
            echo 'Fail to open file!';
            exit;
        }

        $this->SourceText = implode("",$tempcontent);

        if ($isHTML) {
            $this->SourceText = eregi_replace( "charset=".$this->config['source_lang'] , "charset=".$this->config['target_lang'] , $this->SourceText);

            $this->SourceText = eregi_replace("\n", "", $this->SourceText);

            $this->SourceText = eregi_replace("\r", "", $this->SourceText);
        }
    } 
    function SiteOpen( $position )
    {
        $tempcontent = @file($position);

        if (!$tempcontent) {
            echo 'Fail to open file!';
            exit;
        }

        $this->SourceText = implode("",$tempcontent);

        $this->SourceText = eregi_replace( "charset=".$this->config['source_lang'] , "charset=".$this->config['target_lang'] , $this->SourceText);


    }
    function setvar( $parameter , $value )
    {
        if(!trim($parameter))
        return $parameter;

        $this->config[$parameter] = $value;

    }
    function CHSUtoUTF8($c)
    {
        $str="";

        if ($c < 0x80) {
            $str.=$c;
        }

        else if ($c < 0x800) {
            $str.=(0xC0 | $c>>6);
            $str.=(0x80 | $c & 0x3F);
        }

        else if ($c < 0x10000) {
            $str.=(0xE0 | $c>>12);
            $str.=(0x80 | $c>>6 & 0x3F);
            $str.=(0x80 | $c & 0x3F);
        }

        else if ($c < 0x200000) {
            $str.=(0xF0 | $c>>18);
            $str.=(0x80 | $c>>12 & 0x3F);
            $str.=(0x80 | $c>>6 & 0x3F);
            $str.=(0x80 | $c & 0x3F);
        }

        return $str;
    }
    function CHStoUTF8(){

        if ($this->config["source_lang"]=="BIG5" || $this->config["source_lang"]=="GB2312") {
            $ret="";

            while($this->SourceText){

                if(ord(substr($this->SourceText,0,1))>127){

                    if ($this->config["source_lang"]=="BIG5") {
                        $utf8=$this->CHSUtoUTF8(hexdec($this->unicode_table[hexdec(bin2hex(substr($this->SourceText,0,2)))]));
                    }
                    if ($this->config["source_lang"]=="GB2312") {
                        $utf8=$this->CHSUtoUTF8(hexdec($this->unicode_table[hexdec(bin2hex(substr($this->SourceText,0,2)))-0x8080]));
                    }
                    for($i=0;$i<strlen($utf8);$i+=3)
                    $ret.=chr(substr($utf8,$i,3));

                    $this->SourceText=substr($this->SourceText,2,strlen($this->SourceText));
                }

                else{
                    $ret.=substr($this->SourceText,0,1);
                    $this->SourceText=substr($this->SourceText,1,strlen($this->SourceText));
                }
            }
            $this->unicode_table = array();
            $this->SourceText = "";
            return $ret;
        }

        if ($this->config["source_lang"]=="UTF8") {
            $out = "";
            $len = strlen($this->SourceText);
            $i = 0;
            while($i < $len) {
                $c = ord( substr( $this->SourceText, $i++, 1 ) );
                switch($c >> 4)
                {
                    case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                        // 0xxxxxxx
                        $out .= substr( $this->SourceText, $i-1, 1 );
                        break;
                    case 12: case 13:
                        // 110x xxxx   10xx xxxx
                        $char2 = ord( substr( $this->SourceText, $i++, 1 ) );
                        $char3 = $this->unicode_table[(($c & 0x1F) << 6) | ($char2 & 0x3F)];

                        if ($this->config["target_lang"]=="GB2312")
                        $out .= $this->_hex2bin( dechex(  $char3 + 0x8080 ) );

                        if ($this->config["target_lang"]=="BIG5")
                        $out .= $this->_hex2bin( $char3 );
                        break;
                    case 14:
                        // 1110 xxxx  10xx xxxx  10xx xxxx
                        $char2 = ord( substr( $this->SourceText, $i++, 1 ) );
                        $char3 = ord( substr( $this->SourceText, $i++, 1 ) );
                        $char4 = $this->unicode_table[(($c & 0x0F) << 12) | (($char2 & 0x3F) << 6) | (($char3 & 0x3F) << 0)];

                        if ($this->config["target_lang"]=="GB2312")
                        $out .= $this->_hex2bin( dechex ( $char4 + 0x8080 ) );

                        if ($this->config["target_lang"]=="BIG5")
                        $out .= $this->_hex2bin( $char4 );
                        break;
                }
            }

            return $out;
        }
    } 
    function CHStoUNICODE()
    {

        $utf="";

        while($this->SourceText)
        {
            if (ord(substr($this->SourceText,0,1))>127)
            {

                if ($this->config["source_lang"]=="GB2312")
                $utf.="&#x".$this->unicode_table[hexdec(bin2hex(substr($this->SourceText,0,2)))-0x8080].";";

                if ($this->config["source_lang"]=="BIG5")
                $utf.="&#x".$this->unicode_table[hexdec(bin2hex(substr($this->SourceText,0,2)))].";";

                $this->SourceText=substr($this->SourceText,2,strlen($this->SourceText));
            }
            else
            {
                $utf.=substr($this->SourceText,0,1);
                $this->SourceText=substr($this->SourceText,1,strlen($this->SourceText));
            }
        }
        return $utf;
    }
    function GB2312toBIG5()
    {
        $max=strlen($this->SourceText)-1;

        for($i=0;$i<$max;$i++){

            $h=ord($this->SourceText[$i]);

            if($h>=160){

                $l=ord($this->SourceText[$i+1]);

                if($h==161 && $l==64){
                    $gb="  ";
                }
                else{
                    fseek($this->ctf,($h-160)*510+($l-1)*2);
                    $gb=fread($this->ctf,2);
                }

                $this->SourceText[$i]=$gb[0];
                $this->SourceText[$i+1]=$gb[1];
                $i++;
            }
        }
        fclose($this->ctf);

        $result = $this->SourceText;

        $this->SourceText = "";

        return $result;
    } 
    function PINYINSearch($num){

        if($num>0&&$num<160){
            return chr($num);
        }

        elseif($num<-20319||$num>-10247){
            return "";
        }

        else{

            for($i=count($this->PINYIN_table)-1;$i>=0;$i--){
                if($this->PINYIN_table[$i][1]<=$num)
                break;
            }

            return $this->PINYIN_table[$i][0];
        }
    }
    function CHStoPINYIN(){
        if ( $this->config['source_lang']=="BIG5" ) {
            $this->ctf = fopen($this->config['codetable_dir'].$this->config['BIG5toGB_table'], "r");
            if (is_null($this->ctf)) {
                echo 'Fail to open file!';
                exit;
            }

            $this->SourceText = $this->GB2312toBIG5();
            $this->config['target_lang'] = "PINYIN";
        }

        $ret = array();
        $ri = 0;
        for($i=0;$i<strlen($this->SourceText);$i++){

            $p=ord(substr($this->SourceText,$i,1));

            if($p>160){
                $q=ord(substr($this->SourceText,++$i,1));
                $p=$p*256+$q-65536;
            }

            $ret[$ri]=$this->PINYINSearch($p);
            $ri = $ri + 1;
        }
        $this->SourceText = "";

        $this->PINYIN_table = array();

        return implode(" ", $ret);
    }

  }


?>