<?php
/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */
namespace Symfony\Polyfill\Iconv ;
/**
 * iconv implementation in pure PHP, UTF-8 centric.
 *
 * Implemented:
 * - iconv                     - Convert string to requested character encoding
 * - iconv_mime_decode         - Decodes a MIME header field
 * - iconv_mime_decode_headers - Decodes multiple MIME header fields at once
 * - iconv_get_encoding        - Retrieve internal configuration variables of iconv extension
 * - iconv_set_encoding        - Set current setting for character encoding conversion
 * - iconv_mime_encode         - Composes a MIME header field
 * - iconv_strlen              - Returns the character count of string
 * - iconv_strpos              - Finds position of first occurrence of a needle within a haystack
 * - iconv_strrpos             - Finds the last occurrence of a needle within a haystack
 * - iconv_substr              - Cut out part of a string
 *
 * Charsets available for conversion are defined by files
 * in the charset/ directory and by Iconv::$alias below.
 * You're welcome to send back any addition you make.
 *
 * @author Nicolas Grekas <p@tchwork.com>
 *
 * @internal
 */
final class Iconv
{
    const ERROR_ILLEGAL_CHARACTER = 'iconv(): Detected an illegal character in input string';
    const ERROR_WRONG_CHARSET = 'iconv(): Wrong charset, conversion from `%s\' to `%s\' is not allowed';

    public static $inputEncoding = 'utf-8';
    public static $outputEncoding = 'utf-8';
    public static $internalEncoding = 'utf-8';

    /** Lower-cased charset aliases mapped to the canonical name used for map lookups. */
    private static $alias = array(
        'utf8' => 'utf-8',
        'ascii' => 'us-ascii',
        'tis-620' => 'iso-8859-11',
        'cp1250' => 'windows-1250',
        'cp1251' => 'windows-1251',
        'cp1252' => 'windows-1252',
        'cp1253' => 'windows-1253',
        'cp1254' => 'windows-1254',
        'cp1255' => 'windows-1255',
        'cp1256' => 'windows-1256',
        'cp1257' => 'windows-1257',
        'cp1258' => 'windows-1258',
        'shift-jis' => 'cp932',
        'shift_jis' => 'cp932',
        'latin1' => 'iso-8859-1',
        'latin2' => 'iso-8859-2',
        'latin3' => 'iso-8859-3',
        'latin4' => 'iso-8859-4',
        'latin5' => 'iso-8859-9',
        'latin6' => 'iso-8859-10',
        'latin7' => 'iso-8859-13',
        'latin8' => 'iso-8859-14',
        'latin9' => 'iso-8859-15',
        'latin10' => 'iso-8859-16',
        'iso8859-1' => 'iso-8859-1',
        'iso8859-2' => 'iso-8859-2',
        'iso8859-3' => 'iso-8859-3',
        'iso8859-4' => 'iso-8859-4',
        'iso8859-5' => 'iso-8859-5',
        'iso8859-6' => 'iso-8859-6',
        'iso8859-7' => 'iso-8859-7',
        'iso8859-8' => 'iso-8859-8',
        'iso8859-9' => 'iso-8859-9',
        'iso8859-10' => 'iso-8859-10',
        'iso8859-11' => 'iso-8859-11',
        'iso8859-12' => 'iso-8859-12',
        'iso8859-13' => 'iso-8859-13',
        'iso8859-14' => 'iso-8859-14',
        'iso8859-15' => 'iso-8859-15',
        'iso8859-16' => 'iso-8859-16',
        'iso_8859-1' => 'iso-8859-1',
        'iso_8859-2' => 'iso-8859-2',
        'iso_8859-3' => 'iso-8859-3',
        'iso_8859-4' => 'iso-8859-4',
        'iso_8859-5' => 'iso-8859-5',
        'iso_8859-6' => 'iso-8859-6',
        'iso_8859-7' => 'iso-8859-7',
        'iso_8859-8' => 'iso-8859-8',
        'iso_8859-9' => 'iso-8859-9',
        'iso_8859-10' => 'iso-8859-10',
        'iso_8859-11' => 'iso-8859-11',
        'iso_8859-12' => 'iso-8859-12',
        'iso_8859-13' => 'iso-8859-13',
        'iso_8859-14' => 'iso-8859-14',
        'iso_8859-15' => 'iso-8859-15',
        'iso_8859-16' => 'iso-8859-16',
        'iso88591' => 'iso-8859-1',
        'iso88592' => 'iso-8859-2',
        'iso88593' => 'iso-8859-3',
        'iso88594' => 'iso-8859-4',
        'iso88595' => 'iso-8859-5',
        'iso88596' => 'iso-8859-6',
        'iso88597' => 'iso-8859-7',
        'iso88598' => 'iso-8859-8',
        'iso88599' => 'iso-8859-9',
        'iso885910' => 'iso-8859-10',
        'iso885911' => 'iso-8859-11',
        'iso885912' => 'iso-8859-12',
        'iso885913' => 'iso-8859-13',
        'iso885914' => 'iso-8859-14',
        'iso885915' => 'iso-8859-15',
        'iso885916' => 'iso-8859-16',
    );

    /** Transliteration table, lazily loaded by mapFromUtf8() through getData('translit'). */
    private static $translitMap = array();

    /** Cache of charset maps already loaded by loadMap(), keyed by "<type><charset>". */
    private static $convertMap = array();

    private static $errorHandler;
    private static $lastError;

    /**
     * Byte length of a UTF-8 sequence, keyed by its lead byte masked with 0xF0.
     * Any masked value missing from this table is treated as a length of 1.
     */
    private static $ulenMask = array("\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4);

    /** Result of the last UTF-8 validity check made by iconv(); read by the UTF-8 walkers. */
    private static $isValidUtf8;

    /**
     * Converts a string from one charset to another, going through UTF-8.
     *
     * The "//TRANSLIT" and "//IGNORE" suffixes are recognised on $outCharset in
     * any order and any combination; on $inCharset they are stripped and ignored.
     *
     * @param string $inCharset  Source charset; an empty string means iso-8859-1
     * @param string $outCharset Target charset; an empty string means iso-8859-1
     * @param string $str        The bytes to convert
     *
     * @return string|false The converted string, or false when a charset is
     *                      unknown or the input cannot be converted
     */
    public static function iconv($inCharset, $outCharset, $str)
    {
        if ('' === $str .= '') {
            return '';
        }

        // Prepare for //IGNORE and //TRANSLIT
        $translit = $ignore = false;

        $outCharset = strtolower($outCharset);
        $inCharset = strtolower($inCharset);

        if ('' === $outCharset) {
            $outCharset = 'iso-8859-1';
        }
        if ('' === $inCharset) {
            $inCharset = 'iso-8859-1';
        }

        // Loop so that the suffixes are stripped whatever their order is
        // ("//TRANSLIT//IGNORE" as well as "//IGNORE//TRANSLIT").
        do {
            $loop = false;

            if ('//translit' === substr($outCharset, -10)) {
                $loop = $translit = true;
                $outCharset = substr($outCharset, 0, -10);
            }

            if ('//ignore' === substr($outCharset, -8)) {
                $loop = $ignore = true;
                $outCharset = substr($outCharset, 0, -8);
            }
        } while ($loop);

        do {
            $loop = false;

            if ('//translit' === substr($inCharset, -10)) {
                $loop = true;
                $inCharset = substr($inCharset, 0, -10);
            }

            if ('//ignore' === substr($inCharset, -8)) {
                $loop = true;
                $inCharset = substr($inCharset, 0, -8);
            }
        } while ($loop);

        if (isset(self::$alias[$inCharset])) {
            $inCharset = self::$alias[$inCharset];
        }
        if (isset(self::$alias[$outCharset])) {
            $outCharset = self::$alias[$outCharset];
        }

        // Load charset maps
        if (('utf-8' !== $inCharset && !self::loadMap('from.', $inCharset, $inMap))
            || ('utf-8' !== $outCharset && !self::loadMap('to.', $outCharset, $outMap))) {
            trigger_error(sprintf(self::ERROR_WRONG_CHARSET, $inCharset, $outCharset));

            return false;
        }

        if ('utf-8' !== $inCharset) {
            // Convert input to UTF-8
            $result = '';
            if (self::mapToUtf8($result, $inMap, $str, $ignore)) {
                $str = $result;
            } else {
                $str = false;
            }
            self::$isValidUtf8 = true;
        } else {
            self::$isValidUtf8 = preg_match('//u', $str);

            if (!self::$isValidUtf8 && !$ignore) {
                trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                return false;
            }

            if ('utf-8' === $outCharset) {
                // UTF-8 validation
                $str = self::utf8ToUtf8($str, $ignore);
            }
        }

        if ('utf-8' !== $outCharset && false !== $str) {
            // Convert UTF-8 to the output charset
            $result = '';
            if (self::mapFromUtf8($result, $outMap, $str, $ignore, $translit)) {
                return $result;
            }

            return false;
        }

        return $str;
    }

    /**
     * Decodes every header field found before the first blank line of $str.
     *
     * @param string      $str     Raw header block; CRLF and lone CR are normalised to LF
     * @param int         $mode    Passed through to iconv_mime_decode()
     * @param string|null $charset Target charset; defaults to self::$internalEncoding
     *
     * @return array|false Field name => value, or a list of values when a name
     *                     is repeated; false as soon as one field fails to decode
     */
    public static function iconv_mime_decode_headers($str, $mode = 0, $charset = null)
    {
        if (null === $charset) {
            $charset = self::$internalEncoding;
        }

        if (false !== strpos($str, "\r")) {
            $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
        }
        $str = explode("\n\n", $str, 2);

        $headers = array();

        // A new field starts at every line break that is not followed by folding whitespace.
        $lines = preg_split('/\n(?![ \t])/', $str[0]);

        foreach ($lines as $line) {
            $line = self::iconv_mime_decode($line, $mode, $charset);
            if (false === $line) {
                return false;
            }
            $field = explode(':', $line, 2);

            if (2 === count($field)) {
                if (isset($headers[$field[0]])) {
                    if (!is_array($headers[$field[0]])) {
                        $headers[$field[0]] = array($headers[$field[0]]);
                    }
                    $headers[$field[0]][] = ltrim($field[1]);
                } else {
                    $headers[$field[0]] = ltrim($field[1]);
                }
            }
        }

        return $headers;
    }

    /**
     * Decodes the first header field of $str, expanding "=?charset?B|Q?data?=" encoded words.
     *
     * @param string      $str     A header field; only the first (possibly folded) field is used
     * @param int         $mode    Bitmask; ICONV_MIME_DECODE_CONTINUE_ON_ERROR is the only flag read here
     * @param string|null $charset Target charset; defaults to self::$internalEncoding
     *
     * @return string|false The decoded field, or false on failure
     */
    public static function iconv_mime_decode($str, $mode = 0, $charset = null)
    {
        if (null === $charset) {
            $charset = self::$internalEncoding;
        }
        if (ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode) {
            $charset .= '//IGNORE';
        }

        if (false !== strpos($str, "\r")) {
            $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
        }
        $str = preg_split('/\n(?![ \t])/', rtrim($str), 2);
        // Unfold continuation lines into a single space.
        $str = preg_replace('/[ \t]*\n[ \t]+/', ' ', rtrim($str[0]));
        // Yields: text, charset, scheme, data, text, charset, scheme, data, text, ...
        $str = preg_split('/=\?([^?]+)\?([bqBQ])\?(.*?)\?=/', $str, -1, PREG_SPLIT_DELIM_CAPTURE);

        $result = self::iconv('utf-8', $charset, $str[0]);
        if (false === $result) {
            return false;
        }

        $i = 1;
        $len = count($str);

        while ($i < $len) {
            $c = strtolower($str[$i]);
            if ((ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode)
                && 'utf-8' !== $c
                && !isset(self::$alias[$c])
                && !self::loadMap('from.', $c, $d)) {
                // Unknown charset: keep the encoded word verbatim (see below).
                $d = false;
            } elseif ('B' === strtoupper($str[$i + 1])) {
                $d = base64_decode($str[$i + 2]);
            } else {
                // Q scheme: "_" is a space and "=XX" a hex byte; literal "%" is protected first.
                $d = rawurldecode(strtr(str_replace('%', '%25', $str[$i + 2]), '=_', '% '));
            }

            if (false !== $d) {
                if ('' !== $d) {
                    if ('' === $d = self::iconv($c, $charset, $d)) {
                        $str[$i + 3] = substr($str[$i + 3], 1);
                    } else {
                        $result .= $d;
                    }
                }
                $d = self::iconv('utf-8', $charset, $str[$i + 3]);
                // Whitespace-only text between two encoded words is dropped.
                if ('' !== trim($d)) {
                    $result .= $d;
                }
            } elseif (ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode) {
                $result .= "=?{$str[$i]}?{$str[$i + 1]}?{$str[$i + 2]}?={$str[$i + 3]}";
            } else {
                $result = false;
                break;
            }

            $i += 4;
        }

        return $result;
    }

    /**
     * Returns one of the configured encodings, or all three.
     *
     * @param string $type 'input_encoding', 'output_encoding' or 'internal_encoding';
     *                     any other value returns all three as an array
     *
     * @return string|array
     */
    public static function iconv_get_encoding($type = 'all')
    {
        switch ($type) {
            case 'input_encoding': return self::$inputEncoding;
            case 'output_encoding': return self::$outputEncoding;
            case 'internal_encoding': return self::$internalEncoding;
        }

        return array(
            'input_encoding' => self::$inputEncoding,
            'output_encoding' => self::$outputEncoding,
            'internal_encoding' => self::$internalEncoding,
        );
    }

    /**
     * Sets one of the configured encodings.
     *
     * @param string $type    'input_encoding', 'output_encoding' or 'internal_encoding'
     * @param string $charset Stored as given, without validation
     *
     * @return bool False when $type is not one of the three known settings
     */
    public static function iconv_set_encoding($type, $charset)
    {
        switch ($type) {
            case 'input_encoding': self::$inputEncoding = $charset; break;
            case 'output_encoding': self::$outputEncoding = $charset; break;
            case 'internal_encoding': self::$internalEncoding = $charset; break;

            default: return false;
        }

        return true;
    }

    /**
     * Composes a MIME header field made of "=?charset?scheme?data?=" encoded words.
     *
     * @param string     $fieldName  Header name; replaced by '' when it contains bytes >= 0x80
     * @param string     $fieldValue Header value, encoded in $pref['input-charset']
     * @param array|null $pref       Optional keys: 'scheme' ('B' or 'Q'), 'input-charset',
     *                               'output-charset', 'line-length', 'line-break-chars'
     *
     * @return string|false The encoded field, or false when a charset conversion fails
     */
    public static function iconv_mime_encode($fieldName, $fieldValue, $pref = null)
    {
        if (!is_array($pref)) {
            $pref = array();
        }

        $pref += array(
            'scheme' => 'B',
            'input-charset' => self::$internalEncoding,
            'output-charset' => self::$internalEncoding,
            'line-length' => 76,
            'line-break-chars' => "\r\n",
        );

        if (preg_match('/[\x80-\xFF]/', $fieldName)) {
            $fieldName = '';
        }

        $scheme = strtoupper(substr($pref['scheme'], 0, 1));
        $in = strtolower($pref['input-charset']);
        $out = strtolower($pref['output-charset']);

        if ('utf-8' !== $in && false === $fieldValue = self::iconv($in, 'utf-8', $fieldValue)) {
            return false;
        }

        // Split into whole UTF-8 characters so that none is cut across two encoded words.
        preg_match_all('/./us', $fieldValue, $chars);

        $chars = isset($chars[0]) ? $chars[0] : array();

        $lineBreak = (int) $pref['line-length'];
        $lineStart = "=?{$pref['output-charset']}?{$scheme}?";
        $lineLength = strlen($fieldName) + 2 + strlen($lineStart) + 2;
        $lineOffset = strlen($lineStart) + 3;
        $lineData = '';

        $fieldValue = array();

        $Q = 'Q' === $scheme;

        foreach ($chars as $c) {
            if ('utf-8' !== $out && false === $c = self::iconv('utf-8', $out, $c)) {
                return false;
            }

            // $o is what the current line would contain once $c is added to it.
            $o = $Q
                ? $c = preg_replace_callback(
                    '/[=_\?\x00-\x1F\x80-\xFF]/',
                    array(__CLASS__, 'qpByteCallback'),
                    $c
                )
                : base64_encode($lineData.$c);

            // Too long for the current line: flush it and start a new encoded word.
            if (isset($o[$lineBreak - $lineLength])) {
                if (!$Q) {
                    $lineData = base64_encode($lineData);
                }
                $fieldValue[] = $lineStart.$lineData.'?=';
                $lineLength = $lineOffset;
                $lineData = '';
            }

            $lineData .= $c;
            $Q && $lineLength += strlen($c);
        }

        if ('' !== $lineData) {
            if (!$Q) {
                $lineData = base64_encode($lineData);
            }
            $fieldValue[] = $lineStart.$lineData.'?=';
        }

        return $fieldName.': '.implode($pref['line-break-chars'].' ', $fieldValue);
    }

    /**
     * Returns the number of characters in $s.
     *
     * Delegates to strlen1() when the xml extension is loaded, to strlen2() otherwise.
     *
     * @param string      $s
     * @param string|null $encoding Charset of $s; defaults to self::$internalEncoding
     *
     * @return int|false False when $s cannot be converted to UTF-8
     */
    public static function iconv_strlen($s, $encoding = null)
    {
        static $hasXml = null;
        if (null === $hasXml) {
            $hasXml = extension_loaded('xml');
        }

        if ($hasXml) {
            return self::strlen1($s, $encoding);
        }

        return self::strlen2($s, $encoding);
    }

    /**
     * Character count based on utf8_decode().
     *
     * @param string      $s
     * @param string|null $encoding Charset of $s; defaults to self::$internalEncoding
     *
     * @return int|false False when $s cannot be converted to UTF-8
     */
    public static function strlen1($s, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 !== stripos($encoding, 'utf-8') && false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        return strlen(utf8_decode($s));
    }

    /**
     * Character count computed by walking the UTF-8 lead bytes of $s.
     *
     * @param string      $s
     * @param string|null $encoding Charset of $s; defaults to self::$internalEncoding
     *
     * @return int|false False when $s cannot be converted to UTF-8
     */
    public static function strlen2($s, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 !== stripos($encoding, 'utf-8') && false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        $ulenMask = self::$ulenMask;

        $i = 0;
        $j = 0;
        $len = strlen($s);

        while ($i < $len) {
            // The high nibble of the lead byte gives the sequence length.
            $u = $s[$i] & "\xF0";
            $i += isset($ulenMask[$u]) ? $ulenMask[$u] : 1;
            ++$j;
        }

        return $j;
    }

    /**
     * Finds the character position of the first occurrence of $needle in $haystack.
     *
     * @param string      $haystack
     * @param string      $needle
     * @param int         $offset   Character offset to start from; a negative value
     *                              counts from the end of $haystack
     * @param string|null $encoding Charset of both strings; defaults to self::$internalEncoding
     *
     * @return int|false Position counted from the start of $haystack, or false when not found
     */
    public static function iconv_strpos($haystack, $needle, $offset = 0, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }

        if (0 !== stripos($encoding, 'utf-8')) {
            if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
                return false;
            }
            if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
                return false;
            }
        }

        // An empty needle never matches (same guard as in iconv_strrpos()).
        if (!isset($needle[0])) {
            return false;
        }

        $offset = (int) $offset;

        if (0 > $offset) {
            // Make the offset absolute, otherwise the value added to the
            // result below would be negative.
            $offset += self::iconv_strlen($haystack, 'utf-8');
            if (0 > $offset) {
                return false;
            }
        }

        if (0 < $offset) {
            $haystack = self::iconv_substr($haystack, $offset, 2147483647, 'utf-8');
            if (false === $haystack) {
                // The offset is past the end of the haystack.
                return false;
            }
        }

        $pos = strpos($haystack, $needle);

        // $pos is a byte offset: convert it to a character count.
        return false === $pos ? false : ($offset + ($pos ? self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8') : 0));
    }

    /**
     * Finds the character position of the last occurrence of $needle in $haystack.
     *
     * @param string      $haystack
     * @param string      $needle
     * @param string|null $encoding Charset of both strings; defaults to self::$internalEncoding
     *
     * @return int|false Position, or false when $needle is empty or not found
     */
    public static function iconv_strrpos($haystack, $needle, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }

        if (0 !== stripos($encoding, 'utf-8')) {
            if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
                return false;
            }
            if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
                return false;
            }
        }

        $pos = isset($needle[0]) ? strrpos($haystack, $needle) : false;

        if (false === $pos) {
            return false;
        }

        // A match at byte 0 is at character 0; otherwise count the characters before it.
        return $pos ? self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8') : 0;
    }

    /**
     * Returns the part of $s selected by character $start and $length.
     *
     * @param string      $s
     * @param int         $start    Character offset; a negative value counts from the end
     * @param int         $length   Maximum number of characters; a negative value
     *                              leaves that many characters off the end
     * @param string|null $encoding Charset of $s; defaults to self::$internalEncoding
     *
     * @return string|false False when $start or $length is out of range, or
     *                      when a charset conversion fails
     */
    public static function iconv_substr($s, $start, $length = 2147483647, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 === stripos($encoding, 'utf-8')) {
            // Already UTF-8: null means "no conversion back" at the end.
            $encoding = null;
        } elseif (false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        $s .= '';
        $slen = self::iconv_strlen($s, 'utf-8');
        $start = (int) $start;
        $length = (int) $length;

        if (0 > $start) {
            $start += $slen;
        }
        if (0 > $start) {
            return false;
        }
        if ($start >= $slen) {
            return false;
        }

        // Number of characters available from $start to the end.
        $rx = $slen - $start;

        if (0 > $length) {
            $length += $rx;
        }
        if (0 === $length) {
            return '';
        }
        if (0 > $length) {
            return false;
        }

        if ($length > $rx) {
            $length = $rx;
        }

        // The "s" modifier is required for "." to also match line feeds.
        $rx = '/^'.($start ? self::pregOffset($start) : '').'('.self::pregOffset($length).')/us';

        $s = preg_match($rx, $s, $s) ? $s[1] : '';

        if (null === $encoding) {
            return $s;
        }

        return self::iconv('utf-8', $encoding, $s);
    }

    /**
     * Loads a conversion map into $map, using self::$convertMap as a cache.
     *
     * When no 'to.' map exists for a charset, the flipped 'from.' map is used instead.
     *
     * @param string $type    'from.' or 'to.'
     * @param string $charset Canonical, lower-cased charset name
     * @param array  $map     Receives the map (by reference)
     *
     * @return bool False when no map could be found for the charset
     */
    private static function loadMap($type, $charset, &$map)
    {
        if (!isset(self::$convertMap[$type.$charset])) {
            if (false === $map = self::getData($type.$charset)) {
                if ('to.' === $type && self::loadMap('from.', $charset, $map)) {
                    $map = array_flip($map);
                } else {
                    return false;
                }
            }

            self::$convertMap[$type.$charset] = $map;
        } else {
            $map = self::$convertMap[$type.$charset];
        }

        return true;
    }

    /**
     * Copies $str while validating it as UTF-8.
     *
     * @param string $str    Input whose overall validity is already stored in self::$isValidUtf8
     * @param bool   $ignore When truthy, invalid bytes are skipped instead of failing
     *
     * @return string|false False (after triggering an error) on an invalid byte when $ignore is falsy
     */
    private static function utf8ToUtf8($str, $ignore)
    {
        $ulenMask = self::$ulenMask;
        $valid = self::$isValidUtf8;

        // $u starts as a copy of $str and is overwritten in place; $j is the write cursor.
        $u = $str;
        $i = $j = 0;
        $len = strlen($str);

        while ($i < $len) {
            if ($str[$i] < "\x80") {
                $u[$j++] = $str[$i++];
            } else {
                $ulen = $str[$i] & "\xF0";
                $ulen = isset($ulenMask[$ulen]) ? $ulenMask[$ulen] : 1;
                $uchr = substr($str, $i, $ulen);

                // A non-ASCII byte with length 1 cannot start a sequence.
                if (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr))) {
                    if ($ignore) {
                        ++$i;
                        continue;
                    }

                    trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                    return false;
                } else {
                    $i += $ulen;
                }

                // Copy the 2 to 4 bytes of the sequence; the "0 !==" tests are
                // always true and only serve to chain the assignments.
                $u[$j++] = $uchr[0];

                isset($uchr[1]) && 0 !== ($u[$j++] = $uchr[1])
                    && isset($uchr[2]) && 0 !== ($u[$j++] = $uchr[2])
                    && isset($uchr[3]) && 0 !== ($u[$j++] = $uchr[3]);
            }
        }

        return substr($u, 0, $j);
    }

    /**
     * Converts $str to UTF-8 using $map, appending the output to $result.
     *
     * Two-byte keys are tried before single-byte keys at each position.
     *
     * @param string $result Receives the converted bytes (by reference, appended)
     * @param array  $map    Source byte(s) => UTF-8 sequence
     * @param string $str    Input in the source charset
     * @param bool   $ignore When truthy, unmapped bytes are skipped instead of failing
     *
     * @return bool False (after triggering an error) on an unmapped byte when $ignore is falsy
     */
    private static function mapToUtf8(&$result, $map, $str, $ignore)
    {
        $len = strlen($str);
        for ($i = 0; $i < $len; ++$i) {
            if (isset($str[$i + 1], $map[$str[$i].$str[$i + 1]])) {
                $result .= $map[$str[$i].$str[++$i]];
            } elseif (isset($map[$str[$i]])) {
                $result .= $map[$str[$i]];
            } elseif (!$ignore) {
                trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                return false;
            }
        }

        return true;
    }

    /**
     * Converts UTF-8 $str to the target charset using $map, appending the output to $result.
     *
     * @param string $result   Receives the converted bytes (by reference, appended)
     * @param array  $map      UTF-8 sequence => target byte(s)
     * @param string $str      UTF-8 input
     * @param bool   $ignore   When truthy, invalid or unmappable characters are skipped
     * @param bool   $translit When truthy, unmappable characters are replaced using the
     *                         'translit' data file or their NFD decomposition
     *
     * @return bool False when a character cannot be converted and $ignore is falsy
     */
    private static function mapFromUtf8(&$result, $map, $str, $ignore, $translit)
    {
        $ulenMask = self::$ulenMask;
        $valid = self::$isValidUtf8;

        if ($translit && !self::$translitMap) {
            self::$translitMap = self::getData('translit');
        }

        $i = 0;
        $len = strlen($str);

        while ($i < $len) {
            if ($str[$i] < "\x80") {
                $uchr = $str[$i++];
            } else {
                $ulen = $str[$i] & "\xF0";
                $ulen = isset($ulenMask[$ulen]) ? $ulenMask[$ulen] : 1;
                $uchr = substr($str, $i, $ulen);

                if ($ignore && (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr)))) {
                    ++$i;
                    continue;
                } else {
                    $i += $ulen;
                }
            }

            if (isset($map[$uchr])) {
                $result .= $map[$uchr];
            } elseif ($translit) {
                if (isset(self::$translitMap[$uchr])) {
                    $uchr = self::$translitMap[$uchr];
                } elseif ($uchr >= "\xC3\x80") {
                    $uchr = \Normalizer::normalize($uchr, \Normalizer::NFD);

                    if ($uchr[0] < "\x80") {
                        // Keep the ASCII base character, drop the combining marks.
                        $uchr = $uchr[0];
                    } elseif ($ignore) {
                        continue;
                    } else {
                        return false;
                    }
                } elseif ($ignore) {
                    // No replacement exists: without this branch the same
                    // character would be re-queued below forever.
                    continue;
                } else {
                    return false;
                }

                // Re-queue the replacement in front of the remaining input.
                $str = $uchr.substr($str, $i);
                $len = strlen($str);
                $i = 0;
            } elseif (!$ignore) {
                return false;
            }
        }

        return true;
    }

    /**
     * preg_replace_callback() handler that encodes one byte as "=XX" for the Q scheme.
     *
     * @param array $m Match array; $m[0] is the single byte to encode
     *
     * @return string "=" followed by exactly two upper-case hex digits
     */
    private static function qpByteCallback($m)
    {
        // %02X keeps the leading zero for bytes below 0x10 (e.g. "\n" => "=0A").
        return sprintf('=%02X', ord($m[0]));
    }

    /**
     * Builds a regex fragment matching exactly $offset characters.
     *
     * The count is emitted in chunks of at most 65535, one ".{n}" quantifier per chunk.
     *
     * @param int $offset Number of characters to match
     *
     * @return string
     */
    private static function pregOffset($offset)
    {
        $rx = array();
        $offset = (int) $offset;

        while ($offset > 65535) {
            $rx[] = '.{65535}';
            $offset -= 65535;
        }

        return implode('', $rx).'.{'.$offset.'}';
    }

    /**
     * Loads a data file from the Resources/charset directory next to this file.
     *
     * @param string $file Base name of the file, without the ".php" extension
     *
     * @return mixed|false Whatever the file returns, or false when it does not exist
     */
    private static function getData($file)
    {
        if (file_exists($file = __DIR__.'/Resources/charset/'.$file.'.php')) {
            return require $file;
        }

        return false;
    }
}