2015-08-17 17:00:26 -07:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								< ? php  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								namespace  Drupal\Component\Transliteration ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**
								 * Implements transliteration without using the PECL extensions.
								 *
								 * Transliterations are done character-by-character, by looking up non-US-ASCII
								 * characters in a transliteration database.
								 *
								 * The database comes from two types of files, both of which are searched for in
								 * the PhpTransliteration::$dataDirectory directory. First, language-specific
								 * overrides are searched (see PhpTransliteration::readLanguageOverrides()). If
								 * there is no language-specific override for a character, the generic
								 * transliteration character tables are searched (see
								 * PhpTransliteration::readGenericData()). If looking up the character in the
								 * generic table results in a NULL value, or an illegal character is
								 * encountered, then a substitute character is returned.
								 *
								 * Some parts of this code were derived from the MediaWiki project's UtfNormal
								 * class, Copyright © 2004 Brion Vibber <brion@pobox.com>,
								 * http://www.mediawiki.org/
								 */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class  PhpTransliteration  implements  TransliterationInterface  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  /**
								   * Directory where data for transliteration resides.
								   *
								   * The constructor sets this (by default) to subdirectory 'data' underneath
								   * the directory where the class's PHP file resides; a caller may supply a
								   * different directory as the constructor argument.
								   *
								   * @var string
								   */
								  protected  $dataDirectory ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  /**
								   * Associative array of language-specific character transliteration tables.
								   *
								   * The outermost array keys are language codes. For each language code key,
								   * the value is an array whose keys are Unicode character codes, and whose
								   * values are the transliterations of those characters to US-ASCII. This is
								   * set up as needed in PhpTransliteration::replace() by calling
								   * PhpTransliteration::readLanguageOverrides().
								   *
								   * @var array
								   */
								  protected  $languageOverrides  =  array (); 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  /**
								   * Non-language-specific transliteration tables.
								   *
								   * Array whose keys are the upper two bytes of the Unicode character, and
								   * whose values are an array of transliterations for each lower-two bytes
								   * character code. This is set up as needed in PhpTransliteration::replace()
								   * by calling PhpTransliteration::readGenericData().
								   *
								   * @var array
								   */
								  protected  $genericMap  =  array (); 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  /**
								   * Constructs a transliteration object.
								   *
								   * @param string $data_directory
								   *   (optional) The directory where data files reside. If omitted, defaults
								   *   to subdirectory 'data' underneath the directory where the class's PHP
								   *   file resides.
								   */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  public function __construct($data_directory = NULL) {
								    // Use the caller's directory when one is given; otherwise fall back to
								    // the 'data' directory that sits next to this class file.
								    if (isset($data_directory)) {
								      $this->dataDirectory = $data_directory;
								    }
								    else {
								      $this->dataDirectory = __DIR__ . '/data';
								    }
								  }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  /**
								   * {@inheritdoc}
								   */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  public function removeDiacritics($string) {
								    // Two Unicode ranges hold the accented US-ASCII letters, with a few
								    // characters that are not accented letters mixed in. The excluded code
								    // points never change, so the tables are built once here instead of
								    // being re-created for every character in the loop below.
								    $exclusions_range1 = array(0x00d0, 0x00d7, 0x00f0, 0x00f7, 0x0138, 0x014a, 0x014b);
								    $exclusions_range2 = array(0x01DD, 0x01f7, 0x021c, 0x021d, 0x0220, 0x0221, 0x0241, 0x0242, 0x0245);

								    $result = '';

								    // NOTE(review): preg_split() with the /u modifier returns FALSE for
								    // malformed UTF-8, in which case this loop does not run and an empty
								    // string is returned - confirm whether callers can pass such input.
								    foreach (preg_split('//u', $string, 0, PREG_SPLIT_NO_EMPTY) as $character) {
								      $code = self::ordUTF8($character);

								      // Range membership uses exclusive bounds on both ends.
								      $range1 = $code > 0x00bf && $code < 0x017f;
								      $range2 = $code > 0x01cc && $code < 0x0250;

								      // Characters outside the ranges (or on an exclusion list) pass
								      // through untouched.
								      $replacement = $character;
								      if (($range1 && !in_array($code, $exclusions_range1)) || ($range2 && !in_array($code, $exclusions_range2))) {
								        // 'xyz' is handed over as the unknown-character placeholder;
								        // presumably it comes back when no mapping exists. Being three
								        // bytes long, it fails the length test below, as does any
								        // multi-character transliteration, so only one-byte replacements
								        // are accepted.
								        $to_add = $this->lookupReplacement($code, 'xyz');
								        if (strlen($to_add) === 1) {
								          $replacement = $to_add;
								        }
								      }

								      $result .= $replacement;
								    }

								    return $result;
								  }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  /**
								   * {@inheritdoc}
								   */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  public function transliterate($string, $langcode = 'en', $unknown_character = '?', $max_length = NULL) {
								    $result = '';
								    $length = 0;

								    // Split into Unicode characters. With the /u modifier preg_split()
								    // returns FALSE (not an array) when the subject is not valid UTF-8.
								    $characters = preg_split('//u', $string, 0, PREG_SPLIT_NO_EMPTY);

								    // Indexes of entries in $characters that are illegal bytes rather than
								    // decodable characters. Only populated on the fallback path.
								    $illegal = array();
								    if ($characters === FALSE) {
								      // Malformed input: split byte-wise instead. Each alternative matches
								      // one well-formed UTF-8 sequence (no overlong forms, no surrogates,
								      // nothing above U+10FFFF); any byte that fits none of them is
								      // captured on its own by the final group and flagged as illegal, so
								      // it is replaced by $unknown_character instead of aborting the
								      // whole transliteration.
								      $well_formed_or_byte = '/[\x00-\x7F]'
								        . '|[\xC2-\xDF][\x80-\xBF]'
								        . '|\xE0[\xA0-\xBF][\x80-\xBF]'
								        . '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'
								        . '|\xED[\x80-\x9F][\x80-\xBF]'
								        . '|\xF0[\x90-\xBF][\x80-\xBF]{2}'
								        . '|[\xF1-\xF3][\x80-\xBF]{3}'
								        . '|\xF4[\x80-\x8F][\x80-\xBF]{2}'
								        . '|(.)/s';
								      $characters = array();
								      if (preg_match_all($well_formed_or_byte, $string, $matches, PREG_SET_ORDER)) {
								        foreach ($matches as $index => $match) {
								          $characters[$index] = $match[0];
								          // Group 1 only participates for a byte that is not part of a
								          // well-formed sequence.
								          if (isset($match[1])) {
								            $illegal[$index] = TRUE;
								          }
								        }
								      }
								    }

								    // Transliterate each character in turn.
								    foreach ($characters as $index => $character) {
								      $code = isset($illegal[$index]) ? -1 : self::ordUTF8($character);
								      if ($code == -1) {
								        // Illegal character: substitute the placeholder.
								        $to_add = $unknown_character;
								      }
								      else {
								        $to_add = $this->replace($code, $langcode, $unknown_character);
								      }

								      // Check if this exceeds the maximum allowed length. A replacement that
								      // would overflow is dropped whole; the result is never cut mid-way
								      // through a multi-character replacement.
								      if (isset($max_length)) {
								        $length += strlen($to_add);
								        if ($length > $max_length) {
								          // There is no more space.
								          return $result;
								        }
								      }

								      $result .= $to_add;
								    }

								    return $result;
								  }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  /**
								   * Finds the character code for a UTF-8 character: like ord() but for UTF-8.
								   *
								   * @param string $character
								   *   A single UTF-8 character.
								   *
								   * @return int
								   *   The character code, or -1 if an illegal character is found.
								   */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  protected static function ordUTF8($character) {
								    $byte_count = strlen($character);
								    if ($byte_count === 0) {
								      // Nothing to decode; reading offset 0 would raise a notice.
								      return -1;
								    }

								    $first_byte = ord($character[0]);

								    if (($first_byte & 0x80) == 0) {
								      // Single-byte form: 0xxxxxxx.
								      return $first_byte;
								    }

								    // Work out, from the lead byte, how many continuation bytes follow and
								    // which payload bits the lead byte itself contributes.
								    if (($first_byte & 0xe0) == 0xc0) {
								      // Two-byte form: 110xxxxx 10xxxxxx.
								      $trailing = 1;
								      $code = $first_byte & 0x1f;
								    }
								    elseif (($first_byte & 0xf0) == 0xe0) {
								      // Three-byte form: 1110xxxx 10xxxxxx 10xxxxxx.
								      $trailing = 2;
								      $code = $first_byte & 0x0f;
								    }
								    elseif (($first_byte & 0xf8) == 0xf0) {
								      // Four-byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
								      $trailing = 3;
								      $code = $first_byte & 0x07;
								    }
								    else {
								      // Other forms are not legal.
								      return -1;
								    }

								    if ($byte_count <= $trailing) {
								      // Truncated sequence: the lead byte promises more bytes than exist.
								      return -1;
								    }

								    // Fold in six payload bits per continuation byte. Bytes beyond the
								    // sequence announced by the lead byte are ignored.
								    for ($offset = 1; $offset <= $trailing; $offset++) {
								      $byte = ord($character[$offset]);
								      if (($byte & 0xc0) != 0x80) {
								        // Not of the form 10xxxxxx, so the sequence is malformed.
								        return -1;
								      }
								      $code = ($code << 6) + ($byte & 0x3f);
								    }

								    return $code;
								  }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  /**
								   * Replaces a single Unicode character using the transliteration database.
								   *
								   * @param int $code
								   *   The character code of a Unicode character.
								   * @param string $langcode
								   *   The language code of the language the character is in.
								   * @param string $unknown_character
								   *   The character to substitute for characters without transliterated
								   *   equivalents.
								   *
								   * @return string
								   *   US-ASCII replacement character. If it has a mapping, it is returned;
								   *   otherwise, $unknown_character is returned. The replacement can contain
								   *   multiple characters.
								   */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  protected function replace($code, $langcode, $unknown_character) {
								    // Codes below 0x80 are already lower ASCII and map to themselves.
								    $is_ascii = $code < 0x80;
								    if ($is_ascii) {
								      return chr($code);
								    }

								    // Load the override table for this language the first time it is
								    // needed.
								    $overrides_loaded = isset($this->languageOverrides[$langcode]);
								    if (!$overrides_loaded) {
								      $this->readLanguageOverrides($langcode);
								    }

								    // A language-specific override wins; otherwise defer to the generic
								    // lookup.
								    return isset($this->languageOverrides[$langcode][$code])
								      ? $this->languageOverrides[$langcode][$code]
								      : $this->lookupReplacement($code, $unknown_character);
								  }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  /** 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *  Look  up  the  generic  replacement  for  a  UTF - 8  character  code . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   * 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *  @ param  $code 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *    The  UTF - 8  character  code . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *  @ param  string  $unknown_character 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *    ( optional )  The  character  to  substitute  for  characters  without  entries  in 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *    the  replacement  tables . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   * 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *  @ return  string 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *    US - ASCII  replacement  characters .  If  it  has  a  mapping ,  it  is  returned ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *    otherwise ,  $unknown_character  is  returned .  The  replacement  can  contain 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *    multiple  characters . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   */ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  protected function lookupReplacement($code, $unknown_character = '?') {
								    // Split the code into a bank (all bits above the low byte) and the
								    // offset of the character inside that bank (the low byte).
								    $bank = $code >> 8;
								    $offset = $code & 0xff;

								    // Load the table for this bank the first time it is needed.
								    if (!isset($this->genericMap[$bank])) {
								      $this->readGenericData($bank);
								    }

								    // Use the generic mapping when there is one.
								    if (isset($this->genericMap[$bank][$offset])) {
								      return $this->genericMap[$bank][$offset];
								    }

								    // No mapping is known for this character.
								    return $unknown_character;
								  }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  /** 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *  Reads  in  language  overrides  for  a  language  code . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   * 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *  The  data  is  read  from  files  named  " $langcode .php "  in 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *  PhpTransliteration :: $dataDirectory .  These  files  should  set  up  an  array 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *  variable  $overrides  with  an  element  whose  key  is  $langcode  and  whose  value 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *  is  an  array  whose  keys  are  character  codes ,  and  whose  values  are  their 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *  transliterations  in  this  language .  The  character  codes  can  be  for  any  valid 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *  Unicode  character ,  independent  of  the  number  of  bytes . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   * 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *  @ param  $langcode 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *    Code  for  the  language  to  read . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   */ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  protected function readLanguageOverrides($langcode) {
								    // Figure out the file name to use by sanitizing the language code,
								    // just in case: only ASCII letters and hyphens are kept.
								    $file = $this->dataDirectory . '/' . preg_replace('/[^a-zA-Z\-]/', '', $langcode) . '.php';

								    // Read in this file, which should set up a variable called $overrides,
								    // which will be local to this function.
								    if (is_file($file)) {
								      include $file;
								    }

								    // Always store an array for this language code. The file may be missing,
								    // may not define $overrides as an array, or may define it without an
								    // entry for $langcode (for example when sanitizing the code made the
								    // file name match a different language). Storing NULL in those cases
								    // would raise an undefined index notice and would make the isset() check
								    // in replace() fail, so the file would be re-read for every character.
								    if (isset($overrides) && is_array($overrides) && isset($overrides[$langcode]) && is_array($overrides[$langcode])) {
								      $this->languageOverrides[$langcode] = $overrides[$langcode];
								    }
								    else {
								      $this->languageOverrides[$langcode] = array();
								    }
								  }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  /** 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *  Reads  in  generic  transliteration  data  for  a  bank  of  characters . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   * 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *  The  data  is  read  in  from  a  file  named  " x $bank .php "  ( with  $bank  in 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *  hexadecimal  notation )  in  PhpTransliteration :: $dataDirectory .  These  files 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   * should set up a variable $base containing an array whose numerical indices
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   * are the lowest byte of the character code, and whose values are the
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *  transliterations  of  these  characters  into  US - ASCII .  Note  that  the  maximum 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *  Unicode  character  that  can  be  encoded  in  this  way  is  4  bytes . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   * 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *  @ param  $bank 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   *    First  two  bytes  of  the  Unicode  character ,  or  0  for  the  ASCII  range . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								   */ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  protected function readGenericData($bank) {
								    // Data files are named "x" followed by the bank number in lower-case
								    // hexadecimal, zero-padded to at least two digits.
								    $file = sprintf('%s/x%02x.php', $this->dataDirectory, $bank);

								    // The included file is evaluated in the scope of this method and is
								    // expected to define a local variable called $base.
								    if (is_file($file)) {
								      include $file;
								    }

								    // Save this data, falling back to an empty table when the file was
								    // missing or did not define $base as an array.
								    $this->genericMap[$bank] = (isset($base) && is_array($base)) ? $base : array();
								  }
							 
						 
					
						
							
								
									
										
										
										
											2016-06-02 15:56:09 -07:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2015-08-17 17:00:26 -07:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}