2015-08-17 17:00:26 -07:00
< ? php
/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */
namespace Symfony\Component\Yaml ;
2018-11-23 12:29:20 +00:00
use Symfony\Component\Yaml\Exception\ParseException ;
2015-08-17 17:00:26 -07:00
/**
 * Unescaper encapsulates unescaping rules for single and double-quoted
 * YAML strings.
 *
 * @author Matthew Lewinski <matthew@lewinski.org>
 *
 * @internal
 */
class Unescaper
{
    /**
     * Regex fragment that matches an escaped character in a double quoted string.
     *
     * It matches a backslash followed by either "x" and 2 hex digits, "u" and
     * 4 hex digits, "U" and 8 hex digits, or any other single character.
     * The fragment carries no delimiters or modifiers; callers add their own.
     */
    const REGEX_ESCAPED_CHARACTER = '\\\\(x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}|.)';

    /**
     * Unescapes a single quoted string.
     *
     * The only escape in a single quoted string is a doubled single quote,
     * which is collapsed into one single quote.
     *
     * @param string $value A single quoted string
     *
     * @return string The unescaped string
     */
    public function unescapeSingleQuotedString($value)
    {
        return str_replace('\'\'', '\'', $value);
    }

    /**
     * Unescapes a double quoted string.
     *
     * Every backslash escape matched by REGEX_ESCAPED_CHARACTER is replaced
     * by the character it denotes.
     *
     * @param string $value A double quoted string
     *
     * @return string The unescaped string
     *
     * @throws ParseException When an unknown escape sequence is found or when
     *                        the PCRE engine fails (e.g. malformed UTF-8 input)
     */
    public function unescapeDoubleQuotedString($value)
    {
        $callback = function ($match) {
            // $match[0] is the whole escape sequence, backslash included.
            return $this->unescapeCharacter($match[0]);
        };

        $unescaped = preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);

        // preg_replace_callback() yields null on failure (for instance when the
        // subject is not valid UTF-8 and the "u" modifier is set); surface that
        // instead of silently returning a non-string.
        if (null === $unescaped) {
            throw new ParseException(sprintf('Unable to unescape double quoted string (PCRE error code %d).', preg_last_error()));
        }

        return $unescaped;
    }

    /**
     * Unescapes a character that was found in a double-quoted string.
     *
     * @param string $value An escaped character (the backslash followed by the
     *                      escape code and, for x/u/U, its hex digits)
     *
     * @return string The unescaped character
     *
     * @throws ParseException When the escape code is not recognised
     */
    private function unescapeCharacter($value)
    {
        // $value[0] is the backslash; $value[1] selects the escape.
        switch ($value[1]) {
            case '0':
                return "\x00";
            case 'a':
                return "\x07";
            case 'b':
                return "\x08";
            case 't':
            case "\t":
                // Both "\t" and a backslash followed by a literal tab yield a tab.
                return "\t";
            case 'n':
                return "\n";
            case 'v':
                return "\x0B";
            case 'f':
                return "\x0C";
            case 'r':
                return "\r";
            case 'e':
                return "\x1B";
            case ' ':
                return ' ';
            case '"':
                return '"';
            case '/':
                return '/';
            case '\\':
                return '\\';
            case 'N':
                // U+0085 NEXT LINE
                return "\xC2\x85";
            case '_':
                // U+00A0 NO-BREAK SPACE
                return "\xC2\xA0";
            case 'L':
                // U+2028 LINE SEPARATOR
                return "\xE2\x80\xA8";
            case 'P':
                // U+2029 PARAGRAPH SEPARATOR
                return "\xE2\x80\xA9";
            case 'x':
                // The hex digits start right after the backslash and escape code.
                return self::utf8chr(hexdec(substr($value, 2, 2)));
            case 'u':
                return self::utf8chr(hexdec(substr($value, 2, 4)));
            case 'U':
                return self::utf8chr(hexdec(substr($value, 2, 8)));
            default:
                throw new ParseException(sprintf('Found unknown escape character "%s".', $value));
        }
    }

    /**
     * Get the UTF-8 character for the given code point.
     *
     * The code point is first reduced modulo 0x200000, so larger values wrap
     * around rather than being rejected. No further validation is performed.
     *
     * @param int $c The unicode code point
     *
     * @return string The corresponding UTF-8 character
     */
    private static function utf8chr($c)
    {
        $c %= 0x200000;

        // 1 byte: 0xxxxxxx
        if ($c < 0x80) {
            return \chr($c);
        }

        // 2 bytes: 110xxxxx 10xxxxxx
        if ($c < 0x800) {
            return \chr(0xC0 | ($c >> 6)).\chr(0x80 | ($c & 0x3F));
        }

        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        if ($c < 0x10000) {
            return \chr(0xE0 | ($c >> 12)).\chr(0x80 | (($c >> 6) & 0x3F)).\chr(0x80 | ($c & 0x3F));
        }

        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        return \chr(0xF0 | ($c >> 18)).\chr(0x80 | (($c >> 12) & 0x3F)).\chr(0x80 | (($c >> 6) & 0x3F)).\chr(0x80 | ($c & 0x3F));
    }
}