<?php
/*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* This software consists of voluntary contributions made by many individuals
* and is licensed under the MIT license. For more information, see
* <http://www.doctrine-project.org>.
*/
namespace Doctrine\Common\Lexer ;
/**
 * Base class for writing simple lexers, i.e. for creating small DSLs.
 *
 * A concrete lexer supplies the regular-expression fragments that delimit
 * tokens (getCatchablePatterns() / getNonCatchablePatterns()) and a
 * getType() implementation that classifies each matched string. This class
 * splits the input with those patterns, stores the resulting token list and
 * offers cursor-style navigation over it (moveNext(), peek(), glimpse(), ...).
 *
 * @since  2.0
 * @author Guilherme Blanco <guilhermeblanco@hotmail.com>
 * @author Jonathan Wage <jonwage@gmail.com>
 * @author Roman Borschel <roman@code-factory.org>
 */
abstract class AbstractLexer
{
    /**
     * Lexer original input string.
     *
     * @var string
     */
    private $input;

    /**
     * Array of scanned tokens.
     *
     * Each token is an associative array containing three items:
     *  - 'value'    : the string value of the token in the input string
     *  - 'type'     : the type of the token (identifier, numeric, string, input
     *                 parameter, none)
     *  - 'position' : the position of the token in the input string
     *
     * @var array
     */
    private $tokens = array();

    /**
     * Current lexer position in the token list (index of the next token that
     * moveNext() will load into the lookahead).
     *
     * @var int
     */
    private $position = 0;

    /**
     * Current peek offset, relative to the current lexer position.
     *
     * @var int
     */
    private $peek = 0;

    /**
     * Regular expression used to split the input, built lazily by scan().
     *
     * Kept per instance (not in a function-level static) so that lexers with
     * different patterns or modifiers never share one compiled expression.
     *
     * @var string|null
     */
    private $regex;

    /**
     * The next token in the input, or NULL when there is none.
     *
     * @var array|null
     */
    public $lookahead;

    /**
     * The last matched/seen token, or NULL when there is none.
     *
     * @var array|null
     */
    public $token;

    /**
     * Sets the input data to be tokenized.
     *
     * The Lexer is immediately reset and the new input tokenized.
     * Any unprocessed tokens from any previous input are lost.
     *
     * @param string $input The input to be tokenized.
     *
     * @return void
     */
    public function setInput($input)
    {
        $this->input  = $input;
        $this->tokens = array();

        $this->reset();
        $this->scan($input);
    }

    /**
     * Resets the lexer: clears the lookahead and the current token and
     * rewinds both the position and the peek offset to 0. The scanned
     * token list itself is kept.
     *
     * @return void
     */
    public function reset()
    {
        $this->lookahead = null;
        $this->token     = null;
        $this->peek      = 0;
        $this->position  = 0;
    }

    /**
     * Resets the peek pointer to 0.
     *
     * @return void
     */
    public function resetPeek()
    {
        $this->peek = 0;
    }

    /**
     * Resets the lexer position on the input to the given position.
     *
     * @param int $position Position to place the lexical scanner.
     *
     * @return void
     */
    public function resetPosition($position = 0)
    {
        $this->position = $position;
    }

    /**
     * Retrieve the original lexer's input until a given position.
     *
     * @param int $position Offset, in bytes, at which the returned string ends.
     *
     * @return string
     */
    public function getInputUntilPosition($position)
    {
        // Byte-wise substr() is intentional: token positions recorded by
        // scan() come from PREG_SPLIT_OFFSET_CAPTURE, which reports byte offsets.
        return substr($this->input, 0, $position);
    }

    /**
     * Checks whether a given token type matches the type of the current lookahead.
     *
     * @param int|string $token Token type to compare against (strict comparison).
     *
     * @return bool
     */
    public function isNextToken($token)
    {
        return null !== $this->lookahead && $this->lookahead['type'] === $token;
    }

    /**
     * Checks whether any of the given token types matches the type of the
     * current lookahead.
     *
     * @param array $tokens List of token types (compared strictly).
     *
     * @return bool
     */
    public function isNextTokenAny(array $tokens)
    {
        return null !== $this->lookahead && in_array($this->lookahead['type'], $tokens, true);
    }

    /**
     * Moves to the next token in the input string.
     *
     * The previous lookahead becomes the current token, the next scanned
     * token (if any) becomes the lookahead, and the peek offset is reset.
     *
     * @return bool TRUE if a lookahead token is available after the move.
     */
    public function moveNext()
    {
        $this->peek  = 0;
        $this->token = $this->lookahead;

        if (isset($this->tokens[$this->position])) {
            $this->lookahead = $this->tokens[$this->position];
            $this->position++;
        } else {
            // End of input: the position is deliberately left untouched.
            $this->lookahead = null;
        }

        return null !== $this->lookahead;
    }

    /**
     * Tells the lexer to skip input tokens until the lookahead is a token of
     * the given type (or the end of the input is reached).
     *
     * @param int|string $type The token type to skip until.
     *
     * @return void
     */
    public function skipUntil($type)
    {
        while (null !== $this->lookahead && $this->lookahead['type'] !== $type) {
            $this->moveNext();
        }
    }

    /**
     * Checks if the type of the given value is identical to the given token type.
     *
     * @param mixed $value Raw value, classified through getType().
     * @param int   $token Expected token type.
     *
     * @return bool
     */
    public function isA($value, $token)
    {
        return $this->getType($value) === $token;
    }

    /**
     * Moves the lookahead token forward.
     *
     * Returns the token at the current peek offset and advances that offset,
     * so consecutive calls walk further ahead. The offset is only advanced
     * when a token was actually found.
     *
     * @return array|null The next token or NULL if there are no more tokens ahead.
     */
    public function peek()
    {
        $index = $this->position + $this->peek;

        if (! isset($this->tokens[$index])) {
            return null;
        }

        $this->peek++;

        return $this->tokens[$index];
    }

    /**
     * Peeks at the next token, returns it and immediately resets the peek.
     *
     * @return array|null The next token or NULL if there are no more tokens ahead.
     */
    public function glimpse()
    {
        $peek       = $this->peek();
        $this->peek = 0;

        return $peek;
    }

    /**
     * Scans the input string for tokens and appends them to the token list.
     *
     * @param string $input A query string.
     *
     * @return void
     */
    protected function scan($input)
    {
        if (! isset($this->regex)) {
            // Catchable patterns become capture groups (and are emitted as
            // tokens through PREG_SPLIT_DELIM_CAPTURE); non-catchable patterns
            // are plain alternatives whose matches are discarded.
            $this->regex = sprintf(
                '/(%s)|%s/%s',
                implode(')|(', $this->getCatchablePatterns()),
                implode('|', $this->getNonCatchablePatterns()),
                $this->getModifiers()
            );
        }

        $flags   = PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_OFFSET_CAPTURE;
        $matches = preg_split($this->regex, $input, -1, $flags);

        if (false === $matches) {
            // preg_split() failed (PCRE error). Treat the whole input as a
            // single token at offset 0 instead of iterating over FALSE.
            $matches = array(array($input, 0));
        }

        foreach ($matches as $match) {
            // Must remain before 'value' assignment since it can change content
            $type = $this->getType($match[0]);

            $this->tokens[] = array(
                'value'    => $match[0],
                'type'     => $type,
                'position' => $match[1],
            );
        }
    }

    /**
     * Gets the literal for a given token.
     *
     * Looks for a class constant of the concrete lexer whose value is
     * identical to the token and returns it as "ClassName::CONSTANT_NAME".
     *
     * @param int $token
     *
     * @return string The constant literal, or the token itself when no
     *                constant has that value.
     */
    public function getLiteral($token)
    {
        $className = get_class($this);
        $reflClass = new \ReflectionClass($className);

        foreach ($reflClass->getConstants() as $name => $value) {
            if ($value === $token) {
                return $className . '::' . $name;
            }
        }

        return $token;
    }

    /**
     * Regex modifiers
     *
     * @return string
     */
    protected function getModifiers()
    {
        return 'i';
    }

    /**
     * Lexical catchable patterns.
     *
     * @return array
     */
    abstract protected function getCatchablePatterns();

    /**
     * Lexical non-catchable patterns.
     *
     * @return array
     */
    abstract protected function getNonCatchablePatterns();

    /**
     * Retrieve token type. Also processes the token value if necessary.
     *
     * @param string $value Passed by reference so the implementation may
     *                      rewrite the token value.
     *
     * @return int
     */
    abstract protected function getType(&$value);
}