<?php

namespace Masterminds\HTML5\Tests\Parser ;
use Masterminds\HTML5\Parser\UTF8Utils ;
use Masterminds\HTML5\Parser\Scanner ;
use Masterminds\HTML5\Parser\Tokenizer ;
class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase
{
// ================================================================
// Additional assertions.
// ================================================================
2019-01-24 08:00:03 +00:00
2018-11-23 12:29:20 +00:00
/**
 * Asserts that an event matches both the expected event type and the expected data.
 *
 * @param string $type
 *   Expected event type; compared against $event['name'].
 * @param mixed $expects
 *   When this is an array it is compared against the whole $event['data']
 *   array; any other value is compared against $event['data'][0] only.
 * @param array $event
 *   The event record, read through its 'name' and 'data' keys.
 */
public function assertEventEquals($type, $expects, $event)
{
    $this->assertEquals($type, $event['name'], " Event $type for " . print_r($event, true));

    if (is_array($expects)) {
        $this->assertEquals($expects, $event['data'], " Event $type should equal " . print_r($expects, true) . ': ' . print_r($event, true));
    } else {
        $this->assertEquals($expects, $event['data'][0], " Event $type should equal $expects : " . print_r($event, true));
    }
}
/**
 * Asserts that a given event is an 'error' event.
 *
 * @param array $event
 *   The event record; only its 'name' key is inspected.
 */
public function assertEventError($event)
{
    $this->assertEquals('error', $event['name'], 'Expected error for event: ' . print_r($event, true));
}
/**
 * Runs a map of input => expectation pairs through the parser and checks each one.
 *
 * For every pair this:
 * - optionally prints the input and its expectation to STDOUT (when $debug is set),
 * - checks the event stack depth (only when $depth is greater than 0),
 * - checks that event 0 has the type $name and matches the expectation.
 *
 * @param string $name
 *   Event type that event 0 must have.
 * @param int $depth
 *   Expected stack depth; values of 0 or less disable the depth check.
 * @param array $tests
 *   Map of markup to parse => expected data for event 0.
 * @param bool $debug
 *   Whether to print each case before it is parsed.
 */
protected function isAllGood($name, $depth, $tests, $debug = false)
{
    $checkDepth = $depth > 0;

    foreach ($tests as $try => $expects) {
        if ($debug) {
            $dump = print_r($expects, true);
            fprintf(STDOUT, " %s expects %s \n ", $try, $dump);
        }

        $stack = $this->parse($try);

        if ($checkDepth) {
            $actualDepth = $stack->depth();
            $message = " Expected depth $depth for test $try . " . print_r($stack, true);
            $this->assertEquals($depth, $actualDepth, $message);
        }

        $firstEvent = $stack->get(0);
        $this->assertEventEquals($name, $expects, $firstEvent);
    }
}
// ================================================================
// Test cases.
// ================================================================
/**
 * Parsing an empty string must produce exactly one event, and it must be 'eof'.
 */
public function testParse()
{
    list($tok, $events) = $this->createTokenizer('');

    $tok->parse();
    $e1 = $events->get(0);

    // Spelled depth() like every other call in this class.
    $this->assertEquals(1, $events->depth());
    $this->assertEquals('eof', $e1['name']);
}
/**
 * Whitespace-only input must come back as a single 'text' event holding
 * the input unchanged; the expected depth of 2 leaves room for one more
 * event after it.
 */
public function testWhitespace()
{
    $spaces = ' ';
    list($tok, $events) = $this->createTokenizer($spaces);

    $tok->parse();

    $this->assertEquals(2, $events->depth());

    $e1 = $events->get(0);
    $this->assertEquals('text', $e1['name']);
    $this->assertEquals($spaces, $e1['data'][0]);
}
/**
 * Character references in text: valid ones decode to their character,
 * broken ones raise an error event first.
 */
public function testCharacterReference()
{
    // Each key is a distinct spelling of a reference (named, named,
    // decimal, and a bare ampersand). The keys must be written as
    // references: a literal '<' cannot produce a text event, and
    // repeating the literal '&' key would silently collapse the cases
    // into one array entry.
    $good = array(
        '&amp;' => '&',
        '&lt;' => '<',
        '&#38;' => '&',
        '&' => '&',
    );
    $this->isAllGood('text', 2, $good);

    // Test with broken charrefs: unknown name, malformed hex, malformed decimal.
    $broken = array(
        '&foo',
        '&#xfoo',
        '&#foo',
    );
    foreach ($broken as $str) {
        $events = $this->parse($str);
        $e1 = $events->get(0);
        $this->assertEquals('error', $e1['name']);
    }

    // FIXME: Once the text processor is done, need to verify that the
    // tokens are transformed correctly into text.
}
/**
 * Malformed markup declarations must raise an error event and then be
 * emitted verbatim as a 'comment' event.
 */
public function testBogusComment()
{
    $bogus = array(
        '</+this is a bogus comment. +>',
        '<!+this is a bogus comment. !>',
        '<!D OCTYPE foo bar>',
        '<!DOCTYEP foo bar>',
        '<![CADATA[ TEST ]]>',
        '<![CDATA Hello ]]>',
        '<![CDATA[ Hello [[>',
        '<!CDATA[[ test ]]>',
        '<![CDATA[',
        '<![CDATA[hellooooo hello',
        '<? Hello World ?>',
        '<? Hello World',
    );

    foreach ($bogus as $str) {
        $events = $this->parse($str);
        $this->assertEventError($events->get(0));
        // The comment payload is the complete original input.
        $this->assertEventEquals('comment', $str, $events->get(1));
    }
}
/**
 * End tags: clean ones, recoverable failures (error followed by the end
 * tag), and inputs that degrade to bogus comments.
 */
public function testEndTag()
{
    $succeed = array(
        '</a>' => 'a',
        '</test>' => 'test',
        // Line break between the tag name and '>'. The input must start
        // directly with '</' and the name must follow immediately:
        // leading text would become its own event and '</ ' is a bogus
        // comment (see $comments below).
        "</test\n>" => 'test',
        '</thisIsTheTagThatDoesntEndItJustGoesOnAndOnMyFriend>' => 'thisisthetagthatdoesntenditjustgoesonandonmyfriend',
        // See 8.2.4.10, which requires this and does not say error.
        '</a<b>' => 'a<b',
    );
    $this->isAllGood('endTag', 2, $succeed);

    // Recoverable failures
    $fail = array(
        '</a class="monkey">' => 'a',
        '</a <b>' => 'a',
        '</a <b <c>' => 'a',
        '</a is the loneliest letter>' => 'a',
        '</a' => 'a',
    );
    foreach ($fail as $test => $result) {
        $events = $this->parse($test);
        $this->assertEquals(3, $events->depth());
        // Should have triggered an error.
        $this->assertEventError($events->get(0));
        // Should have tried to parse anyway.
        $this->assertEventEquals('endTag', $result, $events->get(1));
    }

    // BogoComments
    $comments = array(
        '</>' => '</>',
        '</ >' => '</ >',
        '</ a>' => '</ a>',
    );
    foreach ($comments as $test => $result) {
        $events = $this->parse($test);
        $this->assertEquals(3, $events->depth());
        // Should have triggered an error.
        $this->assertEventError($events->get(0));
        // Should have tried to parse anyway.
        $this->assertEventEquals('comment', $result, $events->get(1));
    }
}
/**
 * Comments: well-formed ones yield their inner text as event 0; broken
 * ones raise an error first and then yield whatever text was recovered.
 */
public function testComment()
{
    $good = array(
        '<!--easy-->' => 'easy',
        '<!-- 1 > 0 -->' => ' 1 > 0 ',
        '<!-- --$i -->' => ' --$i ',
        '<!----$i-->' => '--$i',
        // Input starts directly with '<!--' so the comment is event 0;
        // the expectation is exactly the text between '<!--' and '-->'.
        "<!--\nHello World.\na-->" => "\nHello World.\na",
        '<!-- <!-- -->' => ' <!-- ',
    );
    foreach ($good as $test => $expected) {
        $events = $this->parse($test);
        $this->assertEventEquals('comment', $expected, $events->get(0));
    }

    $fail = array(
        '<!-->' => '',
        '<!--Hello' => 'Hello',
        // A NUL byte is expected to come back as UTF8Utils::FFFD.
        "<!--\0Hello" => UTF8Utils::FFFD . 'Hello',
        '<!--' => '',
    );
    foreach ($fail as $test => $expected) {
        $events = $this->parse($test);
        $this->assertEquals(3, $events->depth());
        $this->assertEventError($events->get(0));
        $this->assertEventEquals('comment', $expected, $events->get(1));
    }
}
/**
 * Well-formed CDATA sections yield a 'cdata' event carrying the text
 * between '<![CDATA[' and the terminating ']]>'.
 */
public function testCDATASection()
{
    $good = array(
        '<![CDATA[ This is a test. ]]>' => ' This is a test. ',
        '<![CDATA[CDATA]]>' => 'CDATA',
        // ']] >' (with a space) does not terminate the section.
        '<![CDATA[ ]] > ]]>' => ' ]] > ',
        '<![CDATA[ ]]>' => ' ',
    );
    $this->isAllGood('cdata', 2, $good);
}
/**
 * Doctype tokenization for well-formed and malformed declarations.
 *
 * Every expectation is the full data array of the 'doctype' event:
 * array(name, id type, id value, flag). All well-formed cases below
 * carry false in the last slot; all malformed cases carry true and are
 * preceded by an error event.
 */
public function testDoctype()
{
    $good = array(
        '<!DOCTYPE html>' => array('html', 0, null, false),
        '<!doctype html>' => array('html', 0, null, false),
        '<!DocType html>' => array('html', 0, null, false),
        // Newline and form feed as the separator. The input must begin
        // with '<!DOCTYPE' for the doctype to be event 0.
        "<!DOCTYPE\nhtml>" => array('html', 0, null, false),
        "<!DOCTYPE\fhtml>" => array('html', 0, null, false),
        '<!DOCTYPE html PUBLIC "foo bar">' => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', false),
        "<!DOCTYPE html PUBLIC 'foo bar'>" => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', false),
        '<!DOCTYPE html PUBLIC "foo bar" >' => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', false),
        "<!DOCTYPE html\nPUBLIC\n'foo bar'>" => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', false),
        '<!DOCTYPE html SYSTEM "foo bar">' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', false),
        "<!DOCTYPE html SYSTEM 'foo bar'>" => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', false),
        '<!DOCTYPE html SYSTEM "foo/bar" >' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo/bar', false),
        "<!DOCTYPE html\nSYSTEM\n'foo bar'>" => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', false),
    );
    $this->isAllGood('doctype', 2, $good);

    $bad = array(
        '<!DOCTYPE>' => array(null, EventStack::DOCTYPE_NONE, null, true),
        '<!DOCTYPE >' => array(null, EventStack::DOCTYPE_NONE, null, true),
        '<!DOCTYPE foo' => array('foo', EventStack::DOCTYPE_NONE, null, true),
        '<!DOCTYPE foo PUB' => array('foo', EventStack::DOCTYPE_NONE, null, true),
        '<!DOCTYPE foo PUB>' => array('foo', EventStack::DOCTYPE_NONE, null, true),
        '<!DOCTYPE foo PUB "Looks good">' => array('foo', EventStack::DOCTYPE_NONE, null, true),
        '<!DOCTYPE foo SYSTME "Looks good"' => array('foo', EventStack::DOCTYPE_NONE, null, true),

        // Can't tell whether these are ids or ID types, since the context is chopped.
        '<!DOCTYPE foo PUBLIC' => array('foo', EventStack::DOCTYPE_NONE, null, true),
        '<!DOCTYPE foo PUBLIC>' => array('foo', EventStack::DOCTYPE_NONE, null, true),
        '<!DOCTYPE foo SYSTEM' => array('foo', EventStack::DOCTYPE_NONE, null, true),
        '<!DOCTYPE foo SYSTEM>' => array('foo', EventStack::DOCTYPE_NONE, null, true),

        '<!DOCTYPE html SYSTEM "foo bar"' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', true),
        '<!DOCTYPE html SYSTEM "foo bar" more stuff>' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', true),
    );
    foreach ($bad as $test => $expects) {
        $events = $this->parse($test);
        $this->assertEquals(3, $events->depth(), " Counting events for ' $test ': " . print_r($events, true));
        $this->assertEventError($events->get(0));
        $this->assertEventEquals('doctype', $expects, $events->get(1));
    }
}
/**
 * Processing instructions yield a 'pi' event whose data is the target
 * name, optionally followed by the instruction text.
 */
public function testProcessorInstruction()
{
    $good = array(
        '<?hph ?>' => 'hph',
        '<?hph echo "Hello World"; ?>' => array(
            'hph',
            'echo "Hello World"; ',
        ),
        // Newlines around the instruction text. The input must begin
        // with '<?' for the PI to be event 0, and the expected data ends
        // at the newline that precedes the closing marker.
        "<?hph\necho 'Hello World';\n?>" => array(
            'hph',
            "echo 'Hello World';\n",
        ),
    );
    $this->isAllGood('pi', 2, $good);
}
/**
 * Tests simple tags without attributes: opening tags, self-closing
 * tags, and tags that are cut off before the closing '>'.
 */
public function testSimpleTags()
{
    // Tag names are expected in lower case regardless of input case.
    $open = array(
        '<foo>' => 'foo',
        '<FOO>' => 'foo',
        '<fOO>' => 'foo',
        '<foo >' => 'foo',
        "<foo\n\n\n\n>" => 'foo',
        '<foo:bar>' => 'foo:bar',
    );
    $this->isAllGood('startTag', 2, $open);

    $selfClose = array(
        '<foo/>' => 'foo',
        '<FOO/>' => 'foo',
        '<foo />' => 'foo',
        "<foo\n\n\n\n/>" => 'foo',
        '<foo:bar/>' => 'foo:bar',
    );
    foreach ($selfClose as $test => $expects) {
        $events = $this->parse($test);
        $this->assertEquals(2, $events->depth(), " Counting events for ' $test ' " . print_r($events, true));

        $event = $events->get(0);
        $this->assertEventEquals('startTag', $expects, $event);
        // Slot 2 of the startTag data must be true for self-closing tags.
        $this->assertTrue($event['data'][2]);
    }

    // Input ends before the tag is closed: error first, then the tag.
    $bad = array(
        '<foo' => 'foo',
        '<foo ' => 'foo',
        '<foo/' => 'foo',
        '<foo /' => 'foo',
    );
    foreach ($bad as $test => $expects) {
        $events = $this->parse($test);
        $this->assertEquals(3, $events->depth(), " Counting events for ' $test ': " . print_r($events, true));
        $this->assertEventError($events->get(0));
        $this->assertEventEquals('startTag', $expects, $events->get(1));
    }
}
/**
 * A tag written as '<attr="value">' (tag name missing) must raise three
 * error events and then be reported as a start tag named after the
 * would-be attribute, followed by 'eof'.
 */
public function testTagsWithAttributeAndMissingName()
{
    $cases = array(
        '<id="top_featured">' => 'id',
        '<color="white">' => 'color',
        "<class='neaktivni_stranka'>" => 'class',
        '<bgcolor="white">' => 'bgcolor',
        '<class="nom">' => 'class',
    );

    foreach ($cases as $html => $expected) {
        $events = $this->parse($html);
        $this->assertEventError($events->get(0));
        $this->assertEventError($events->get(1));
        $this->assertEventError($events->get(2));
        $this->assertEventEquals('startTag', $expected, $events->get(3));
        $this->assertEventEquals('eof', null, $events->get(4));
    }
}
/**
 * A '<' that appears right after a tag name (the previous tag was never
 * closed) must raise an error and then be handled as the start of the
 * next construct.
 */
public function testTagNotClosedAfterTagName()
{
    // Unclosed start tag immediately followed by another start tag:
    // error, first tag, second tag, eof.
    $cases = array(
        '<noscript<img>' => array('noscript', 'img'),
        '<center<a>' => array('center', 'a'),
        '<br<br>' => array('br', 'br'),
    );
    foreach ($cases as $html => $expected) {
        $events = $this->parse($html);
        $this->assertEventError($events->get(0));
        $this->assertEventEquals('startTag', $expected[0], $events->get(1));
        $this->assertEventEquals('startTag', $expected[1], $events->get(2));
        $this->assertEventEquals('eof', null, $events->get(3));
    }

    // The stray '<' is followed by '>': a second error, then text.
    $events = $this->parse('<span<>02</span>');
    $this->assertEventError($events->get(0));
    $this->assertEventEquals('startTag', 'span', $events->get(1));
    $this->assertEventError($events->get(2));
    $this->assertEventEquals('text', '>02', $events->get(3));
    $this->assertEventEquals('endTag', 'span', $events->get(4));
    $this->assertEventEquals('eof', null, $events->get(5));

    // Unclosed start tag followed by an end tag.
    $events = $this->parse('<p</p>');
    $this->assertEventError($events->get(0));
    $this->assertEventEquals('startTag', 'p', $events->get(1));
    $this->assertEventEquals('endTag', 'p', $events->get(2));
    $this->assertEventEquals('eof', null, $events->get(3));

    // The unclosed tag sits inside a properly opened element.
    $events = $this->parse('<strong><WordPress</strong>');
    $this->assertEventEquals('startTag', 'strong', $events->get(0));
    $this->assertEventError($events->get(1));
    $this->assertEventEquals('startTag', 'wordpress', $events->get(2));
    $this->assertEventEquals('endTag', 'strong', $events->get(3));
    $this->assertEventEquals('eof', null, $events->get(4));

    // Missing tag name combined with a missing '>'.
    $events = $this->parse('<src=<a>');
    $this->assertEventError($events->get(0));
    $this->assertEventError($events->get(1));
    $this->assertEventError($events->get(2));
    $this->assertEventEquals('startTag', 'src', $events->get(3));
    $this->assertEventEquals('startTag', 'a', $events->get(4));
    $this->assertEventEquals('eof', null, $events->get(5));

    // Here no second start tag is expected: error, 'br', eof.
    $events = $this->parse('<br...<a>');
    $this->assertEventError($events->get(0));
    $this->assertEventEquals('startTag', 'br', $events->get(1));
    $this->assertEventEquals('eof', null, $events->get(2));
}
/**
 * A tag name followed directly by an illegal character must raise an
 * error and still yield a start tag named by the leading legal part.
 */
public function testIllegalTagNames()
{
    $cases = array(
        '<li">' => 'li',
        '<p">' => 'p',
        // NOTE(review): written as an explicit entity. With a plain
        // space after the name this would be a legal tag (see '<foo >'
        // in testSimpleTags) and no error would be raised — confirm
        // against the upstream test data.
        '<b&nbsp;>' => 'b',
        '<static*all>' => 'static',
        '<h*0720/>' => 'h',
        '<st*ATTRIBUTE />' => 'st',
    );

    foreach ($cases as $html => $expected) {
        $events = $this->parse($html);
        $this->assertEventError($events->get(0));
        $this->assertEventEquals('startTag', $expected, $events->get(1));
    }
}
/**
 * Attribute parsing on start tags.
 *
 * Every expectation is the full data array of the 'startTag' event:
 * array(tag name, attribute map, self-closing flag). Attributes written
 * without a value map to null.
 *
 * @depends testCharacterReference
 */
public function testTagAttributes()
{
    // Opening tags.
    $good = array(
        '<foo bar="baz">' => array('foo', array('bar' => 'baz'), false),
        '<foo bar=" baz ">' => array('foo', array('bar' => ' baz '), false),
        "<foo bar=\"\nbaz\n\">" => array('foo', array('bar' => "\nbaz\n"), false),
        "<foo bar='baz'>" => array('foo', array('bar' => 'baz'), false),
        '<foo bar="A full sentence.">' => array('foo', array('bar' => 'A full sentence.'), false),
        "<foo a='1' b=\"2\">" => array('foo', array('a' => '1', 'b' => '2'), false),
        "<foo ns:bar='baz'>" => array('foo', array('ns:bar' => 'baz'), false),
        // Ampersands inside attribute values: one escaped form, then
        // bare forms. The escaped spelling keeps this key distinct from
        // the bare one below; identical keys would collapse into one case.
        "<foo a='blue&amp;red'>" => array('foo', array('a' => 'blue&red'), false),
        "<foo a='blue&red'>" => array('foo', array('a' => 'blue&red'), false),
        "<foo a='blue&&&red'>" => array('foo', array('a' => 'blue&&&red'), false),
        "<foo a='blue&&red'>" => array('foo', array('a' => 'blue&&red'), false),
        "<foo\nbar='baz'\n>" => array('foo', array('bar' => 'baz'), false),
        '<doe a deer>' => array('doe', array('a' => null, 'deer' => null), false),
        '<foo bar=baz>' => array('foo', array('bar' => 'baz'), false),

        // Updated for 8.1.2.3
        '<foo bar = "baz" >' => array('foo', array('bar' => 'baz'), false),

        // The spec allows an unquoted value '/'. This will not be a closing
        // tag.
        '<foo bar=/>' => array('foo', array('bar' => '/'), false),
        '<foo bar=baz/>' => array('foo', array('bar' => 'baz/'), false),
    );
    $this->isAllGood('startTag', 2, $good);

    // Self-closing tags. Attribute names are expected in lower case;
    // attribute values keep their case.
    $withEnd = array(
        '<foo bar="baz"/>' => array('foo', array('bar' => 'baz'), true),
        '<foo BAR="baz"/>' => array('foo', array('bar' => 'baz'), true),
        '<foo BAR="BAZ"/>' => array('foo', array('bar' => 'BAZ'), true),
        "<foo a='1' b=\"2\" c=3 d/>" => array(
            'foo',
            array('a' => '1', 'b' => '2', 'c' => '3', 'd' => null),
            true,
        ),
    );
    $this->isAllGood('startTag', 2, $withEnd);

    // Cause a parse error.
    $bad = array(
        // This will emit an entity lookup failure for &+dark.
        "<foo a='blue&+dark'>" => array('foo', array('a' => 'blue&+dark'), false),
        '<foo bar=>' => array('foo', array('bar' => null), false),
        '<foo bar="oh' => array('foo', array('bar' => 'oh'), false),
        '<foo bar=oh">' => array('foo', array('bar' => 'oh"'), false),

        // these attributes are ignored because of current implementation
        // of method "DOMElement::setAttribute"
        // see issue #23: https://github.com/Masterminds/html5-php/issues/23
        '<foo b"="baz">' => array('foo', array(), false),
        '<foo 2abc="baz">' => array('foo', array(), false),
        '<foo ?="baz">' => array('foo', array(), false),
        '<foo foo?bar="baz">' => array('foo', array(), false),
    );
    foreach ($bad as $test => $expects) {
        $events = $this->parse($test);
        $this->assertEquals(3, $events->depth(), " Counting events for ' $test ': " . print_r($events, true));
        $this->assertEventError($events->get(0));
        $this->assertEventEquals('startTag', $expects, $events->get(1));
    }

    // Cause multiple parse errors.
    $reallyBad = array(
        '<foo ="bar">' => array('foo', array('=' => null, '"bar"' => null), false),
        '<foo////>' => array('foo', array(), true),
        // character "&" in unquoted attribute shouldn't cause an infinite loop
        '<foo bar=index.php?str=1&id=29>' => array('foo', array('bar' => 'index.php?str=1&id=29'), false),
    );
    foreach ($reallyBad as $test => $expects) {
        $events = $this->parse($test);
        // Only the two leading errors are asserted here; the startTag
        // payload in $expects is recorded but not checked yet.
        $this->assertEventError($events->get(0));
        $this->assertEventError($events->get(1));
    }

    // Regression: Malformed elements should be detected. The unclosed
    // <foo> must still be emitted with its attribute, followed by <bar>
    // and the closing </foo>.
    $events = $this->parse('<foo baz="1" <bar></foo>');
    $this->assertEventError($events->get(0));
    $this->assertEventEquals('startTag', array('foo', array('baz' => '1'), false), $events->get(1));
    $this->assertEventEquals('startTag', array('bar', array(), false), $events->get(2));
    $this->assertEventEquals('endTag', array('foo'), $events->get(3));
}
/**
 * Raw-text elements: everything up to the matching end tag must be
 * emitted as one 'text' event, markup-looking content included.
 */
public function testRawText()
{
    $good = array(
        '<script>abcd efg hijk lmnop</script> ' => 'abcd efg hijk lmnop',
        '<script><not/><the/><tag></script>' => '<not/><the/><tag>',
        '<script><<<<<<<<</script>' => '<<<<<<<<',
        // An unterminated '</script' is part of the text.
        '<script>hello</script</script>' => 'hello</script',
        "<script>\nhello</script\n</script>" => "\nhello</script\n",
        '<script>&</script>' => '&',
        '<script><!--not a comment--></script>' => '<!--not a comment-->',
        '<script><![CDATA[not a comment]]></script>' => '<![CDATA[not a comment]]>',
    );
    foreach ($good as $test => $expects) {
        $events = $this->parse($test);
        $this->assertEventEquals('startTag', 'script', $events->get(0));
        $this->assertEventEquals('text', $expects, $events->get(1));
        $this->assertEventEquals('endTag', 'script', $events->get(2));
    }

    // Input ends before the element is closed: the error is reported
    // before the recovered text.
    $bad = array(
        '<script>&</script' => '&</script',
        '<script>Hello world' => 'Hello world',
    );
    foreach ($bad as $test => $expects) {
        $events = $this->parse($test);
        $this->assertEquals(4, $events->depth(), " Counting events for ' $test ': " . print_r($events, true));
        $this->assertEventEquals('startTag', 'script', $events->get(0));
        $this->assertEventError($events->get(1));
        $this->assertEventEquals('text', $expects, $events->get(2));
    }

    // Testing case sensitivity
    $events = $this->parse('<TITLE>a test</TITLE>');
    $this->assertEventEquals('startTag', 'title', $events->get(0));
    $this->assertEventEquals('text', 'a test', $events->get(1));
    $this->assertEventEquals('endTag', 'title', $events->get(2));

    // Testing end tags with whitespaces
    $events = $this->parse('<title>Whitespaces are tasty</title >');
    $this->assertEventEquals('startTag', 'title', $events->get(0));
    $this->assertEventEquals('text', 'Whitespaces are tasty', $events->get(1));
    $this->assertEventEquals('endTag', 'title', $events->get(2));
}
/**
 * In RCDATA mode character references are decoded while comment-like
 * markup stays plain text.
 */
public function testRcdata()
{
    // The apostrophe is written as a character reference: a literal
    // quote here would terminate the PHP string, and the reference is
    // what the expected text below shows being decoded.
    list($tok, $events) = $this->createTokenizer('<title>&#x27;<!-- not a comment --></TITLE>');

    $tok->setTextMode(\Masterminds\HTML5\Elements::TEXT_RCDATA, 'title');
    $tok->parse();

    $this->assertEventEquals('text', "'<!-- not a comment -->", $events->get(1));
}
/**
 * Text events interleaved with tags, CDATA sections, comments and
 * character references. Each depth check counts the events asserted
 * below it plus one further event.
 */
public function testText()
{
    $events = $this->parse('a<br>b');
    $this->assertEquals(4, $events->depth(), 'Events: ' . print_r($events, true));
    $this->assertEventEquals('text', 'a', $events->get(0));
    $this->assertEventEquals('startTag', 'br', $events->get(1));
    $this->assertEventEquals('text', 'b', $events->get(2));

    $events = $this->parse('<a>Test</a>');
    $this->assertEquals(4, $events->depth(), 'Events: ' . print_r($events, true));
    $this->assertEventEquals('startTag', 'a', $events->get(0));
    $this->assertEventEquals('text', 'Test', $events->get(1));
    $this->assertEventEquals('endTag', 'a', $events->get(2));

    // A text node of "0" must not be dropped.
    $events = $this->parse('<p>0</p><p>1</p>');
    $this->assertEquals(7, $events->depth(), 'Events: ' . print_r($events, true));
    $this->assertEventEquals('startTag', 'p', $events->get(0));
    $this->assertEventEquals('text', '0', $events->get(1));
    $this->assertEventEquals('endTag', 'p', $events->get(2));
    $this->assertEventEquals('startTag', 'p', $events->get(3));
    $this->assertEventEquals('text', '1', $events->get(4));
    $this->assertEventEquals('endTag', 'p', $events->get(5));

    $events = $this->parse('a<![CDATA[test]]>b');
    $this->assertEquals(4, $events->depth(), 'Events: ' . print_r($events, true));
    $this->assertEventEquals('text', 'a', $events->get(0));
    $this->assertEventEquals('cdata', 'test', $events->get(1));
    $this->assertEventEquals('text', 'b', $events->get(2));

    $events = $this->parse('a<!--test-->b');
    $this->assertEquals(4, $events->depth(), 'Events: ' . print_r($events, true));
    $this->assertEventEquals('text', 'a', $events->get(0));
    $this->assertEventEquals('comment', 'test', $events->get(1));
    $this->assertEventEquals('text', 'b', $events->get(2));

    // The ampersand is written as a reference: a bare '&b' would be an
    // unknown named reference (compare '&foo' in testCharacterReference)
    // and add an error event, breaking the depth of 2.
    $events = $this->parse('a&amp;b');
    $this->assertEquals(2, $events->depth(), 'Events: ' . print_r($events, true));
    $this->assertEventEquals('text', 'a&b', $events->get(0));

    $events = $this->parse('a²b');
    $this->assertEquals(2, $events->depth(), 'Events: ' . print_r($events, true));
    $this->assertEventEquals('text', 'a²b', $events->get(0));
}
// ================================================================
// Utility functions.
// ================================================================
/**
 * Builds a Tokenizer over the given input, wired to a fresh EventStack.
 *
 * @param string $string
 *   The markup to tokenize.
 * @param bool $debug
 *   Value assigned to the scanner's debug property.
 *
 * @return array
 *   A two-element list: the Tokenizer, then the EventStack that will
 *   receive its events.
 */
protected function createTokenizer($string, $debug = false)
{
    $eventHandler = new EventStack();
    $scanner = new Scanner($string);
    $scanner->debug = $debug;

    return array(
        new Tokenizer($scanner, $eventHandler),
        $eventHandler,
    );
}
/**
 * Tokenizes the given input and returns the events that were recorded.
 *
 * @param string $string
 *   The markup to tokenize.
 * @param bool $debug
 *   Passed through to createTokenizer().
 *
 * @return EventStack
 *   The event stack filled during parsing.
 */
public function parse($string, $debug = false)
{
    list($tok, $events) = $this->createTokenizer($string, $debug);
    $tok->parse();

    return $events;
}
}