composer update

This commit is contained in:
Oliver Davies 2019-01-24 08:00:03 +00:00
parent f6abc3dce2
commit 71dfaca858
1753 changed files with 45274 additions and 14619 deletions

View file

@ -1,9 +1,7 @@
<?php
namespace Masterminds;
use Masterminds\HTML5\Parser\FileInputStream;
use Masterminds\HTML5\Parser\InputStream;
use Masterminds\HTML5\Parser\StringInputStream;
use Masterminds\HTML5\Parser\DOMTreeBuilder;
use Masterminds\HTML5\Parser\Scanner;
use Masterminds\HTML5\Parser\Tokenizer;
@ -12,37 +10,38 @@ use Masterminds\HTML5\Serializer\Traverser;
/**
* This class offers convenience methods for parsing and serializing HTML5.
* It is roughly designed to mirror the \DOMDocument class that is
* provided with most versions of PHP.
* It is roughly designed to mirror the \DOMDocument native class.
*/
class HTML5
{
/**
* Global options for the parser and serializer.
*
* @var array
*/
protected $options = array(
// If the serializer should encode all entities.
'encode_entities' => false
private $defaultOptions = array(
// Whether the serializer should aggressively encode all characters as entities.
'encode_entities' => false,
// Prevents the parser from automatically assigning the HTML5 namespace to the DOM document.
'disable_html_ns' => false,
);
protected $errors = array();
public function __construct(array $options = array())
public function __construct(array $defaultOptions = array())
{
$this->options = array_merge($this->options, $options);
$this->defaultOptions = array_merge($this->defaultOptions, $defaultOptions);
}
/**
* Get the default options.
* Get the current default options.
*
* @return array The default options.
* @return array
*/
public function getOptions()
{
return $this->options;
return $this->defaultOptions;
}
/**
@ -55,14 +54,13 @@ class HTML5
*
* The rules governing parsing are set out in the HTML 5 spec.
*
* @param string|resource $file
* The path to the file to parse. If this is a resource, it is
* assumed to be an open stream whose pointer is set to the first
* byte of input.
* @param array $options
* Configuration options when parsing the HTML
* @param string|resource $file The path to the file to parse. If this is a resource, it is
* assumed to be an open stream whose pointer is set to the first
* byte of input.
* @param array $options Configuration options when parsing the HTML.
*
* @return \DOMDocument A DOM document. This object type is defined by the libxml
* library, and should have been included with your version of PHP.
* library, and should have been included with your version of PHP.
*/
public function load($file, array $options = array())
{
@ -80,12 +78,11 @@ class HTML5
* Take a string of HTML 5 (or earlier) and parse it into a
* DOMDocument.
*
* @param string $string
* A html5 document as a string.
* @param array $options
* Configuration options when parsing the HTML
* @param string $string A html5 document as a string.
* @param array $options Configuration options when parsing the HTML.
*
* @return \DOMDocument A DOM document. DOM is part of libxml, which is included with
* almost all distributions of PHP.
* almost all distributions of PHP.
*/
public function loadHTML($string, array $options = array())
{
@ -98,15 +95,13 @@ class HTML5
* This is here to provide backwards compatibility with the
* PHP DOM implementation. It simply calls load().
*
* @param string $file
* The path to the file to parse. If this is a resource, it is
* assumed to be an open stream whose pointer is set to the first
* byte of input.
* @param array $options
* Configuration options when parsing the HTML
* @param string $file The path to the file to parse. If this is a resource, it is
* assumed to be an open stream whose pointer is set to the first
* byte of input.
* @param array $options Configuration options when parsing the HTML.
*
* @return \DOMDocument A DOM document. This object type is defined by the libxml
* library, and should have been included with your version of PHP.
* library, and should have been included with your version of PHP.
*/
public function loadHTMLFile($file, array $options = array())
{
@ -116,11 +111,11 @@ class HTML5
/**
* Parse a HTML fragment from a string.
*
* @param string $string The HTML5 fragment as a string.
* @param array $options Configuration options when parsing the HTML
* @param string $string the HTML5 fragment as a string
* @param array $options Configuration options when parsing the HTML
*
* @return \DOMDocumentFragment A DOM fragment. The DOM is part of libxml, which is included with
* almost all distributions of PHP.
* almost all distributions of PHP.
*/
public function loadHTMLFragment($string, array $options = array())
{
@ -128,7 +123,7 @@ class HTML5
}
/**
* Return all errors encountered during the parsing phase
* Return all errors encountered during the parsing phase.
*
* @return array
*/
@ -138,7 +133,7 @@ class HTML5
}
/**
* Return true if some errors were encountered during the parsing phase
* Return true if some errors were encountered during the parsing phase.
*
* @return bool
*/
@ -148,23 +143,20 @@ class HTML5
}
/**
* Parse an input stream.
*
* Lower-level loading function. This requires an input stream instead
* of a string, file, or resource.
* Parse an input string.
*
* @param string $input
* @param array $options
* @param array $options
*
* @return \DOMDocument
*/
public function parse($input, array $options = array())
{
$this->errors = array();
$options = array_merge($this->getOptions(), $options);
$options = array_merge($this->defaultOptions, $options);
$events = new DOMTreeBuilder(false, $options);
$scanner = new Scanner($input);
$parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML: Tokenizer::CONFORMANT_HTML);
$parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML : Tokenizer::CONFORMANT_HTML);
$parser->parse();
$this->errors = $events->getErrors();
@ -178,17 +170,17 @@ class HTML5
* Lower-level loading function. This requires an input stream instead
* of a string, file, or resource.
*
* @param string $input The input data to parse in the form of a string.
* @param array $options An array of options
* @param string $input The input data to parse in the form of a string.
* @param array $options An array of options.
*
* @return \DOMDocumentFragment
*/
public function parseFragment($input, array $options = array())
{
$options = array_merge($this->getOptions(), $options);
$options = array_merge($this->defaultOptions, $options);
$events = new DOMTreeBuilder(true, $options);
$scanner = new Scanner($input);
$parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML: Tokenizer::CONFORMANT_HTML);
$parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML : Tokenizer::CONFORMANT_HTML);
$parser->parse();
$this->errors = $events->getErrors();
@ -199,15 +191,12 @@ class HTML5
/**
* Save a DOM into a given file as HTML5.
*
* @param mixed $dom
* The DOM to be serialized.
* @param string $file
* The filename to be written.
* @param array $options
* Configuration options when serializing the DOM. These include:
* - encode_entities: Text written to the output is escaped by default and not all
* entities are encoded. If this is set to true all entities will be encoded.
* Defaults to false.
* @param mixed $dom The DOM to be serialized.
* @param string|resource $file The filename to be written or resource to write to.
* @param array $options Configuration options when serializing the DOM. These include:
* - encode_entities: Text written to the output is escaped by default and not all
* entities are encoded. If this is set to true all entities will be encoded.
* Defaults to false.
*/
public function save($dom, $file, $options = array())
{
@ -216,9 +205,9 @@ class HTML5
$stream = $file;
$close = false;
} else {
$stream = fopen($file, 'w');
$stream = fopen($file, 'wb');
}
$options = array_merge($this->getOptions(), $options);
$options = array_merge($this->defaultOptions, $options);
$rules = new OutputRules($stream, $options);
$trav = new Traverser($dom, $stream, $rules, $options);
@ -232,21 +221,19 @@ class HTML5
/**
* Convert a DOM into an HTML5 string.
*
* @param mixed $dom
* The DOM to be serialized.
* @param array $options
* Configuration options when serializing the DOM. These include:
* - encode_entities: Text written to the output is escaped by default and not all
* entities are encoded. If this is set to true all entities will be encoded.
* Defaults to false.
* @param mixed $dom The DOM to be serialized.
* @param array $options Configuration options when serializing the DOM. These include:
* - encode_entities: Text written to the output is escaped by default and not all
* entities are encoded. If this is set to true all entities will be encoded.
* Defaults to false.
*
* @return string An HTML5 document generated from the DOM.
*/
public function saveHTML($dom, $options = array())
{
$stream = fopen('php://temp', 'w');
$this->save($dom, $stream, array_merge($this->getOptions(), $options));
$stream = fopen('php://temp', 'wb');
$this->save($dom, $stream, array_merge($this->defaultOptions, $options));
return stream_get_contents($stream, - 1, 0);
return stream_get_contents($stream, -1, 0);
}
}

View file

@ -2,6 +2,7 @@
/**
* Provide general element functions.
*/
namespace Masterminds\HTML5;
/**
@ -17,7 +18,6 @@ namespace Masterminds\HTML5;
*/
class Elements
{
/**
* Indicates an element is described in the specification.
*/
@ -77,116 +77,116 @@ class Elements
* @var array
*/
public static $html5 = array(
"a" => 1,
"abbr" => 1,
"address" => 65, // NORMAL | BLOCK_TAG
"area" => 9, // NORMAL | VOID_TAG
"article" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"aside" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"audio" => 1, // NORMAL
"b" => 1,
"base" => 9, // NORMAL | VOID_TAG
"bdi" => 1,
"bdo" => 1,
"blockquote" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"body" => 1,
"br" => 9, // NORMAL | VOID_TAG
"button" => 1,
"canvas" => 65, // NORMAL | BLOCK_TAG
"caption" => 1,
"cite" => 1,
"code" => 1,
"col" => 9, // NORMAL | VOID_TAG
"colgroup" => 1,
"command" => 9, // NORMAL | VOID_TAG
'a' => 1,
'abbr' => 1,
'address' => 65, // NORMAL | BLOCK_TAG
'area' => 9, // NORMAL | VOID_TAG
'article' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
'aside' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
'audio' => 1, // NORMAL
'b' => 1,
'base' => 9, // NORMAL | VOID_TAG
'bdi' => 1,
'bdo' => 1,
'blockquote' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
'body' => 1,
'br' => 9, // NORMAL | VOID_TAG
'button' => 1,
'canvas' => 65, // NORMAL | BLOCK_TAG
'caption' => 1,
'cite' => 1,
'code' => 1,
'col' => 9, // NORMAL | VOID_TAG
'colgroup' => 1,
'command' => 9, // NORMAL | VOID_TAG
// "data" => 1, // This is highly experimental and only part of the whatwg spec (not w3c). See https://developer.mozilla.org/en-US/docs/HTML/Element/data
"datalist" => 1,
"dd" => 65, // NORMAL | BLOCK_TAG
"del" => 1,
"details" => 17, // NORMAL | AUTOCLOSE_P,
"dfn" => 1,
"dialog" => 17, // NORMAL | AUTOCLOSE_P,
"div" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"dl" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"dt" => 1,
"em" => 1,
"embed" => 9, // NORMAL | VOID_TAG
"fieldset" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"figcaption" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"figure" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"footer" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"form" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"h1" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"h2" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"h3" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"h4" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"h5" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"h6" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"head" => 1,
"header" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"hgroup" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"hr" => 73, // NORMAL | VOID_TAG
"html" => 1,
"i" => 1,
"iframe" => 3, // NORMAL | TEXT_RAW
"img" => 9, // NORMAL | VOID_TAG
"input" => 9, // NORMAL | VOID_TAG
"kbd" => 1,
"ins" => 1,
"keygen" => 9, // NORMAL | VOID_TAG
"label" => 1,
"legend" => 1,
"li" => 1,
"link" => 9, // NORMAL | VOID_TAG
"map" => 1,
"mark" => 1,
"menu" => 17, // NORMAL | AUTOCLOSE_P,
"meta" => 9, // NORMAL | VOID_TAG
"meter" => 1,
"nav" => 17, // NORMAL | AUTOCLOSE_P,
"noscript" => 65, // NORMAL | BLOCK_TAG
"object" => 1,
"ol" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"optgroup" => 1,
"option" => 1,
"output" => 65, // NORMAL | BLOCK_TAG
"p" => 209, // NORMAL | AUTOCLOSE_P | BLOCK_TAG | BLOCK_ONLY_INLINE
"param" => 9, // NORMAL | VOID_TAG
"pre" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"progress" => 1,
"q" => 1,
"rp" => 1,
"rt" => 1,
"ruby" => 1,
"s" => 1,
"samp" => 1,
"script" => 3, // NORMAL | TEXT_RAW
"section" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"select" => 1,
"small" => 1,
"source" => 9, // NORMAL | VOID_TAG
"span" => 1,
"strong" => 1,
"style" => 3, // NORMAL | TEXT_RAW
"sub" => 1,
"summary" => 17, // NORMAL | AUTOCLOSE_P,
"sup" => 1,
"table" => 65, // NORMAL | BLOCK_TAG
"tbody" => 1,
"td" => 1,
"textarea" => 5, // NORMAL | TEXT_RCDATA
"tfoot" => 65, // NORMAL | BLOCK_TAG
"th" => 1,
"thead" => 1,
"time" => 1,
"title" => 5, // NORMAL | TEXT_RCDATA
"tr" => 1,
"track" => 9, // NORMAL | VOID_TAG
"u" => 1,
"ul" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"var" => 1,
"video" => 65, // NORMAL | BLOCK_TAG
"wbr" => 9, // NORMAL | VOID_TAG
'datalist' => 1,
'dd' => 65, // NORMAL | BLOCK_TAG
'del' => 1,
'details' => 17, // NORMAL | AUTOCLOSE_P,
'dfn' => 1,
'dialog' => 17, // NORMAL | AUTOCLOSE_P,
'div' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
'dl' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
'dt' => 1,
'em' => 1,
'embed' => 9, // NORMAL | VOID_TAG
'fieldset' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
'figcaption' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
'figure' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
'footer' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
'form' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
'h1' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
'h2' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
'h3' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
'h4' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
'h5' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
'h6' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
'head' => 1,
'header' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
'hgroup' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
'hr' => 73, // NORMAL | VOID_TAG
'html' => 1,
'i' => 1,
'iframe' => 3, // NORMAL | TEXT_RAW
'img' => 9, // NORMAL | VOID_TAG
'input' => 9, // NORMAL | VOID_TAG
'kbd' => 1,
'ins' => 1,
'keygen' => 9, // NORMAL | VOID_TAG
'label' => 1,
'legend' => 1,
'li' => 1,
'link' => 9, // NORMAL | VOID_TAG
'map' => 1,
'mark' => 1,
'menu' => 17, // NORMAL | AUTOCLOSE_P,
'meta' => 9, // NORMAL | VOID_TAG
'meter' => 1,
'nav' => 17, // NORMAL | AUTOCLOSE_P,
'noscript' => 65, // NORMAL | BLOCK_TAG
'object' => 1,
'ol' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
'optgroup' => 1,
'option' => 1,
'output' => 65, // NORMAL | BLOCK_TAG
'p' => 209, // NORMAL | AUTOCLOSE_P | BLOCK_TAG | BLOCK_ONLY_INLINE
'param' => 9, // NORMAL | VOID_TAG
'pre' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
'progress' => 1,
'q' => 1,
'rp' => 1,
'rt' => 1,
'ruby' => 1,
's' => 1,
'samp' => 1,
'script' => 3, // NORMAL | TEXT_RAW
'section' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
'select' => 1,
'small' => 1,
'source' => 9, // NORMAL | VOID_TAG
'span' => 1,
'strong' => 1,
'style' => 3, // NORMAL | TEXT_RAW
'sub' => 1,
'summary' => 17, // NORMAL | AUTOCLOSE_P,
'sup' => 1,
'table' => 65, // NORMAL | BLOCK_TAG
'tbody' => 1,
'td' => 1,
'textarea' => 5, // NORMAL | TEXT_RCDATA
'tfoot' => 65, // NORMAL | BLOCK_TAG
'th' => 1,
'thead' => 1,
'time' => 1,
'title' => 5, // NORMAL | TEXT_RCDATA
'tr' => 1,
'track' => 9, // NORMAL | VOID_TAG
'u' => 1,
'ul' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
'var' => 1,
'video' => 65, // NORMAL | BLOCK_TAG
'wbr' => 9, // NORMAL | VOID_TAG
// Legacy?
'basefont' => 8, // VOID_TAG
@ -202,7 +202,7 @@ class Elements
'marquee' => 0,
'isindex' => 8, // VOID_TAG
'xmp' => 20, // AUTOCLOSE_P | VOID_TAG | RAW_TEXT
'noembed' => 2 // RAW_TEXT
'noembed' => 2, // RAW_TEXT
);
/**
@ -215,45 +215,45 @@ class Elements
* @var array
*/
public static $mathml = array(
"maction" => 1,
"maligngroup" => 1,
"malignmark" => 1,
"math" => 1,
"menclose" => 1,
"merror" => 1,
"mfenced" => 1,
"mfrac" => 1,
"mglyph" => 1,
"mi" => 1,
"mlabeledtr" => 1,
"mlongdiv" => 1,
"mmultiscripts" => 1,
"mn" => 1,
"mo" => 1,
"mover" => 1,
"mpadded" => 1,
"mphantom" => 1,
"mroot" => 1,
"mrow" => 1,
"ms" => 1,
"mscarries" => 1,
"mscarry" => 1,
"msgroup" => 1,
"msline" => 1,
"mspace" => 1,
"msqrt" => 1,
"msrow" => 1,
"mstack" => 1,
"mstyle" => 1,
"msub" => 1,
"msup" => 1,
"msubsup" => 1,
"mtable" => 1,
"mtd" => 1,
"mtext" => 1,
"mtr" => 1,
"munder" => 1,
"munderover" => 1
'maction' => 1,
'maligngroup' => 1,
'malignmark' => 1,
'math' => 1,
'menclose' => 1,
'merror' => 1,
'mfenced' => 1,
'mfrac' => 1,
'mglyph' => 1,
'mi' => 1,
'mlabeledtr' => 1,
'mlongdiv' => 1,
'mmultiscripts' => 1,
'mn' => 1,
'mo' => 1,
'mover' => 1,
'mpadded' => 1,
'mphantom' => 1,
'mroot' => 1,
'mrow' => 1,
'ms' => 1,
'mscarries' => 1,
'mscarry' => 1,
'msgroup' => 1,
'msline' => 1,
'mspace' => 1,
'msqrt' => 1,
'msrow' => 1,
'mstack' => 1,
'mstyle' => 1,
'msub' => 1,
'msup' => 1,
'msubsup' => 1,
'mtable' => 1,
'mtd' => 1,
'mtext' => 1,
'mtr' => 1,
'munder' => 1,
'munderover' => 1,
);
/**
@ -269,90 +269,90 @@ class Elements
* @var array
*/
public static $svg = array(
"a" => 1,
"altGlyph" => 1,
"altGlyphDef" => 1,
"altGlyphItem" => 1,
"animate" => 1,
"animateColor" => 1,
"animateMotion" => 1,
"animateTransform" => 1,
"circle" => 1,
"clipPath" => 1,
"color-profile" => 1,
"cursor" => 1,
"defs" => 1,
"desc" => 1,
"ellipse" => 1,
"feBlend" => 1,
"feColorMatrix" => 1,
"feComponentTransfer" => 1,
"feComposite" => 1,
"feConvolveMatrix" => 1,
"feDiffuseLighting" => 1,
"feDisplacementMap" => 1,
"feDistantLight" => 1,
"feFlood" => 1,
"feFuncA" => 1,
"feFuncB" => 1,
"feFuncG" => 1,
"feFuncR" => 1,
"feGaussianBlur" => 1,
"feImage" => 1,
"feMerge" => 1,
"feMergeNode" => 1,
"feMorphology" => 1,
"feOffset" => 1,
"fePointLight" => 1,
"feSpecularLighting" => 1,
"feSpotLight" => 1,
"feTile" => 1,
"feTurbulence" => 1,
"filter" => 1,
"font" => 1,
"font-face" => 1,
"font-face-format" => 1,
"font-face-name" => 1,
"font-face-src" => 1,
"font-face-uri" => 1,
"foreignObject" => 1,
"g" => 1,
"glyph" => 1,
"glyphRef" => 1,
"hkern" => 1,
"image" => 1,
"line" => 1,
"linearGradient" => 1,
"marker" => 1,
"mask" => 1,
"metadata" => 1,
"missing-glyph" => 1,
"mpath" => 1,
"path" => 1,
"pattern" => 1,
"polygon" => 1,
"polyline" => 1,
"radialGradient" => 1,
"rect" => 1,
"script" => 3, // NORMAL | RAW_TEXT
"set" => 1,
"stop" => 1,
"style" => 3, // NORMAL | RAW_TEXT
"svg" => 1,
"switch" => 1,
"symbol" => 1,
"text" => 1,
"textPath" => 1,
"title" => 1,
"tref" => 1,
"tspan" => 1,
"use" => 1,
"view" => 1,
"vkern" => 1
'a' => 1,
'altGlyph' => 1,
'altGlyphDef' => 1,
'altGlyphItem' => 1,
'animate' => 1,
'animateColor' => 1,
'animateMotion' => 1,
'animateTransform' => 1,
'circle' => 1,
'clipPath' => 1,
'color-profile' => 1,
'cursor' => 1,
'defs' => 1,
'desc' => 1,
'ellipse' => 1,
'feBlend' => 1,
'feColorMatrix' => 1,
'feComponentTransfer' => 1,
'feComposite' => 1,
'feConvolveMatrix' => 1,
'feDiffuseLighting' => 1,
'feDisplacementMap' => 1,
'feDistantLight' => 1,
'feFlood' => 1,
'feFuncA' => 1,
'feFuncB' => 1,
'feFuncG' => 1,
'feFuncR' => 1,
'feGaussianBlur' => 1,
'feImage' => 1,
'feMerge' => 1,
'feMergeNode' => 1,
'feMorphology' => 1,
'feOffset' => 1,
'fePointLight' => 1,
'feSpecularLighting' => 1,
'feSpotLight' => 1,
'feTile' => 1,
'feTurbulence' => 1,
'filter' => 1,
'font' => 1,
'font-face' => 1,
'font-face-format' => 1,
'font-face-name' => 1,
'font-face-src' => 1,
'font-face-uri' => 1,
'foreignObject' => 1,
'g' => 1,
'glyph' => 1,
'glyphRef' => 1,
'hkern' => 1,
'image' => 1,
'line' => 1,
'linearGradient' => 1,
'marker' => 1,
'mask' => 1,
'metadata' => 1,
'missing-glyph' => 1,
'mpath' => 1,
'path' => 1,
'pattern' => 1,
'polygon' => 1,
'polyline' => 1,
'radialGradient' => 1,
'rect' => 1,
'script' => 3, // NORMAL | RAW_TEXT
'set' => 1,
'stop' => 1,
'style' => 3, // NORMAL | RAW_TEXT
'svg' => 1,
'switch' => 1,
'symbol' => 1,
'text' => 1,
'textPath' => 1,
'title' => 1,
'tref' => 1,
'tspan' => 1,
'use' => 1,
'view' => 1,
'vkern' => 1,
);
/**
* Some attributes in SVG are case sensetitive.
* Some attributes in SVG are case sensitive.
*
* This map contains key/value pairs with the key as the lowercase attribute
* name and the value with the correct casing.
@ -419,11 +419,11 @@ class Elements
'viewtarget' => 'viewTarget',
'xchannelselector' => 'xChannelSelector',
'ychannelselector' => 'yChannelSelector',
'zoomandpan' => 'zoomAndPan'
'zoomandpan' => 'zoomAndPan',
);
/**
* Some SVG elements are case sensetitive.
* Some SVG elements are case sensitive.
* This map contains these.
*
* The map contains key/value store of the name is lowercase as the keys and
@ -465,7 +465,7 @@ class Elements
'glyphref' => 'glyphRef',
'lineargradient' => 'linearGradient',
'radialgradient' => 'radialGradient',
'textpath' => 'textPath'
'textpath' => 'textPath',
);
/**
@ -477,32 +477,26 @@ class Elements
*
* Elements::isA('script', Elements::TEXT_RCDATA); // Returns false.
*
* @param string $name
* The element name.
* @param int $mask
* One of the constants on this class.
* @return boolean true if the element matches the mask, false otherwise.
* @param string $name The element name.
* @param int $mask One of the constants on this class.
*
* @return bool true if the element matches the mask, false otherwise.
*/
public static function isA($name, $mask)
{
if (! static::isElement($name)) {
return false;
}
return (static::element($name) & $mask) == $mask;
return (static::element($name) & $mask) === $mask;
}
/**
* Test if an element is a valid html5 element.
*
* @param string $name
* The name of the element.
* @param string $name The name of the element.
*
* @return bool True if a html5 element and false otherwise.
* @return bool true if a html5 element and false otherwise.
*/
public static function isHtml5Element($name)
{
// html5 element names are case insensetitive. Forcing lowercase for the check.
// html5 element names are case insensitive. Forcing lowercase for the check.
// Do we need this check or will all data passed here already be lowercase?
return isset(static::$html5[strtolower($name)]);
}
@ -510,41 +504,37 @@ class Elements
/**
* Test if an element name is a valid MathML presentation element.
*
* @param string $name
* The name of the element.
* @param string $name The name of the element.
*
* @return bool True if a MathML name and false otherwise.
* @return bool true if a MathML name and false otherwise.
*/
public static function isMathMLElement($name)
{
// MathML is case-sensetitive unlike html5 elements.
// MathML is case-sensitive unlike html5 elements.
return isset(static::$mathml[$name]);
}
/**
* Test if an element is a valid SVG element.
*
* @param string $name
* The name of the element.
* @param string $name The name of the element.
*
* @return boolean True if a SVG element and false otherwise.
* @return bool true if a SVG element and false otherwise.
*/
public static function isSvgElement($name)
{
// SVG is case-sensetitive unlike html5 elements.
// SVG is case-sensitive unlike html5 elements.
return isset(static::$svg[$name]);
}
/**
* Is an element name valid in an html5 document.
*
* This includes html5 elements along with other allowed embedded content
* such as svg and mathml.
*
* @param string $name
* The name of the element.
* @param string $name The name of the element.
*
* @return bool True if valid and false otherwise.
* @return bool true if valid and false otherwise.
*/
public static function isElement($name)
{
@ -554,10 +544,9 @@ class Elements
/**
* Get the element mask for the given element name.
*
* @param string $name
* The name of the element.
* @param string $name The name of the element.
*
* @return int|bool The element mask or false if element does not exist.
* @return int the element mask.
*/
public static function element($name)
{
@ -571,16 +560,15 @@ class Elements
return static::$mathml[$name];
}
return false;
return 0;
}
/**
* Normalize a SVG element name to its proper case and form.
*
* @param string $name
* The name of the element.
* @param string $name The name of the element.
*
* @return string The normalized form of the element name.
* @return string the normalized form of the element name.
*/
public static function normalizeSvgElement($name)
{
@ -595,8 +583,7 @@ class Elements
/**
* Normalize a SVG attribute name to its proper case and form.
*
* @param string $name
* The name of the attribute.
* @param string $name The name of the attribute.
*
* @return string The normalized form of the attribute name.
*/
@ -612,11 +599,9 @@ class Elements
/**
* Normalize a MathML attribute name to its proper case and form.
*
* Note, all MathML element names are lowercase.
*
* @param string $name
* The name of the attribute.
* @param string $name The name of the attribute.
*
* @return string The normalized form of the attribute name.
*/
@ -625,7 +610,7 @@ class Elements
$name = strtolower($name);
// Only one attribute has a mixed case form for MathML.
if ($name == 'definitionurl') {
if ('definitionurl' === $name) {
$name = 'definitionURL';
}

View file

@ -1,4 +1,5 @@
<?php
namespace Masterminds\HTML5;
/**
@ -7,7 +8,6 @@ namespace Masterminds\HTML5;
*/
class Entities
{
public static $byName = array(
'Aacute' => 'Á',
'Aacut' => 'Á',
@ -2231,6 +2231,6 @@ class Entities
'Zscr' => '𝒵',
'zscr' => '𝓏',
'zwj' => '',
'zwnj' => ''
'zwnj' => '',
);
}

View file

@ -1,4 +1,5 @@
<?php
namespace Masterminds\HTML5;
/**

View file

@ -2,6 +2,7 @@
/**
* A handler for processor instructions.
*/
namespace Masterminds\HTML5;
/**
@ -18,7 +19,6 @@ namespace Masterminds\HTML5;
*/
interface InstructionProcessor
{
/**
* Process an individual processing instruction.
*
@ -28,16 +28,14 @@ interface InstructionProcessor
* - Making any subsequent modifications to the DOM by modifying the
* DOMElement or its attached DOM tree.
*
* @param DOMElement $element
* The parent element for the current processing instruction.
* @param string $name
* The instruction's name. E.g. `&lt;?php` has the name `php`.
* @param string $data
* All of the data between the opening and closing PI marks.
* @return DOMElement The element that should be considered "Current". This may just be
* the element passed in, but if the processor added more elements,
* it may choose to reset the current element to one of the elements
* it created. (When in doubt, return the element passed in.)
* @param \DOMElement $element The parent element for the current processing instruction.
* @param string $name The instruction's name. E.g. `&lt;?php` has the name `php`.
* @param string $data All of the data between the opening and closing PI marks.
*
* @return \DOMElement The element that should be considered "Current". This may just be
* the element passed in, but if the processor added more elements,
* it may choose to reset the current element to one of the elements
* it created. (When in doubt, return the element passed in.)
*/
public function process(\DOMElement $element, $name, $data);
}

View file

@ -1,4 +1,5 @@
<?php
namespace Masterminds\HTML5\Parser;
use Masterminds\HTML5\Entities;
@ -6,25 +7,22 @@ use Masterminds\HTML5\Entities;
/**
* Manage entity references.
*
* This is a simple resolver for HTML5 character reference entities.
* See \Masterminds\HTML5\Entities for the list of supported entities.
* This is a simple resolver for HTML5 character reference entities. See Entities for the list of supported entities.
*/
class CharacterReference
{
protected static $numeric_mask = array(
0x0,
0x2FFFF,
0,
0xFFFF
0xFFFF,
);
/**
* Given a name (e.g.
* 'amp'), lookup the UTF-8 character ('&')
* Given a name (e.g. 'amp'), lookup the UTF-8 character ('&').
*
* @param string $name The name to look up.
*
* @param string $name
* The name to look up.
* @return string The character sequence. In UTF-8 this may be more than one byte.
*/
public static function lookupName($name)
@ -33,21 +31,17 @@ class CharacterReference
return isset(Entities::$byName[$name]) ? Entities::$byName[$name] : null;
}
/**
* Given a Unicode codepoint, return the UTF-8 character.
*
* (NOT USED ANYWHERE)
*/
/*
* public static function lookupCode($codePoint) { return 'POINT'; }
*/
/**
* Given a decimal number, return the UTF-8 character.
*
* @param $int
*
* @return false|string|string[]|null
*/
public static function lookupDecimal($int)
{
$entity = '&#' . $int . ';';
// UNTESTED: This may fail on some planes. Couldn't find full documentation
// on the value of the mask array.
return mb_decode_numericentity($entity, static::$numeric_mask, 'utf-8');
@ -55,6 +49,10 @@ class CharacterReference
/**
* Given a hexidecimal number, return the UTF-8 character.
*
* @param $hexdec
*
* @return false|string|string[]|null
*/
public static function lookupHex($hexdec)
{

View file

@ -1,7 +1,9 @@
<?php
namespace Masterminds\HTML5\Parser;
use Masterminds\HTML5\Elements;
use Masterminds\HTML5\InstructionProcessor;
/**
* Create an HTML5 DOM tree from events.
@ -24,7 +26,7 @@ use Masterminds\HTML5\Elements;
class DOMTreeBuilder implements EventHandler
{
/**
* Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0
* Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0.
*/
const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';
@ -45,14 +47,14 @@ class DOMTreeBuilder implements EventHandler
const OPT_IMPLICIT_NS = 'implicit_namespaces';
/**
* Holds the HTML5 element names that causes a namespace switch
* Holds the HTML5 element names that causes a namespace switch.
*
* @var array
*/
protected $nsRoots = array(
'html' => self::NAMESPACE_HTML,
'svg' => self::NAMESPACE_SVG,
'math' => self::NAMESPACE_MATHML
'math' => self::NAMESPACE_MATHML,
);
/**
@ -63,7 +65,7 @@ class DOMTreeBuilder implements EventHandler
protected $implicitNamespaces = array(
'xml' => self::NAMESPACE_XML,
'xmlns' => self::NAMESPACE_XMLNS,
'xlink' => self::NAMESPACE_XLINK
'xlink' => self::NAMESPACE_XLINK,
);
/**
@ -146,15 +148,15 @@ class DOMTreeBuilder implements EventHandler
protected $insertMode = 0;
/**
* Track if we are in an element that allows only inline child nodes
* Track if we are in an element that allows only inline child nodes.
*
* @var string|null
*/
protected $onlyInline;
/**
* Quirks mode is enabled by default.
* Any document that is missing the
* DT will be considered to be in quirks mode.
* Any document that is missing the DT will be considered to be in quirks mode.
*/
protected $quirks = true;
@ -175,24 +177,23 @@ class DOMTreeBuilder implements EventHandler
// $this->doc = \DOMImplementation::createDocument(NULL, 'html', $dt);
$this->doc = $impl->createDocument(null, null, $dt);
}
$this->errors = array();
$this->current = $this->doc; // ->documentElement;
// Create a rules engine for tags.
$this->rules = new TreeBuildingRules($this->doc);
$this->rules = new TreeBuildingRules();
$implicitNS = array();
if (isset($this->options[self::OPT_IMPLICIT_NS])) {
$implicitNS = $this->options[self::OPT_IMPLICIT_NS];
} elseif (isset($this->options["implicitNamespaces"])) {
$implicitNS = $this->options["implicitNamespaces"];
} elseif (isset($this->options['implicitNamespaces'])) {
$implicitNS = $this->options['implicitNamespaces'];
}
// Fill $nsStack with the default HTML5 namespaces, plus the "implicitNamespaces" array taken from $options
array_unshift($this->nsStack, $implicitNS + array(
'' => self::NAMESPACE_HTML
) + $this->implicitNamespaces);
array_unshift($this->nsStack, $implicitNS + array('' => self::NAMESPACE_HTML) + $this->implicitNamespaces);
if ($isFragment) {
$this->insertMode = static::IM_IN_BODY;
@ -229,8 +230,10 @@ class DOMTreeBuilder implements EventHandler
*
* This is used for handling Processing Instructions as they are
* inserted. If omitted, PIs are inserted directly into the DOM tree.
*
* @param InstructionProcessor $proc
*/
public function setInstructionProcessor(\Masterminds\HTML5\InstructionProcessor $proc)
public function setInstructionProcessor(InstructionProcessor $proc)
{
$this->processor = $proc;
}
@ -242,7 +245,7 @@ class DOMTreeBuilder implements EventHandler
$this->quirks = $quirks;
if ($this->insertMode > static::IM_INITIAL) {
$this->parseError("Illegal placement of DOCTYPE tag. Ignoring: " . $name);
$this->parseError('Illegal placement of DOCTYPE tag. Ignoring: ' . $name);
return;
}
@ -256,27 +259,32 @@ class DOMTreeBuilder implements EventHandler
* @todo - XMLNS namespace handling (we need to parse, even if it's not valid)
* - XLink, MathML and SVG namespace handling
* - Omission rules: 8.1.2.4 Optional tags
*
* @param string $name
* @param array $attributes
* @param bool $selfClosing
*
* @return int
*/
public function startTag($name, $attributes = array(), $selfClosing = false)
{
// fprintf(STDOUT, $name);
$lname = $this->normalizeTagName($name);
// Make sure we have an html element.
if (! $this->doc->documentElement && $name !== 'html' && ! $this->frag) {
if (!$this->doc->documentElement && 'html' !== $name && !$this->frag) {
$this->startTag('html');
}
// Set quirks mode if we're at IM_INITIAL with no doctype.
if ($this->insertMode == static::IM_INITIAL) {
if ($this->insertMode === static::IM_INITIAL) {
$this->quirks = true;
$this->parseError("No DOCTYPE specified.");
$this->parseError('No DOCTYPE specified.');
}
// SPECIAL TAG HANDLING:
// Spec says do this, and "don't ask."
// find the spec where this is defined... looks problematic
if ($name == 'image' && !($this->insertMode === static::IM_IN_SVG || $this->insertMode === static::IM_IN_MATHML)) {
if ('image' === $name && !($this->insertMode === static::IM_IN_SVG || $this->insertMode === static::IM_IN_MATHML)) {
$name = 'img';
}
@ -292,7 +300,7 @@ class DOMTreeBuilder implements EventHandler
break;
case 'head':
if ($this->insertMode > static::IM_BEFORE_HEAD) {
$this->parseError("Unexpected head tag outside of head context.");
$this->parseError('Unexpected head tag outside of head context.');
} else {
$this->insertMode = static::IM_IN_HEAD;
}
@ -307,14 +315,14 @@ class DOMTreeBuilder implements EventHandler
$this->insertMode = static::IM_IN_MATHML;
break;
case 'noscript':
if ($this->insertMode == static::IM_IN_HEAD) {
if ($this->insertMode === static::IM_IN_HEAD) {
$this->insertMode = static::IM_IN_HEAD_NOSCRIPT;
}
break;
}
// Special case handling for SVG.
if ($this->insertMode == static::IM_IN_SVG) {
if ($this->insertMode === static::IM_IN_SVG) {
$lname = Elements::normalizeSvgElement($lname);
}
@ -322,62 +330,58 @@ class DOMTreeBuilder implements EventHandler
// When we find a tag that appears inside $nsRoots, we have to switch the default namespace.
if (isset($this->nsRoots[$lname]) && $this->nsStack[0][''] !== $this->nsRoots[$lname]) {
array_unshift($this->nsStack, array(
'' => $this->nsRoots[$lname]
'' => $this->nsRoots[$lname],
) + $this->nsStack[0]);
$pushes ++;
++$pushes;
}
$needsWorkaround = false;
if (isset($this->options["xmlNamespaces"]) && $this->options["xmlNamespaces"]) {
if (isset($this->options['xmlNamespaces']) && $this->options['xmlNamespaces']) {
// When xmlNamespaces is true and we find an 'xmlns' or 'xmlns:*' attribute, we should add a new item to the $nsStack.
foreach ($attributes as $aName => $aVal) {
if ($aName === 'xmlns') {
if ('xmlns' === $aName) {
$needsWorkaround = $aVal;
array_unshift($this->nsStack, array(
'' => $aVal
'' => $aVal,
) + $this->nsStack[0]);
$pushes ++;
} elseif ((($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : '') === 'xmlns') {
++$pushes;
} elseif ('xmlns' === (($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : '')) {
array_unshift($this->nsStack, array(
substr($aName, $pos + 1) => $aVal
substr($aName, $pos + 1) => $aVal,
) + $this->nsStack[0]);
$pushes ++;
++$pushes;
}
}
}
if ($this->onlyInline && Elements::isA($lname, Elements::BLOCK_TAG)) {
$this->autoclose($this->onlyInline);
$this->onlyInline = null;
$this->autoclose($this->onlyInline);
$this->onlyInline = null;
}
try {
$prefix = ($pos = strpos($lname, ':')) ? substr($lname, 0, $pos) : '';
if ($needsWorkaround!==false) {
$xml = "<$lname xmlns=\"$needsWorkaround\" ".(strlen($prefix) && isset($this->nsStack[0][$prefix])?("xmlns:$prefix=\"".$this->nsStack[0][$prefix]."\""):"")."/>";
if (false !== $needsWorkaround) {
$xml = "<$lname xmlns=\"$needsWorkaround\" " . (strlen($prefix) && isset($this->nsStack[0][$prefix]) ? ("xmlns:$prefix=\"" . $this->nsStack[0][$prefix] . '"') : '') . '/>';
$frag = new \DOMDocument('1.0', 'UTF-8');
$frag->loadXML($xml);
$ele = $this->doc->importNode($frag->documentElement, true);
} else {
if (!isset($this->nsStack[0][$prefix]) || ($prefix === "" && isset($this->options[self::OPT_DISABLE_HTML_NS]) && $this->options[self::OPT_DISABLE_HTML_NS])) {
if (!isset($this->nsStack[0][$prefix]) || ('' === $prefix && isset($this->options[self::OPT_DISABLE_HTML_NS]) && $this->options[self::OPT_DISABLE_HTML_NS])) {
$ele = $this->doc->createElement($lname);
} else {
$ele = $this->doc->createElementNS($this->nsStack[0][$prefix], $lname);
}
}
} catch (\DOMException $e) {
$this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>.");
$ele = $this->doc->createElement('invalid');
}
if (Elements::isA($lname, Elements::BLOCK_ONLY_INLINE)) {
$this->onlyInline = $lname;
$this->onlyInline = $lname;
}
// When we add some namespaces, we have to track them. Later, when "endElement" is invoked, we have to remove them.
@ -396,23 +400,23 @@ class DOMTreeBuilder implements EventHandler
foreach ($attributes as $aName => $aVal) {
// xmlns attributes can't be set
if ($aName === 'xmlns') {
if ('xmlns' === $aName) {
continue;
}
if ($this->insertMode == static::IM_IN_SVG) {
if ($this->insertMode === static::IM_IN_SVG) {
$aName = Elements::normalizeSvgAttribute($aName);
} elseif ($this->insertMode == static::IM_IN_MATHML) {
} elseif ($this->insertMode === static::IM_IN_MATHML) {
$aName = Elements::normalizeMathMlAttribute($aName);
}
try {
$prefix = ($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : false;
if ($prefix==='xmlns') {
$ele->setAttributeNs(self::NAMESPACE_XMLNS, $aName, $aVal);
} elseif ($prefix!==false && isset($this->nsStack[0][$prefix])) {
$ele->setAttributeNs($this->nsStack[0][$prefix], $aName, $aVal);
if ('xmlns' === $prefix) {
$ele->setAttributeNS(self::NAMESPACE_XMLNS, $aName, $aVal);
} elseif (false !== $prefix && isset($this->nsStack[0][$prefix])) {
$ele->setAttributeNS($this->nsStack[0][$prefix], $aName, $aVal);
} else {
$ele->setAttribute($aName, $aVal);
}
@ -422,19 +426,19 @@ class DOMTreeBuilder implements EventHandler
}
// This is necessary on a non-DTD schema, like HTML5.
if ($aName == 'id') {
if ('id' === $aName) {
$ele->setIdAttribute('id', true);
}
}
// Some elements have special processing rules. Handle those separately.
if ($this->rules->hasRules($name) && $this->frag !== $this->current) {
if ($this->frag !== $this->current && $this->rules->hasRules($name)) {
// Some elements have special processing rules. Handle those separately.
$this->current = $this->rules->evaluate($ele, $this->current);
} // Otherwise, it's a standard element.
else {
} else {
// Otherwise, it's a standard element.
$this->current->appendChild($ele);
if (! Elements::isA($name, Elements::VOID_TAG)) {
if (!Elements::isA($name, Elements::VOID_TAG)) {
$this->current = $ele;
}
@ -448,7 +452,7 @@ class DOMTreeBuilder implements EventHandler
// This is sort of a last-ditch attempt to correct for cases where no head/body
// elements are provided.
if ($this->insertMode <= static::IM_BEFORE_HEAD && $name != 'head' && $name != 'html') {
if ($this->insertMode <= static::IM_BEFORE_HEAD && 'head' !== $name && 'html' !== $name) {
$this->insertMode = static::IM_IN_BODY;
}
@ -456,7 +460,7 @@ class DOMTreeBuilder implements EventHandler
// but we have to remove the namespaces pushed to $nsStack.
if ($pushes > 0 && Elements::isA($name, Elements::VOID_TAG)) {
// Remove the namespaces defined by the current node.
for ($i = 0; $i < $pushes; $i ++) {
for ($i = 0; $i < $pushes; ++$i) {
array_shift($this->nsStack);
}
}
@ -485,7 +489,7 @@ class DOMTreeBuilder implements EventHandler
'html',
'br',
'head',
'title'
'title',
))) {
$this->startTag('html');
$this->endTag($name);
@ -495,13 +499,13 @@ class DOMTreeBuilder implements EventHandler
}
// Ignore the tag.
$this->parseError("Illegal closing tag at global scope.");
$this->parseError('Illegal closing tag at global scope.');
return;
}
// Special case handling for SVG.
if ($this->insertMode == static::IM_IN_SVG) {
if ($this->insertMode === static::IM_IN_SVG) {
$lname = Elements::normalizeSvgElement($lname);
}
@ -512,39 +516,33 @@ class DOMTreeBuilder implements EventHandler
$cid = spl_object_hash($this->current);
}
// XXX: Not sure whether we need this anymore.
// if ($name != $lname) {
// return $this->quirksTreeResolver($lname);
// }
// XXX: HTML has no parent. What do we do, though,
// if this element appears in the wrong place?
if ($lname == 'html') {
if ('html' === $lname) {
return;
}
// Remove the namespaces defined by the current node.
if (isset($this->pushes[$cid])) {
for ($i = 0; $i < $this->pushes[$cid][0]; $i ++) {
for ($i = 0; $i < $this->pushes[$cid][0]; ++$i) {
array_shift($this->nsStack);
}
unset($this->pushes[$cid]);
}
if (! $this->autoclose($lname)) {
if (!$this->autoclose($lname)) {
$this->parseError('Could not find closing tag for ' . $lname);
}
// switch ($this->insertMode) {
switch ($lname) {
case "head":
case 'head':
$this->insertMode = static::IM_AFTER_HEAD;
break;
case "body":
case 'body':
$this->insertMode = static::IM_AFTER_BODY;
break;
case "svg":
case "mathml":
case 'svg':
case 'mathml':
$this->insertMode = static::IM_IN_BODY;
break;
}
@ -566,9 +564,9 @@ class DOMTreeBuilder implements EventHandler
// practical as most documents contain these characters. Other text is not
// expected here so recording a parse error is necessary.
$dataTmp = trim($data, " \t\n\r\f");
if (! empty($dataTmp)) {
if (!empty($dataTmp)) {
// fprintf(STDOUT, "Unexpected insert mode: %d", $this->insertMode);
$this->parseError("Unexpected text. Ignoring: " . $dataTmp);
$this->parseError('Unexpected text. Ignoring: ' . $dataTmp);
}
return;
@ -585,7 +583,7 @@ class DOMTreeBuilder implements EventHandler
public function parseError($msg, $line = 0, $col = 0)
{
$this->errors[] = sprintf("Line %d, Col %d: %s", $line, $col, $msg);
$this->errors[] = sprintf('Line %d, Col %d: %s', $line, $col, $msg);
}
public function getErrors()
@ -602,15 +600,14 @@ class DOMTreeBuilder implements EventHandler
public function processingInstruction($name, $data = null)
{
// XXX: Ignore initial XML declaration, per the spec.
if ($this->insertMode == static::IM_INITIAL && 'xml' == strtolower($name)) {
if ($this->insertMode === static::IM_INITIAL && 'xml' === strtolower($name)) {
return;
}
// Important: The processor may modify the current DOM tree however
// it sees fit.
if (isset($this->processor)) {
// Important: The processor may modify the current DOM tree however it sees fit.
if ($this->processor instanceof InstructionProcessor) {
$res = $this->processor->process($this->current, $name, $data);
if (! empty($res)) {
if (!empty($res)) {
$this->current = $res;
}
@ -629,24 +626,23 @@ class DOMTreeBuilder implements EventHandler
/**
* Apply normalization rules to a tag name.
*
* See sections 2.9 and 8.1.2.
*
* @param string $name
* The tag name.
* @param string $tagName
*
* @return string The normalized tag name.
*/
protected function normalizeTagName($name)
protected function normalizeTagName($tagName)
{
/*
* Section 2.9 suggests that we should not do this. if (strpos($name, ':') !== false) { // We know from the grammar that there must be at least one other // char besides :, since : is not a legal tag start. $parts = explode(':', $name); return array_pop($parts); }
*/
return $name;
return $tagName;
}
protected function quirksTreeResolver($name)
{
throw new \Exception("Not implemented.");
throw new \Exception('Not implemented.');
}
/**
@ -660,15 +656,16 @@ class DOMTreeBuilder implements EventHandler
{
$working = $this->current;
do {
if ($working->nodeType != XML_ELEMENT_NODE) {
if (XML_ELEMENT_NODE !== $working->nodeType) {
return false;
}
if ($working->tagName == $tagName) {
if ($working->tagName === $tagName) {
$this->current = $working->parentNode;
return true;
}
} while ($working = $working->parentNode);
return false;
}
@ -685,8 +682,8 @@ class DOMTreeBuilder implements EventHandler
protected function isAncestor($tagName)
{
$candidate = $this->current;
while ($candidate->nodeType === XML_ELEMENT_NODE) {
if ($candidate->tagName == $tagName) {
while (XML_ELEMENT_NODE === $candidate->nodeType) {
if ($candidate->tagName === $tagName) {
return true;
}
$candidate = $candidate->parentNode;
@ -704,6 +701,6 @@ class DOMTreeBuilder implements EventHandler
*/
protected function isParent($tagName)
{
return $this->current->tagName == $tagName;
return $this->current->tagName === $tagName;
}
}

View file

@ -1,4 +1,5 @@
<?php
namespace Masterminds\HTML5\Parser;
/**
@ -20,7 +21,6 @@ namespace Masterminds\HTML5\Parser;
*/
interface EventHandler
{
const DOCTYPE_NONE = 0;
const DOCTYPE_PUBLIC = 1;
@ -30,15 +30,11 @@ interface EventHandler
/**
* A doctype declaration.
*
* @param string $name
* The name of the root element.
* @param int $idType
* One of DOCTYPE_NONE, DOCTYPE_PUBLIC, or DOCTYPE_SYSTEM.
* @param string $id
* The identifier. For DOCTYPE_PUBLIC, this is the public ID. If DOCTYPE_SYSTEM,
* then this is a system ID.
* @param boolean $quirks
* Indicates whether the builder should enter quirks mode.
* @param string $name The name of the root element.
* @param int $idType One of DOCTYPE_NONE, DOCTYPE_PUBLIC, or DOCTYPE_SYSTEM
* @param string $id The identifier. For DOCTYPE_PUBLIC, this is the public ID. If DOCTYPE_SYSTEM,
* then this is a system ID.
* @param bool $quirks Indicates whether the builder should enter quirks mode.
*/
public function doctype($name, $idType = 0, $id = null, $quirks = false);
@ -63,13 +59,11 @@ interface EventHandler
* The textmode is automatically reset to Tokenizer::TEXTMODE_NORMAL when the
* closing tag is encountered. **This behavior may change.**
*
* @param string $name
* The tag name.
* @param array $attributes
* An array with all of the tag's attributes.
* @param boolean $selfClosing
* An indicator of whether or not this tag is self-closing (<foo/>)
* @return int One of the Tokenizer::TEXTMODE_* constants.
* @param string $name The tag name.
* @param array $attributes An array with all of the tag's attributes.
* @param bool $selfClosing An indicator of whether or not this tag is self-closing (<foo/>).
*
* @return int one of the Tokenizer::TEXTMODE_* constants
*/
public function startTag($name, $attributes = array(), $selfClosing = false);
@ -104,7 +98,7 @@ interface EventHandler
* A CDATA section.
*
* @param string $data
* The unparsed character data.
* The unparsed character data
*/
public function cdata($data);
@ -113,10 +107,8 @@ interface EventHandler
*
* While user agents don't get PIs, server-side does.
*
* @param string $name
* The name of the processor (e.g. 'php').
* @param string $data
* The unparsed data.
* @param string $name The name of the processor (e.g. 'php').
* @param string $data The unparsed data.
*/
public function processingInstruction($name, $data = null);
}

View file

@ -1,4 +1,5 @@
<?php
namespace Masterminds\HTML5\Parser;
/**
@ -18,9 +19,9 @@ class FileInputStream extends StringInputStream implements InputStream
/**
* Load a file input stream.
*
* @param string $data The file or url path to load.
* @param string $data The file or url path to load.
* @param string $encoding The encoding to use for the data.
* @param string $debug A fprintf format to use to echo the data on stdout.
* @param string $debug A fprintf format to use to echo the data on stdout.
*/
public function __construct($data, $encoding = 'UTF-8', $debug = '')
{

View file

@ -49,12 +49,12 @@ interface InputStream extends \Iterator
* and returns the matched substring.
*
* @see strcspn
* @param string $bytes
* Bytes to match.
* @param int $max
* Maximum number of bytes to scan.
*
* @param string $bytes Bytes to match.
* @param int $max Maximum number of bytes to scan.
*
* @return mixed Index or false if no match is found. You should use strong
* equality when checking the result, since index could be 0.
* equality when checking the result, since index could be 0.
*/
public function charsUntil($bytes, $max = null);
@ -65,20 +65,18 @@ interface InputStream extends \Iterator
* and returns the matched substring.
*
* @see strspn
* @param string $bytes
* A mask of bytes to match. If ANY byte in this mask matches the
* current char, the pointer advances and the char is part of the
* substring.
* @param int $max
* The max number of chars to read.
*
* @param string $bytes A mask of bytes to match. If ANY byte in this mask matches the
* current char, the pointer advances and the char is part of the
* substring.
* @param int $max The max number of chars to read.
*/
public function charsWhile($bytes, $max = null);
/**
* Unconsume one character.
*
* @param int $howMany
* The number of characters to move the pointer back.
* @param int $howMany The number of characters to move the pointer back.
*/
public function unconsume($howMany = 1);

View file

@ -1,4 +1,5 @@
<?php
namespace Masterminds\HTML5\Parser;
/**

View file

@ -1,4 +1,5 @@
<?php
namespace Masterminds\HTML5\Parser;
use Masterminds\HTML5\Exception;
@ -18,7 +19,7 @@ class Scanner
private $data;
/**
* The current integer byte position we are in $data
* The current integer byte position we are in $data.
*/
private $char;
@ -35,7 +36,7 @@ class Scanner
/**
* Create a new Scanner.
*
* @param string $data Data to parse
* @param string $data Data to parse.
* @param string $encoding The encoding to use for the data.
*
* @throws Exception If the given data cannot be encoded to UTF-8.
@ -75,14 +76,15 @@ class Scanner
* '</script>' string.
*
* @param string $sequence
* @param bool $caseSensitive
* @param bool $caseSensitive
*
* @return bool
*/
public function sequenceMatches($sequence, $caseSensitive = true)
{
$portion = substr($this->data, $this->char, strlen($sequence));
return $caseSensitive ? $portion === $sequence : strcasecmp($portion, $sequence) === 0;
return $caseSensitive ? $portion === $sequence : 0 === strcasecmp($portion, $sequence);
}
/**
@ -111,14 +113,13 @@ class Scanner
/**
* Get the next character.
*
* Note: This advances the pointer.
*
* @return string The next character.
*/
public function next()
{
$this->char++;
++$this->char;
if ($this->char < $this->EOF) {
return $this->data[$this->char];
@ -129,7 +130,6 @@ class Scanner
/**
* Get the current character.
*
* Note, this does not advance the pointer.
*
* @return string The current character.
@ -157,19 +157,17 @@ class Scanner
* Unconsume some of the data.
* This moves the data pointer backwards.
*
* @param int $howMany
* The number of characters to move the pointer back.
* @param int $howMany The number of characters to move the pointer back.
*/
public function unconsume($howMany = 1)
{
if (($this->char - $howMany) >= 0) {
$this->char = $this->char - $howMany;
$this->char -= $howMany;
}
}
/**
* Get the next group of characters that are hex characters.
*
* Note, along with getting the characters the pointer in the data will be
* moved as well.
*
@ -182,7 +180,6 @@ class Scanner
/**
* Get the next group of characters that are ASCII Alpha characters.
*
* Note, along with getting the characters the pointer in the data will be
* moved as well.
*
@ -195,7 +192,6 @@ class Scanner
/**
* Get the next group of characters that are ASCII Alpha characters and numbers.
*
* Note, along with getting the characters the pointer in the data will be
* moved as well.
*
@ -208,7 +204,6 @@ class Scanner
/**
* Get the next group of numbers.
*
* Note, along with getting the characters the pointer in the data will be
* moved as well.
*
@ -221,12 +216,21 @@ class Scanner
/**
* Consume whitespace.
*
* Whitespace in HTML5 is: formfeed, tab, newline, space.
*
* @return int|false The length of the matched whitespace, or false when the pointer is already at or past EOF.
*/
public function whitespace()
{
return $this->doCharsWhile("\n\t\f ");
if ($this->char >= $this->EOF) {
return false;
}
$len = strspn($this->data, "\n\t\f ", $this->char);
$this->char += $len;
return $len;
}
/**
@ -236,7 +240,7 @@ class Scanner
*/
public function currentLine()
{
if (empty($this->EOF) || $this->char == 0) {
if (empty($this->EOF) || 0 === $this->char) {
return 1;
}
@ -279,7 +283,7 @@ class Scanner
public function columnOffset()
{
// Short circuit for the first char.
if ($this->char == 0) {
if (0 === $this->char) {
return 0;
}
@ -293,7 +297,7 @@ class Scanner
// However, for here we want the length up until the next byte to be
// processed, so add one to the current byte ($this->char).
if ($lastLine !== false) {
if (false !== $lastLine) {
$findLengthOf = substr($this->data, $lastLine + 1, $this->char - 1 - $lastLine);
} else {
// After a newline.
@ -341,7 +345,7 @@ class Scanner
$crlfTable = array(
"\0" => "\xEF\xBF\xBD",
"\r\n" => "\n",
"\r" => "\n"
"\r" => "\n",
);
return strtr($data, $crlfTable);
@ -355,12 +359,11 @@ class Scanner
* Matches as far as possible until we reach a certain set of bytes
* and returns the matched substring.
*
* @param string $bytes
* Bytes to match.
* @param int $max
* Maximum number of bytes to scan.
* @param string $bytes Bytes to match.
* @param int $max Maximum number of bytes to scan.
*
* @return mixed Index or false if no match is found. You should use strong
* equality when checking the result, since index could be 0.
* equality when checking the result, since index could be 0.
*/
private function doCharsUntil($bytes, $max = null)
{
@ -368,7 +371,7 @@ class Scanner
return false;
}
if ($max === 0 || $max) {
if (0 === $max || $max) {
$len = strcspn($this->data, $bytes, $this->char, $max);
} else {
$len = strcspn($this->data, $bytes, $this->char);
@ -386,12 +389,10 @@ class Scanner
* Matches as far as possible with a certain set of bytes
* and returns the matched substring.
*
* @param string $bytes
* A mask of bytes to match. If ANY byte in this mask matches the
* current char, the pointer advances and the char is part of the
* substring.
* @param int $max
* The max number of chars to read.
* @param string $bytes A mask of bytes to match. If ANY byte in this mask matches the
* current char, the pointer advances and the char is part of the
* substring.
* @param int $max The max number of chars to read.
*
* @return string
*/
@ -401,7 +402,7 @@ class Scanner
return false;
}
if ($max === 0 || $max) {
if (0 === $max || $max) {
$len = strspn($this->data, $bytes, $this->char, $max);
} else {
$len = strspn($this->data, $bytes, $this->char);

View file

@ -2,6 +2,7 @@
/**
* Loads a string to be parsed.
*/
namespace Masterminds\HTML5\Parser;
/*
@ -50,7 +51,7 @@ class StringInputStream implements InputStream
private $data;
/**
* The current integer byte position we are in $data
* The current integer byte position we are in $data.
*/
private $char;
@ -67,9 +68,9 @@ class StringInputStream implements InputStream
/**
* Create a new InputStream wrapper.
*
* @param string $data Data to parse
* @param string $data Data to parse.
* @param string $encoding The encoding to use for the data.
* @param string $debug A fprintf format to use to echo the data on stdout.
* @param string $debug A fprintf format to use to echo the data on stdout.
*/
public function __construct($data, $encoding = 'UTF-8', $debug = '')
{
@ -110,7 +111,7 @@ class StringInputStream implements InputStream
$crlfTable = array(
"\0" => "\xEF\xBF\xBD",
"\r\n" => "\n",
"\r" => "\n"
"\r" => "\n",
);
return strtr($data, $crlfTable);
@ -121,7 +122,7 @@ class StringInputStream implements InputStream
*/
public function currentLine()
{
if (empty($this->EOF) || $this->char == 0) {
if (empty($this->EOF) || 0 === $this->char) {
return 1;
}
// Add one to $this->char because we want the number for the next
@ -130,9 +131,7 @@ class StringInputStream implements InputStream
}
/**
*
* @deprecated
*
*/
public function getCurrentLine()
{
@ -141,7 +140,6 @@ class StringInputStream implements InputStream
/**
* Returns the current column of the current line that the tokenizer is at.
*
* Newlines are column 0. The first char after a newline is column 1.
*
* @return int The column number.
@ -149,7 +147,7 @@ class StringInputStream implements InputStream
public function columnOffset()
{
// Short circuit for the first char.
if ($this->char == 0) {
if (0 === $this->char) {
return 0;
}
// strrpos is weird, and the offset needs to be negative for what we
@ -162,7 +160,7 @@ class StringInputStream implements InputStream
// However, for here we want the length up until the next byte to be
// processed, so add one to the current byte ($this->char).
if ($lastLine !== false) {
if (false !== $lastLine) {
$findLengthOf = substr($this->data, $lastLine + 1, $this->char - 1 - $lastLine);
} else {
// After a newline.
@ -173,9 +171,7 @@ class StringInputStream implements InputStream
}
/**
*
* @deprecated
*
*/
public function getColumnOffset()
{
@ -198,7 +194,7 @@ class StringInputStream implements InputStream
*/
public function next()
{
$this->char ++;
++$this->char;
}
/**
@ -212,15 +208,11 @@ class StringInputStream implements InputStream
/**
* Is the current pointer location valid.
*
* @return bool Is the current pointer location valid.
* @return bool Whether the current pointer location is valid.
*/
public function valid()
{
if ($this->char < $this->EOF) {
return true;
}
return false;
return $this->char < $this->EOF;
}
/**
@ -229,10 +221,10 @@ class StringInputStream implements InputStream
* This reads to the end of the file, and sets the read marker at the
* end of the file.
*
* @note This performs bounds checking
* Note this performs bounds checking.
*
* @return string Returns the remaining text. If called when the InputStream is
* already exhausted, it returns an empty string.
* already exhausted, it returns an empty string.
*/
public function remainingChars()
{
@ -254,12 +246,11 @@ class StringInputStream implements InputStream
* Matches as far as possible until we reach a certain set of bytes
* and returns the matched substring.
*
* @param string $bytes
* Bytes to match.
* @param int $max
* Maximum number of bytes to scan.
* @param string $bytes Bytes to match.
* @param int $max Maximum number of bytes to scan.
*
* @return mixed Index or false if no match is found. You should use strong
* equality when checking the result, since index could be 0.
* equality when checking the result, since index could be 0.
*/
public function charsUntil($bytes, $max = null)
{
@ -267,7 +258,7 @@ class StringInputStream implements InputStream
return false;
}
if ($max === 0 || $max) {
if (0 === $max || $max) {
$len = strcspn($this->data, $bytes, $this->char, $max);
} else {
$len = strcspn($this->data, $bytes, $this->char);
@ -285,12 +276,10 @@ class StringInputStream implements InputStream
* Matches as far as possible with a certain set of bytes
* and returns the matched substring.
*
* @param string $bytes
* A mask of bytes to match. If ANY byte in this mask matches the
* current char, the pointer advances and the char is part of the
* substring.
* @param int $max
* The max number of chars to read.
* @param string $bytes A mask of bytes to match. If ANY byte in this mask matches the
* current char, the pointer advances and the char is part of the
* substring.
* @param int $max The max number of chars to read.
*
* @return string
*/
@ -300,7 +289,7 @@ class StringInputStream implements InputStream
return false;
}
if ($max === 0 || $max) {
if (0 === $max || $max) {
$len = strspn($this->data, $bytes, $this->char, $max);
} else {
$len = strspn($this->data, $bytes, $this->char);
@ -314,13 +303,12 @@ class StringInputStream implements InputStream
/**
* Unconsume characters.
*
* @param int $howMany
* The number of characters to unconsume.
* @param int $howMany The number of characters to unconsume.
*/
public function unconsume($howMany = 1)
{
if (($this->char - $howMany) >= 0) {
$this->char = $this->char - $howMany;
$this->char -= $howMany;
}
}

File diff suppressed because it is too large Load diff

View file

@ -1,4 +1,5 @@
<?php
namespace Masterminds\HTML5\Parser;
/**
@ -14,7 +15,6 @@ namespace Masterminds\HTML5\Parser;
*/
class TreeBuildingRules
{
protected static $tags = array(
'li' => 1,
'dd' => 1,
@ -29,20 +29,9 @@ class TreeBuildingRules
'tbody' => 1,
'table' => 1,
'optgroup' => 1,
'option' => 1
'option' => 1,
);
/**
* Build a new rules engine.
*
* @param \DOMDocument $doc
* The DOM document to use for evaluation and modification.
*/
public function __construct($doc)
{
$this->doc = $doc;
}
/**
* Returns true if the given tagname has special processing rules.
*/
@ -71,7 +60,7 @@ class TreeBuildingRules
return $this->handleRT($new, $current);
case 'optgroup':
return $this->closeIfCurrentMatches($new, $current, array(
'optgroup'
'optgroup',
));
case 'option':
return $this->closeIfCurrentMatches($new, $current, array(
@ -79,13 +68,13 @@ class TreeBuildingRules
));
case 'tr':
return $this->closeIfCurrentMatches($new, $current, array(
'tr'
'tr',
));
case 'td':
case 'th':
return $this->closeIfCurrentMatches($new, $current, array(
'th',
'td'
'td',
));
case 'tbody':
case 'thead':
@ -95,7 +84,7 @@ class TreeBuildingRules
return $this->closeIfCurrentMatches($new, $current, array(
'thead',
'tfoot',
'tbody'
'tbody',
));
}
@ -105,7 +94,7 @@ class TreeBuildingRules
protected function handleLI($ele, $current)
{
return $this->closeIfCurrentMatches($ele, $current, array(
'li'
'li',
));
}
@ -113,7 +102,7 @@ class TreeBuildingRules
{
return $this->closeIfCurrentMatches($ele, $current, array(
'dt',
'dd'
'dd',
));
}
@ -121,13 +110,13 @@ class TreeBuildingRules
{
return $this->closeIfCurrentMatches($ele, $current, array(
'rt',
'rp'
'rp',
));
}
protected function closeIfCurrentMatches($ele, $current, $match)
{
if (in_array($current->tagName, $match)) {
if (in_array($current->tagName, $match, true)) {
$current->parentNode->appendChild($ele);
} else {
$current->appendChild($ele);

View file

@ -1,5 +1,7 @@
<?php
namespace Masterminds\HTML5\Parser;
/*
*
* Portions based on code from html5lib files with the following copyright:
@ -30,11 +32,10 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
use Masterminds\HTML5\Exception;
/**
* UTF-8 Utilities
* UTF-8 Utilities.
*/
class UTF8Utils
{
/**
* The Unicode replacement character.
*/
@ -76,10 +77,8 @@ class UTF8Utils
* This has not yet been tested with character sets other than UTF-8.
* It should work with ISO-8859-1/-13 and standard Latin Win charsets.
*
* @param string $data
* The data to convert.
* @param string $encoding
* A valid encoding. Examples: http://www.php.net/manual/en/mbstring.supported-encodings.php
* @param string $data The data to convert
* @param string $encoding A valid encoding. Examples: http://www.php.net/manual/en/mbstring.supported-encodings.php
*
* @return string
*/
@ -108,7 +107,7 @@ class UTF8Utils
$data = mb_convert_encoding($data, 'UTF-8', $encoding);
mb_substitute_character($save);
} // @todo Get iconv running in at least some environments if that is possible.
elseif (function_exists('iconv') && $encoding != 'auto') {
elseif (function_exists('iconv') && 'auto' !== $encoding) {
// fprintf(STDOUT, "iconv found\n");
// iconv has the following behaviors:
// - Overlong representations are ignored.
@ -122,7 +121,7 @@ class UTF8Utils
/*
* One leading U+FEFF BYTE ORDER MARK character must be ignored if any are present.
*/
if (substr($data, 0, 3) === "\xEF\xBB\xBF") {
if ("\xEF\xBB\xBF" === substr($data, 0, 3)) {
$data = substr($data, 3);
}
@ -132,9 +131,9 @@ class UTF8Utils
/**
* Checks for Unicode code points that are not valid in a document.
*
* @param string $data A string to analyze.
* @param string $data A string to analyze
*
* @return array An array of (string) error messages produced by the scanning.
* @return array An array of (string) error messages produced by the scanning
*/
public static function checkForIllegalCodepoints($data)
{
@ -144,7 +143,7 @@ class UTF8Utils
/*
* All U+0000 null characters in the input must be replaced by U+FFFD REPLACEMENT CHARACTERs. Any occurrences of such characters is a parse error.
*/
for ($i = 0, $count = substr_count($data, "\0"); $i < $count; $i ++) {
for ($i = 0, $count = substr_count($data, "\0"); $i < $count; ++$i) {
$errors[] = 'null-character';
}
@ -166,7 +165,7 @@ class UTF8Utils
|
[\xF0-\xF4][\x8F-\xBF]\xBF[\xBE\xBF] # U+nFFFE and U+nFFFF (1 <= n <= 10_{16})
)/x', $data, $matches);
for ($i = 0; $i < $count; $i ++) {
for ($i = 0; $i < $count; ++$i) {
$errors[] = 'invalid-codepoint';
}

View file

@ -3,11 +3,12 @@
* @file
* This contains HTML5 entities to use with serializing.
*
* The list here is mildly different from the list at \Masterminds\HTML5\Entities because
* The list here is mildly different from the list at Entities because
* that list was generated from the w3c. It contains some entities that are
* not entirely proper such as &am; which maps to &. This list is meant to be
* a fallback for PHP versions prior to PHP 5.4 when dealing with encoding.
*/
namespace Masterminds\HTML5\Serializer;
/**
@ -17,7 +18,6 @@ namespace Masterminds\HTML5\Serializer;
*/
class HTML5Entities
{
public static $map = array(
' ' => '&Tab;',
"\n" => '&NewLine;',
@ -1528,6 +1528,6 @@ class HTML5Entities
'𝕨' => '&wopf;',
'𝕩' => '&xopf;',
'𝕪' => '&yopf;',
'𝕫' => '&zopf;'
'𝕫' => '&zopf;',
);
}

View file

@ -6,6 +6,7 @@
* These output rules are likely to generate output similar to the document that
* was parsed. It is not intended to output exactly the document that was parsed.
*/
namespace Masterminds\HTML5\Serializer;
use Masterminds\HTML5\Elements;
@ -13,10 +14,10 @@ use Masterminds\HTML5\Elements;
/**
* Generate the output html5 based on element rules.
*/
class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
class OutputRules implements RulesInterface
{
/**
* Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0
* Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0.
*/
const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';
@ -31,7 +32,7 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/';
/**
* Holds the HTML5 element names that causes a namespace switch
* Holds the HTML5 element names that cause a namespace switch.
*
* @var array
*/
@ -50,8 +51,9 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
const IM_IN_MATHML = 3;
/**
* Used as cache to detect if is available ENT_HTML5
* @var boolean
* Used as a cache to detect whether ENT_HTML5 is available.
*
* @var bool
*/
private $hasHTML5 = false;
@ -169,12 +171,13 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
// If HHVM, see https://github.com/facebook/hhvm/issues/2727
$this->hasHTML5 = defined('ENT_HTML5') && !defined('HHVM_VERSION');
}
/**
 * Append a rule to the list consulted by nonBooleanAttribute().
 *
 * The rule is stored as given in $this->nonBooleanAttributes; no validation
 * is performed here. nonBooleanAttribute() reads the optional keys
 * 'nodeNamespace', 'attNamespace', 'nodeName', 'attrName', 'xpath' and
 * 'prefixes' from each stored rule, and attrs() uses its result to decide
 * whether an attribute is written with ="value" even when the value is empty.
 *
 * @param array $rule The rule to append.
 */
public function addRule(array $rule)
{
$this->nonBooleanAttributes[] = $rule;
}
public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser)
public function setTraverser(Traverser $traverser)
{
$this->traverser = $traverser;
@ -211,10 +214,10 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
// If we are in SVG or MathML there is special handling.
// Using if/elseif instead of switch because it's faster in PHP.
if ($name == 'svg') {
if ('svg' == $name) {
$this->outputMode = static::IM_IN_SVG;
$name = Elements::normalizeSvgElement($name);
} elseif ($name == 'math') {
} elseif ('math' == $name) {
$this->outputMode = static::IM_IN_MATHML;
}
@ -234,13 +237,13 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
}
// Close out the SVG or MathML special handling.
if ($name == 'svg' || $name == 'math') {
if ('svg' == $name || 'math' == $name) {
$this->outputMode = static::IM_IN_HTML;
}
}
// If not unary, add a closing tag.
if (! Elements::isA($name, Elements::VOID_TAG)) {
if (!Elements::isA($name, Elements::VOID_TAG)) {
$this->closeTag($ele);
}
}
@ -248,13 +251,13 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
/**
* Write a text node.
*
* @param \DOMText $ele
* The text node to write.
* @param \DOMText $ele The text node to write.
*/
public function text($ele)
{
if (isset($ele->parentNode) && isset($ele->parentNode->tagName) && Elements::isA($ele->parentNode->localName, Elements::TEXT_RAW)) {
$this->wr($ele->data);
return;
}
@ -283,20 +286,19 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
->wr($ele->data)
->wr('?>');
}
/**
* Write the namespace attributes
* Write the namespace attributes.
*
*
* @param \DOMNode $ele
* The element being written.
* @param \DOMNode $ele The element being written.
*/
protected function namespaceAttrs($ele)
{
if (!$this->xpath || $this->xpath->document !== $ele->ownerDocument){
if (!$this->xpath || $this->xpath->document !== $ele->ownerDocument) {
$this->xpath = new \DOMXPath($ele->ownerDocument);
}
foreach( $this->xpath->query('namespace::*[not(.=../../namespace::*)]', $ele ) as $nsNode ) {
foreach ($this->xpath->query('namespace::*[not(.=../../namespace::*)]', $ele) as $nsNode) {
if (!in_array($nsNode->nodeValue, $this->implicitNamespaces)) {
$this->wr(' ')->wr($nsNode->nodeName)->wr('="')->wr($nsNode->nodeValue)->wr('"');
}
@ -309,18 +311,15 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
* Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
* qualified name (8.3).
*
* @param \DOMNode $ele
* The element being written.
* @param \DOMNode $ele The element being written.
*/
protected function openTag($ele)
{
$this->wr('<')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName);
$this->attrs($ele);
$this->namespaceAttrs($ele);
if ($this->outputMode == static::IM_IN_HTML) {
$this->wr('>');
} // If we are not in html mode we are in SVG, MathML, or XML embedded content.
@ -337,7 +336,7 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
protected function attrs($ele)
{
// FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
if (! $ele->hasAttributes()) {
if (!$ele->hasAttributes()) {
return $this;
}
@ -345,7 +344,7 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
// value-less attributes.
$map = $ele->attributes;
$len = $map->length;
for ($i = 0; $i < $len; ++ $i) {
for ($i = 0; $i < $len; ++$i) {
$node = $map->item($i);
$val = $this->enc($node->value, true);
@ -365,45 +364,42 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
$this->wr(' ')->wr($name);
if ((isset($val) && $val !== '') || $this->nonBooleanAttribute($node)) {
if ((isset($val) && '' !== $val) || $this->nonBooleanAttribute($node)) {
$this->wr('="')->wr($val)->wr('"');
}
}
}
protected function nonBooleanAttribute(\DOMAttr $attr)
{
$ele = $attr->ownerElement;
foreach($this->nonBooleanAttributes as $rule){
if(isset($rule['nodeNamespace']) && $rule['nodeNamespace']!==$ele->namespaceURI){
foreach ($this->nonBooleanAttributes as $rule) {
if (isset($rule['nodeNamespace']) && $rule['nodeNamespace'] !== $ele->namespaceURI) {
continue;
}
if(isset($rule['attNamespace']) && $rule['attNamespace']!==$attr->namespaceURI){
if (isset($rule['attNamespace']) && $rule['attNamespace'] !== $attr->namespaceURI) {
continue;
}
if(isset($rule['nodeName']) && !is_array($rule['nodeName']) && $rule['nodeName']!==$ele->localName){
if (isset($rule['nodeName']) && !is_array($rule['nodeName']) && $rule['nodeName'] !== $ele->localName) {
continue;
}
if(isset($rule['nodeName']) && is_array($rule['nodeName']) && !in_array($ele->localName, $rule['nodeName'], true)){
if (isset($rule['nodeName']) && is_array($rule['nodeName']) && !in_array($ele->localName, $rule['nodeName'], true)) {
continue;
}
if(isset($rule['attrName']) && !is_array($rule['attrName']) && $rule['attrName']!==$attr->localName){
if (isset($rule['attrName']) && !is_array($rule['attrName']) && $rule['attrName'] !== $attr->localName) {
continue;
}
if(isset($rule['attrName']) && is_array($rule['attrName']) && !in_array($attr->localName, $rule['attrName'], true)){
if (isset($rule['attrName']) && is_array($rule['attrName']) && !in_array($attr->localName, $rule['attrName'], true)) {
continue;
}
if(isset($rule['xpath'])){
if (isset($rule['xpath'])) {
$xp = $this->getXPath($attr);
if(isset($rule['prefixes'])){
foreach($rule['prefixes'] as $nsPrefix => $ns){
if (isset($rule['prefixes'])) {
foreach ($rule['prefixes'] as $nsPrefix => $ns) {
$xp->registerNamespace($nsPrefix, $ns);
}
}
if(!$xp->evaluate($rule['xpath'], $attr)){
if (!$xp->evaluate($rule['xpath'], $attr)) {
continue;
}
}
@ -414,10 +410,12 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
return false;
}
/**
 * Return the DOMXPath cached in $this->xpath, creating it on first use.
 *
 * NOTE(review): unlike namespaceAttrs(), which rebuilds the cached object when
 * $this->xpath->document differs from the element's ownerDocument, this helper
 * only checks that a cached object exists. Confirm whether one instance can be
 * asked about attributes from more than one document.
 *
 * @param \DOMNode $node Node whose ownerDocument is used to build the DOMXPath when none is cached.
 *
 * @return \DOMXPath
 */
private function getXPath(\DOMNode $node){
if(!$this->xpath){
private function getXPath(\DOMNode $node)
{
if (!$this->xpath) {
// Lazily create the XPath helper from the document that owns $node.
$this->xpath = new \DOMXPath($node->ownerDocument);
}
return $this->xpath;
}
@ -427,8 +425,7 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
* Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
* qualified name (8.3).
*
* @param \DOMNode $ele
* The element being written.
* @param \DOMNode $ele The element being written.
*/
protected function closeTag($ele)
{
@ -440,25 +437,26 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
/**
* Write to the output.
*
* @param string $text
* The string to put into the output.
* @param string $text The string to put into the output
*
* @return \Masterminds\HTML5\Serializer\Traverser $this so it can be used in chaining.
* @return $this
*/
protected function wr($text)
{
// Write the text to the stream held in $this->out; fwrite()'s return value is not checked.
fwrite($this->out, $text);
// Return the current object so calls can be chained, e.g. $this->wr('<')->wr($name).
return $this;
}
/**
* Write a new line character.
*
* @return \Masterminds\HTML5\Serializer\Traverser $this so it can be used in chaining.
* @return $this
*/
protected function nl()
{
// PHP_EOL is the end-of-line sequence of the platform PHP runs on, so the
// bytes written here can differ between platforms.
fwrite($this->out, PHP_EOL);
// Return the current object so calls can be chained.
return $this;
}
@ -484,18 +482,15 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
*
* @todo Use the Entities class in php 5.3 to have html5 entities.
*
* @param string $text
* text to encode.
* @param boolean $attribute
* True if we are encoding an attrubute, false otherwise
* @param string $text Text to encode.
* @param bool $attribute True if we are encoding an attribute, false otherwise.
*
* @return string The encoded text.
*/
protected function enc($text, $attribute = false)
{
// Escape the text rather than convert to named character references.
if (! $this->encode) {
if (!$this->encode) {
return $this->escape($text, $attribute);
}
@ -507,7 +502,7 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
} // If a version earlier than 5.4 html5 entities are not entirely handled.
// This manually handles them.
else {
return strtr($text, \Masterminds\HTML5\Serializer\HTML5Entities::$map);
return strtr($text, HTML5Entities::$map);
}
}
@ -525,14 +520,11 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
*
* @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
*
* @param string $text
* text to escape.
* @param boolean $attribute
* True if we are escaping an attrubute, false otherwise
* @param string $text Text to escape.
* @param bool $attribute True if we are escaping an attribute, false otherwise.
*/
protected function escape($text, $attribute = false)
{
// Not using htmlspecialchars because, while it does escaping, it doesn't
// match the requirements of section 8.5. For example, it doesn't handle
// non-breaking spaces.
@ -540,14 +532,14 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
$replace = array(
'"' => '&quot;',
'&' => '&amp;',
"\xc2\xa0" => '&nbsp;'
"\xc2\xa0" => '&nbsp;',
);
} else {
$replace = array(
'<' => '&lt;',
'>' => '&gt;',
'&' => '&amp;',
"\xc2\xa0" => '&nbsp;'
"\xc2\xa0" => '&nbsp;',
);
}

View file

@ -3,28 +3,24 @@
* @file
* The interface definition for Rules to generate output.
*/
namespace Masterminds\HTML5\Serializer;
/**
* To create a new rule set for writing output the RulesInterface needs to be
* implemented.
* The resulting class can be specified in the options with the
* key of rules.
* To create a new rule set for writing output the RulesInterface needs to be implemented.
* The resulting class can be specified in the options with the key of rules.
*
* For an example implementation see \Masterminds\HTML5\Serializer\OutputRules.
* For an example implementation see Serializer\OutputRules.
*/
interface RulesInterface
{
/**
* The class constructor.
*
* Note, before the rules can be used a traverser must be registered.
*
* @param mixed $output
* The output stream to write output to.
* @param array $options
* An array of options.
* @param mixed $output The output stream to write output to.
* @param array $options An array of options.
*/
public function __construct($output, $options = array());
@ -33,11 +29,11 @@ interface RulesInterface
*
* Note, only one traverser can be used by the rules.
*
* @param \Masterminds\HTML5\Serializer\Traverser $traverser
* The traverser used in the rules.
* @return \Masterminds\HTML5\Serializer\RulesInterface $this for the current object.
* @param Traverser $traverser The traverser used in the rules.
*
* @return RulesInterface $this for the current object.
*/
public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser);
public function setTraverser(Traverser $traverser);
/**
* Write a document element (\DOMDocument).
@ -92,7 +88,7 @@ interface RulesInterface
/**
* Write a processor instruction.
*
* To learn about processor instructions see \Masterminds\HTML5\InstructionProcessor
* To learn about processor instructions see InstructionProcessor
*
* Instead of returning the result write it to the output stream ($output)
* that was passed into the constructor.

View file

@ -1,4 +1,5 @@
<?php
namespace Masterminds\HTML5\Serializer;
/**
@ -12,14 +13,13 @@ namespace Masterminds\HTML5\Serializer;
*/
class Traverser
{
/**
* Namespaces that should be treated as "local" to HTML5.
*/
static $local_ns = array(
protected static $local_ns = array(
'http://www.w3.org/1999/xhtml' => 'html',
'http://www.w3.org/1998/Math/MathML' => 'math',
'http://www.w3.org/2000/svg' => 'svg'
'http://www.w3.org/2000/svg' => 'svg',
);
protected $dom;
@ -35,16 +35,13 @@ class Traverser
/**
* Create a traverser.
*
* @param DOMNode|DOMNodeList $dom
* The document or node to traverse.
* @param resource $out
* A stream that allows writing. The traverser will output into this
* stream.
* @param array $options
* An array or options for the traverser as key/value pairs. These include:
* - encode_entities: A bool to specify if full encding should happen for all named
* charachter references. Defaults to false which escapes &'<>".
* - output_rules: The path to the class handling the output rules.
* @param \DOMNode|\DOMNodeList $dom The document or node to traverse.
* @param resource $out A stream that allows writing. The traverser will output into this
* stream.
* @param array $options An array of options for the traverser as key/value pairs. These include:
* - encode_entities: A bool to specify if full encoding should happen for all named
* character references. Defaults to false which escapes &'<>".
* - output_rules: The path to the class handling the output rules.
*/
public function __construct($dom, $out, RulesInterface $rules, $options = array())
{
@ -59,8 +56,7 @@ class Traverser
/**
* Tell the traverser to walk the DOM.
*
* @return resource $out
* Returns the output stream.
* @return resource $out Returns the output stream.
*/
public function walk()
{
@ -87,8 +83,7 @@ class Traverser
/**
* Process a node in the DOM.
*
* @param mixed $node
* A node implementing \DOMNode.
* @param mixed $node A node implementing \DOMNode.
*/
public function node($node)
{
@ -119,8 +114,7 @@ class Traverser
/**
* Walk through all the nodes on a node list.
*
* @param \DOMNodeList $nl
* A list of child elements to walk through.
* @param \DOMNodeList $nl A list of child elements to walk through.
*/
public function children($nl)
{
@ -132,10 +126,9 @@ class Traverser
/**
* Is an element local?
*
* @param mixed $ele
* An element that implement \DOMNode.
* @param mixed $ele An element that implements \DOMNode.
*
* @return bool True if local and false otherwise.
* @return bool true if local and false otherwise.
*/
public function isLocalElement($ele)
{