351 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			351 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
<?php
 | 
						|
 | 
						|
/**
 | 
						|
 * @file
 | 
						|
 * Contains \Drupal\Component\Utility\Html.
 | 
						|
 */
 | 
						|
 | 
						|
namespace Drupal\Component\Utility;
 | 
						|
 | 
						|
/**
 | 
						|
 * Provides DOMDocument helpers for parsing and serializing HTML strings.
 | 
						|
 *
 | 
						|
 * @ingroup utility
 | 
						|
 */
 | 
						|
class Html {

  /**
   * An array of previously cleaned HTML classes.
   *
   * Keys are the raw strings passed to getClass(); values are the cleaned
   * class names.
   *
   * @var array
   */
  protected static $classes = array();

  /**
   * An array of the initial IDs used in one request.
   *
   * Used to seed $seenIds the first time getUniqueId() runs, and again after
   * resetSeenIds() has been called.
   *
   * @var array
   */
  protected static $seenIdsInit;

  /**
   * An array of IDs, including incremented versions when an ID is duplicated.
   *
   * Keys are cleaned IDs; values are the number of times that ID has been
   * handed out so far.
   *
   * @var array
   */
  protected static $seenIds;

  /**
   * Stores whether the current request was sent via AJAX.
   *
   * @var bool
   */
  protected static $isAjax = FALSE;

  /**
   * Prepares a string for use as a valid class name.
   *
   * Results are cached per input string in static::$classes.
   *
   * Do not pass one string containing multiple classes as they will be
   * incorrectly concatenated with dashes, i.e. "one two" will become "one-two".
   *
   * @param string $class
   *   The class name to clean.
   *
   * @return string
   *   The cleaned class name.
   */
  public static function getClass($class) {
    if (!isset(static::$classes[$class])) {
      static::$classes[$class] = static::cleanCssIdentifier(Unicode::strtolower($class));
    }
    return static::$classes[$class];
  }

  /**
   * Prepares a string for use as a CSS identifier (element, class, or ID name).
   *
   * http://www.w3.org/TR/CSS21/syndata.html#characters shows the syntax for
   * valid CSS identifiers (including element names, classes, and IDs in
   * selectors.)
   *
   * @param string $identifier
   *   The identifier to clean.
   * @param array $filter
   *   An array of string replacements to use on the identifier. Applied with
   *   strtr(), so the longest matching key wins: with the defaults a double
   *   underscore is preserved while a single underscore becomes a hyphen.
   *
   * @return string
   *   The cleaned identifier.
   */
  public static function cleanCssIdentifier($identifier, array $filter = array(
    ' ' => '-',
    '_' => '-',
    '__' => '__',
    '/' => '-',
    '[' => '-',
    ']' => ''
  )) {
    $identifier = strtr($identifier, $filter);

    // Valid characters in a CSS identifier are:
    // - the hyphen (U+002D)
    // - 0-9 (U+0030 - U+0039)
    // - A-Z (U+0041 - U+005A)
    // - the underscore (U+005F)
    // - a-z (U+0061 - U+007A)
    // - ISO 10646 characters U+00A1 and higher (the pattern below accepts
    //   these up to U+FFFF only)
    // We strip out any character not in the above list.
    $identifier = preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $identifier);

    // Identifiers cannot start with a digit, two hyphens, or a hyphen followed
    // by a digit. A leading digit is replaced by one underscore; a leading
    // "--" or "-<digit>" is replaced by two underscores.
    $identifier = preg_replace(array(
      '/^[0-9]/',
      '/^(-[0-9])|^(--)/'
    ), array('_', '__'), $identifier);
    return $identifier;
  }

  /**
   * Sets if this request is an Ajax request.
   *
   * @param bool $is_ajax
   *   TRUE if this request is an Ajax request, FALSE otherwise.
   */
  public static function setIsAjax($is_ajax) {
    static::$isAjax = $is_ajax;
  }

  /**
   * Prepares a string for use as a valid HTML ID and guarantees uniqueness.
   *
   * This function ensures that each passed HTML ID value only exists once on
   * the page. By tracking the already returned ids, this function enables
   * forms, blocks, and other content to be output multiple times on the same
   * page, without breaking (X)HTML validation.
   *
   * For already existing IDs, a counter is appended to the ID string.
   * Therefore, JavaScript and CSS code should not rely on any value that was
   * generated by this function and instead should rely on manually added CSS
   * classes or similarly reliable constructs.
   *
   * Two consecutive hyphens separate the counter from the original ID. When
   * the request has been flagged as Ajax via setIsAjax(), no counter is used;
   * instead a random suffix from Crypt::randomBytesBase64(8) is appended
   * after the two hyphens, because the returned markup is merged into a page
   * whose existing IDs are not known here.
   *
   * Because the delimiter must not occur in the base ID, any multiple
   * consecutive hyphens in the originally passed $id are replaced with a
   * single hyphen (see self::getId()).
   *
   * @param string $id
   *   The ID to clean.
   *
   * @return string
   *   The cleaned ID.
   */
  public static function getUniqueId($id) {
    // If this is an Ajax request, then content returned by this page request
    // will be merged with content already on the base page. The HTML IDs must
    // be unique for the fully merged content. Therefore use unique IDs.
    if (static::$isAjax) {
      return static::getId($id) . '--' . Crypt::randomBytesBase64(8);
    }

    // @todo Remove all that code once we switch over to random IDs only,
    // see https://www.drupal.org/node/1090592.
    if (!isset(static::$seenIdsInit)) {
      static::$seenIdsInit = array();
    }
    if (!isset(static::$seenIds)) {
      static::$seenIds = static::$seenIdsInit;
    }

    $id = static::getId($id);

    // Ensure IDs are unique by appending a counter after the first occurrence.
    // The counter needs to be appended with a delimiter that does not exist in
    // the base ID. Requiring a unique delimiter helps ensure that we really do
    // return unique IDs and also makes it possible to split a returned ID back
    // into its base ID and counter.
    if (isset(static::$seenIds[$id])) {
      $id = $id . '--' . ++static::$seenIds[$id];
    }
    else {
      static::$seenIds[$id] = 1;
    }
    return $id;
  }

  /**
   * Prepares a string for use as a valid HTML ID.
   *
   * Only use this function when you want to intentionally skip the uniqueness
   * guarantee of self::getUniqueId().
   *
   * @param string $id
   *   The ID to clean.
   *
   * @return string
   *   The cleaned ID.
   *
   * @see self::getUniqueId()
   */
  public static function getId($id) {
    $id = str_replace(array(' ', '_', '[', ']'), array('-', '-', '-', ''), Unicode::strtolower($id));

    // As defined in http://www.w3.org/TR/html4/types.html#type-name, HTML IDs can
    // only contain letters, digits ([0-9]), hyphens ("-"), underscores ("_"),
    // colons (":"), and periods ("."). We strip out any character not in that
    // list. Note that the CSS spec doesn't allow colons or periods in identifiers
    // (http://www.w3.org/TR/CSS21/syndata.html#characters), so we strip those two
    // characters as well.
    $id = preg_replace('/[^A-Za-z0-9\-_]/', '', $id);

    // Removing multiple consecutive hyphens.
    $id = preg_replace('/\-+/', '-', $id);
    return $id;
  }

  /**
   * Resets the list of seen IDs.
   *
   * The next call to getUniqueId() re-seeds the list from $seenIdsInit.
   */
  public static function resetSeenIds() {
    static::$seenIds = NULL;
  }

  /**
   * Normalizes an HTML snippet.
   *
   * This function is essentially \DOMDocument::normalizeDocument(), but
   * operates on an HTML string instead of a \DOMDocument. It is implemented as
   * a round trip through self::load() and self::serialize().
   *
   * @param string $html
   *   The HTML string to normalize.
   *
   * @return string
   *   The normalized HTML string.
   */
  public static function normalize($html) {
    $document = static::load($html);
    return static::serialize($document);
  }

  /**
   * Parses an HTML snippet and returns it as a DOM object.
   *
   * This function loads the body part of a partial (X)HTML document and returns
   * a full \DOMDocument object that represents this document.
   *
   * Use \Drupal\Component\Utility\Html::serialize() to serialize this
   * \DOMDocument back to a string.
   *
   * @param string $html
   *   The partial (X)HTML snippet to load. Invalid markup will be corrected on
   *   import.
   *
   * @return \DOMDocument
   *   A \DOMDocument that represents the loaded (X)HTML snippet.
   */
  public static function load($html) {
    $document = <<<EOD
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head>
<body>!html</body>
</html>
EOD;
    // PHP's \DOMDocument serialization adds straw whitespace in case the markup
    // of the wrapping document contains newlines, so ensure to remove all
    // newlines before injecting the actual HTML body to process. strtr() with
    // an array does not rescan replaced text, so newlines inside $html itself
    // are left untouched.
    $document = strtr($document, array("\n" => '', '!html' => $html));

    $dom = new \DOMDocument();
    // Ignore warnings during HTML soup loading.
    @$dom->loadHTML($document);

    return $dom;
  }

  /**
   * Converts the body of a \DOMDocument back to an HTML snippet.
   *
   * The function serializes the body part of a \DOMDocument back to an (X)HTML
   * snippet. The resulting (X)HTML snippet will be properly formatted to be
   * compatible with HTML user agents.
   *
   * @param \DOMDocument $document
   *   A \DOMDocument object to serialize, only the tags below the first <body>
   *   node will be converted.
   *
   * @return string
   *   A valid (X)HTML snippet, as a string. An empty string is returned when
   *   the document contains no <body> element.
   */
  public static function serialize(\DOMDocument $document) {
    $body_node = $document->getElementsByTagName('body')->item(0);
    $html = '';

    // item(0) yields NULL when the document has no <body>; there is nothing
    // to serialize in that case.
    if ($body_node === NULL) {
      return $html;
    }

    foreach ($body_node->getElementsByTagName('script') as $node) {
      static::escapeCdataElement($node);
    }
    foreach ($body_node->getElementsByTagName('style') as $node) {
      static::escapeCdataElement($node, '/*', '*/');
    }
    foreach ($body_node->childNodes as $node) {
      $html .= $document->saveXML($node);
    }
    return $html;
  }

  /**
   * Adds comments around a <![CDATA[ section in a \DOMNode.
   *
   * \DOMDocument::loadHTML() in \Drupal\Component\Utility\Html::load() makes
   * CDATA sections from the contents of inline script and style tags. This can
   * cause HTML4 browsers to throw exceptions.
   *
   * This function attempts to solve the problem by creating a
   * \DOMDocumentFragment to comment the CDATA tag. Each CDATA child of $node
   * is removed and a commented replacement is appended to $node.
   *
   * @param \DOMNode $node
   *   The element potentially containing a CDATA node.
   * @param string $comment_start
   *   (optional) A string to use as a comment start marker to escape the CDATA
   *   declaration. Defaults to '//'.
   * @param string $comment_end
   *   (optional) A string to use as a comment end marker to escape the CDATA
   *   declaration. Defaults to an empty string.
   */
  public static function escapeCdataElement(\DOMNode $node, $comment_start = '//', $comment_end = '') {
    // $node->childNodes is a live list and the loop body both appends to and
    // removes from it, so iterate over a snapshot of the children instead.
    foreach (iterator_to_array($node->childNodes) as $child_node) {
      if ($child_node instanceof \DOMCdataSection) {
        $embed_prefix = "\n<!--{$comment_start}--><![CDATA[{$comment_start} ><!--{$comment_end}\n";
        $embed_suffix = "\n{$comment_start}--><!]]>{$comment_end}\n";

        // Prevent invalid cdata escaping as this would throw a DOM error.
        // This is the same behavior as found in libxml2.
        // Related W3C standard: http://www.w3.org/TR/REC-xml/#dt-cdsection
        // Fix explanation: http://en.wikipedia.org/wiki/CDATA#Nesting
        $data = str_replace(']]>', ']]]]><![CDATA[>', $child_node->data);

        $fragment = $node->ownerDocument->createDocumentFragment();
        $fragment->appendXML($embed_prefix . $data . $embed_suffix);
        $node->appendChild($fragment);
        $node->removeChild($child_node);
      }
    }
  }

  /**
   * Decodes HTML entities including numerical ones to regular UTF-8 bytes.
   *
   * Double-escaped entities will only be decoded once ("&amp;lt;" becomes
   * "&lt;", not "<"). Be careful when using this function, as it will revert
   * previous sanitization efforts (&lt;script&gt; will become <script>).
   *
   * Decoding is delegated to html_entity_decode() with ENT_QUOTES, so both
   * double- and single-quote entities are converted.
   *
   * @param string $text
   *   The text to decode entities in.
   *
   * @return string
   *   The input $text, with all HTML entities decoded once.
   */
  public static function decodeEntities($text) {
    return html_entity_decode($text, ENT_QUOTES, 'UTF-8');
  }

}
 |