'-', '_' => '-', '__' => '__', '/' => '-', '[' => '-', ']' => '' )) {
    // Apply the caller-supplied (or default) character replacements first.
    $identifier = strtr($identifier, $filter);
    // Valid characters in a CSS identifier are:
    // - the hyphen (U+002D)
    // - 0-9 (U+0030 - U+0039)
    // - A-Z (U+0041 - U+005A)
    // - the underscore (U+005F)
    // - a-z (U+0061 - U+007A)
    // - ISO 10646 characters U+00A1 and higher
    // We strip out any character not in the above list. Note that the
    // character class below stops at U+FFFF, so code points beyond that are
    // stripped as well.
    $identifier = preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $identifier);
    // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
    // The patterns are applied in order: a leading digit is replaced by "_",
    // then a leading "-<digit>" or "--" pair is replaced by "__".
    $identifier = preg_replace(array(
      '/^[0-9]/',
      '/^(-[0-9])|^(--)/'
    ), array('_', '__'), $identifier);
    return $identifier;
  }

  /**
   * Sets if this request is an Ajax request.
   *
   * The value is stored in static::$isAjax, which getUniqueId() consults to
   * decide whether to append a random suffix instead of a per-request counter.
   *
   * @param bool $is_ajax
   *   TRUE if this request is an Ajax request, FALSE otherwise.
   */
  public static function setIsAjax($is_ajax) {
    static::$isAjax = $is_ajax;
  }

  /**
   * Prepares a string for use as a valid HTML ID and guarantees uniqueness.
   *
   * This function ensures that each passed HTML ID value only exists once on
   * the page. By tracking the already returned ids, this function enables
   * forms, blocks, and other content to be output multiple times on the same
   * page, without breaking (X)HTML validation.
   *
   * For already existing IDs, a counter is appended to the ID string.
   * Therefore, JavaScript and CSS code should not rely on any value that was
   * generated by this function and instead should rely on manually added CSS
   * classes or similarly reliable constructs.
   *
   * Two consecutive hyphens separate the counter from the original ID. To
   * manage uniqueness across multiple Ajax requests on the same page, Ajax
   * requests POST an array of all IDs currently present on the page, which are
   * used to prime this function's cache upon first invocation.
*
   * To allow reverse-parsing of IDs submitted via Ajax, any multiple
   * consecutive hyphens in the originally passed $id are replaced with a
   * single hyphen.
   *
   * @param string $id
   *   The ID to clean.
   *
   * @return string
   *   The cleaned ID.
   */
  public static function getUniqueId($id) {
    // Markup produced during an Ajax request is merged into a page that
    // already carries IDs, so the IDs must be unique across the merged
    // content. A random suffix is used instead of a per-request counter.
    if (static::$isAjax) {
      $ajax_base = static::getId($id);
      return $ajax_base . '--' . Crypt::randomBytesBase64(8);
    }

    // @todo Remove all that code once we switch over to random IDs only,
    // see https://www.drupal.org/node/1090592.
    if (!isset(static::$seenIdsInit)) {
      static::$seenIdsInit = array();
    }
    if (!isset(static::$seenIds)) {
      static::$seenIds = static::$seenIdsInit;
    }

    $base_id = static::getId($id);

    // First sighting of this base ID: record it and hand it back unchanged.
    if (!isset(static::$seenIds[$base_id])) {
      static::$seenIds[$base_id] = 1;
      return $base_id;
    }

    // Repeat sighting: bump the counter and append it after a "--" delimiter.
    // getId() collapses runs of hyphens, so the delimiter cannot occur inside
    // the base ID; that keeps the result unique and lets the seen-ID list be
    // rebuilt from IDs posted back during Ajax requests.
    $occurrence = ++static::$seenIds[$base_id];
    return $base_id . '--' . $occurrence;
  }

  /**
   * Turns a string into a valid HTML ID without guaranteeing uniqueness.
   *
   * The input is lower-cased; spaces, underscores and opening square brackets
   * become hyphens; closing square brackets are dropped; any other character
   * outside letters, digits, hyphens and underscores is removed; finally runs
   * of hyphens are collapsed into one.
   *
   * Call this directly only when the uniqueness guarantee of
   * self::getUniqueId() is intentionally not wanted.
   *
   * @param string $id
   *   The ID to clean.
   *
   * @return string
   *   The cleaned ID.
   *
   * @see self::getUniqueId()
   */
  public static function getId($id) {
    $separator_map = array(' ' => '-', '_' => '-', '[' => '-', ']' => '');
    $lowercased = Unicode::strtolower($id);
    $hyphenated = strtr($lowercased, $separator_map);

    // As defined in http://www.w3.org/TR/html4/types.html#type-name, HTML IDs
    // can only contain letters, digits ([0-9]), hyphens ("-"), underscores
    // ("_"), colons (":"), and periods ("."). Everything else is stripped.
    // The CSS spec does not allow colons or periods in identifiers
    // (http://www.w3.org/TR/CSS21/syndata.html#characters), so those two
    // characters are stripped as well.
    $stripped = preg_replace('/[^A-Za-z0-9\-_]/', '', $hyphenated);

    // Collapse runs of consecutive hyphens into a single hyphen.
    return preg_replace('/\-+/', '-', $stripped);
  }

  /**
   * Forgets every ID recorded so far.
   *
   * The list is set back to NULL, so the next getUniqueId() call initializes
   * it again from static::$seenIdsInit.
   */
  public static function resetSeenIds() {
    static::$seenIds = NULL;
  }

  /**
   * Normalizes an HTML snippet.
   *
   * The snippet is parsed with static::load() and immediately written back
   * out with static::serialize(). This is essentially
   * \DOMDocument::normalizeDocument(), but it operates on an HTML string
   * instead of a \DOMDocument.
   *
   * @param string $html
   *   The HTML string to normalize.
   *
   * @return string
   *   The normalized HTML string.
   */
  public static function normalize($html) {
    return static::serialize(static::load($html));
  }

  /**
   * Parses an HTML snippet and returns it as a DOM object.
   *
   * The snippet is treated as the body part of a partial (X)HTML document: it
   * is injected into a wrapper document in place of the "!html" placeholder
   * and the result is loaded into a full \DOMDocument.
   *
   * Use \Drupal\Component\Utility\Html::serialize() to serialize this
   * \DOMDocument back to a string.
   *
   * @param string $html
   *   The partial (X)HTML snippet to load. Invalid markup will be corrected on
   *   import.
   *
   * @return \DOMDocument
   *   A \DOMDocument that represents the loaded (X)HTML snippet.
   */
  public static function load($html) {
    // NOTE(review): the wrapper-document heredoc on the next line is damaged
    // in this copy of the file (the heredoc opener and the markup surrounding
    // the "!html" placeholder are missing). It is reproduced as found and must
    // be restored from a pristine copy before this method can be parsed.
    $document = << !html EOD;

    // PHP's \DOMDocument serialization adds stray whitespace when the markup
    // of the wrapping document contains newlines, so all newlines are removed
    // from the wrapper while the actual HTML body is injected.
    $substitutions = array("\n" => '', '!html' => $html);
    $document = strtr($document, $substitutions);

    $dom_document = new \DOMDocument();
    // Ignore warnings during HTML soup loading.
    @$dom_document->loadHTML($document);

    return $dom_document;
  }

  /**
   * Converts the body of a \DOMDocument back to an HTML snippet.
   *
   * The function serializes the body part of a \DOMDocument back to an (X)HTML
   * snippet.
The resulting (X)HTML snippet will be properly formatted to be
   * compatible with HTML user agents.
   *
   * If the document contains no <body> element, an empty string is returned.
   *
   * @param \DOMDocument $document
   *   A \DOMDocument object to serialize, only the tags below the first <body>
   *   node will be converted.
   *
   * @return string
   *   A valid (X)HTML snippet, as a string.
   */
  public static function serialize(\DOMDocument $document) {
    $body_node = $document->getElementsByTagName('body')->item(0);
    $html = '';

    // item(0) yields NULL when the document has no <body> element. Return the
    // empty snippet instead of calling a method on NULL.
    if ($body_node === NULL) {
      return $html;
    }

    // Wrap CDATA sections found inside <script> elements, using the helper's
    // default comment markers.
    foreach ($body_node->getElementsByTagName('script') as $node) {
      static::escapeCdataElement($node);
    }
    // Same for <style> elements, but with CSS comment markers.
    foreach ($body_node->getElementsByTagName('style') as $node) {
      static::escapeCdataElement($node, '/*', '*/');
    }
    // Serialize only the children of <body>, not the wrapper document.
    foreach ($body_node->childNodes as $node) {
      $html .= $document->saveXML($node);
    }
    return $html;
  }

  // NOTE(review): everything from here to the end of this chunk is reproduced
  // as found. The docblock and signature of escapeCdataElement(), and parts of
  // its string literals, are missing in this copy of the file; the trailing
  // docblock is also cut off. Restore both from a pristine copy.
  /**
   * Adds comments around a childNodes as $child_node) {
      if ($child_node instanceof \DOMCdataSection) {
        $embed_prefix = "\n{$comment_end}\n";
        // Prevent invalid cdata escaping as this would throw a DOM error.
        // This is the same behavior as found in libxml2.
        // Related W3C standard: http://www.w3.org/TR/REC-xml/#dt-cdsection
        // Fix explanation: http://en.wikipedia.org/wiki/CDATA#Nesting
        $data = str_replace(']]>', ']]]]>', $child_node->data);
        $fragment = $node->ownerDocument->createDocumentFragment();
        $fragment->appendXML($embed_prefix . $data . $embed_suffix);
        $node->appendChild($fragment);
        $node->removeChild($child_node);
      }
    }
  }

  /**
   * Decodes all HTML entities including numerical ones to regular UTF-8 bytes.
   *
   * Double-escaped entities will only be decoded once ("&lt;" becomes
   * "<", not "<"). Be careful when using this function, as it will revert
   * previous sanitization efforts (<script> will become