﻿PK      ��Z)��%�@  �@    Serializer/OutputRules.phpnu W+A��        <?php
/**
 * @file
 * The rules for generating output in the serializer.
 *
 * These output rules are likely to generate output similar to the document that
 * was parsed. It is not intended to output exactly the document that was parsed.
 */

namespace Masterminds\HTML5\Serializer;

use Masterminds\HTML5\Elements;

/**
 * Generate the output html5 based on element rules.
 */
class OutputRules implements RulesInterface
{
    /**
     * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0.
     */
    const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';

    const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML';

    const NAMESPACE_SVG = 'http://www.w3.org/2000/svg';

    const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink';

    const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace';

    const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/';

    /**
     * Namespace URIs that are implicit in the generated output.
     *
     * namespaceAttrs() never writes a namespace declaration whose URI is in
     * this list.
     *
     * @var array
     */
    protected $implicitNamespaces = array(
        self::NAMESPACE_HTML,
        self::NAMESPACE_SVG,
        self::NAMESPACE_MATHML,
        self::NAMESPACE_XML,
        self::NAMESPACE_XMLNS,
    );

    /**
     * Output mode: regular HTML content.
     */
    const IM_IN_HTML = 1;

    /**
     * Output mode: inside an svg element.
     */
    const IM_IN_SVG = 2;

    /**
     * Output mode: inside a math element.
     */
    const IM_IN_MATHML = 3;

    /**
     * Caches whether the ENT_HTML5 constant is defined.
     *
     * @var bool
     */
    private $hasHTML5 = false;

    /**
     * The traverser driving these rules; set through setTraverser().
     */
    protected $traverser;

    /**
     * When false text is escaped via escape(); otherwise enc() converts it to
     * named character references.
     */
    protected $encode = false;

    /**
     * The stream handed to fwrite() by wr() and nl().
     */
    protected $out;

    /**
     * One of the IM_IN_* constants.
     */
    protected $outputMode;

    /**
     * Cached XPath helper; see getXPath().
     *
     * @var \DOMXPath|null
     */
    private $xpath;

    /**
     * Rules describing attributes that are never written as boolean
     * (value-less) attributes, even when their value is the empty string.
     *
     * An attribute matches a rule when every key present in the rule matches:
     *  - nodeNamespace: namespace URI of the owning element.
     *  - attrNamespace: namespace URI of the attribute ("attNamespace" is also
     *    accepted as a legacy spelling).
     *  - nodeName: local name of the owning element, or an array of names.
     *  - attrName: local name of the attribute, or an array of names.
     *  - xpath: XPath expression evaluated with the attribute as context node;
     *    it must evaluate to a truthy value.
     *  - prefixes: map of prefix => namespace URI registered on the XPath
     *    helper before the expression is evaluated.
     *
     * @var array
     */
    protected $nonBooleanAttributes = array(
        array(
            'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
            'attrName' => array('href',
                'hreflang',
                'http-equiv',
                'icon',
                'id',
                'keytype',
                'kind',
                'label',
                'lang',
                'language',
                'list',
                'maxlength',
                'media',
                'method',
                'name',
                'placeholder',
                'rel',
                'rows',
                'rowspan',
                'sandbox',
                'spellcheck',
                'scope',
                'seamless',
                'shape',
                'size',
                'sizes',
                'span',
                'src',
                'srcdoc',
                'srclang',
                'srcset',
                'start',
                'step',
                'style',
                'summary',
                'tabindex',
                'target',
                'title',
                'type',
                'value',
                'width',
                'border',
                'charset',
                'cite',
                'class',
                'code',
                'codebase',
                'color',
                'cols',
                'colspan',
                'content',
                'coords',
                'data',
                'datetime',
                'default',
                'dir',
                'dirname',
                'enctype',
                'for',
                'form',
                'formaction',
                'headers',
                'height',
                'accept',
                'accept-charset',
                'accesskey',
                'action',
                'align',
                'alt',
                'bgcolor',
            ),
        ),
        array(
            'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
            'xpath' => 'starts-with(local-name(), \'data-\')',
        ),
    );

    const DOCTYPE = '<!DOCTYPE html>';

    /**
     * Create the output rules.
     *
     * @param resource $output  The stream all output is written to.
     * @param array    $options Options as key/value pairs. Recognised here:
     *                          - encode_entities: when truthy, enc() converts
     *                          text to named character references instead of
     *                          only escaping it.
     */
    public function __construct($output, $options = array())
    {
        if (isset($options['encode_entities'])) {
            $this->encode = $options['encode_entities'];
        }

        $this->outputMode = static::IM_IN_HTML;
        $this->out = $output;
        $this->hasHTML5 = defined('ENT_HTML5');
    }

    /**
     * Register an additional non-boolean attribute rule.
     *
     * @param array $rule A rule in the format documented on $nonBooleanAttributes.
     */
    public function addRule(array $rule)
    {
        $this->nonBooleanAttributes[] = $rule;
    }

    /**
     * Set the traverser used to walk child nodes.
     *
     * @param Traverser $traverser The traverser to call back into.
     *
     * @return $this
     */
    public function setTraverser(Traverser $traverser)
    {
        $this->traverser = $traverser;

        return $this;
    }

    /**
     * Drop the reference to the traverser.
     *
     * @return $this
     */
    public function unsetTraverser()
    {
        $this->traverser = null;

        return $this;
    }

    /**
     * Write a document: the doctype followed by every top-level node.
     *
     * The doctype is always written; the nodes and the trailing newline are
     * only written when the document has a document element.
     *
     * @param \DOMDocument $dom The document to write.
     */
    public function document($dom)
    {
        $this->doctype();
        if ($dom->documentElement) {
            foreach ($dom->childNodes as $node) {
                $this->traverser->node($node);
            }
            $this->nl();
        }
    }

    /**
     * Write the doctype declaration followed by a newline.
     */
    protected function doctype()
    {
        $this->wr(static::DOCTYPE);
        $this->nl();
    }

    /**
     * Write an element: opening tag, children and (when needed) closing tag.
     *
     * @param \DOMElement $ele The element to write.
     */
    public function element($ele)
    {
        $name = $ele->tagName;

        // Per spec:
        // If the element has a declared namespace in the HTML, MathML or
        // SVG namespaces, we use the lname instead of the tagName.
        if ($this->traverser->isLocalElement($ele)) {
            $name = $ele->localName;
        }

        // Entering svg or math switches the output mode. Remember the mode we
        // came from so it can be restored once this element is fully written;
        // unconditionally falling back to HTML would break nested embeddings.
        $previousMode = $this->outputMode;
        $switchedMode = false;

        // Using if/elseif instead of switch because it's faster in PHP.
        if ('svg' == $name) {
            $this->outputMode = static::IM_IN_SVG;
            $name = Elements::normalizeSvgElement($name);
            $switchedMode = true;
        } elseif ('math' == $name) {
            $this->outputMode = static::IM_IN_MATHML;
            $switchedMode = true;
        }

        $this->openTag($ele);
        if (Elements::isA($name, Elements::TEXT_RAW)) {
            // Raw text elements: character data is written without escaping.
            foreach ($ele->childNodes as $child) {
                if ($child instanceof \DOMCharacterData) {
                    $this->wr($child->data);
                } elseif ($child instanceof \DOMElement) {
                    $this->element($child);
                }
            }
        } elseif ($ele->hasChildNodes()) {
            // Handle children.
            $this->traverser->children($ele->childNodes);
        }

        // If not unary, add a closing tag.
        if (!Elements::isA($name, Elements::VOID_TAG)) {
            $this->closeTag($ele);
        }

        // Leave the SVG or MathML mode only after the closing tag decision.
        // closeTag() must still see the mode openTag() used, otherwise an
        // empty element written as "<svg />" would get a stray "</svg>".
        if ($switchedMode) {
            $this->outputMode = $previousMode;
        }
    }

    /**
     * Write a text node.
     *
     * Text whose parent is a raw text element is written verbatim; all other
     * text goes through enc().
     *
     * @param \DOMText $ele The text node to write.
     */
    public function text($ele)
    {
        if (isset($ele->parentNode) && isset($ele->parentNode->tagName) && Elements::isA($ele->parentNode->localName, Elements::TEXT_RAW)) {
            $this->wr($ele->data);

            return;
        }

        // FIXME: This probably needs some flags set.
        $this->wr($this->enc($ele->data));
    }

    /**
     * Write a CDATA section using the owner document's XML serialization.
     *
     * @param \DOMCdataSection $ele The CDATA node to write.
     */
    public function cdata($ele)
    {
        // This encodes CDATA.
        $this->wr($ele->ownerDocument->saveXML($ele));
    }

    /**
     * Write a comment using the owner document's XML serialization.
     *
     * @param \DOMComment $ele The comment node to write.
     */
    public function comment($ele)
    {
        $this->wr($ele->ownerDocument->saveXML($ele));
    }

    /**
     * Write a processing instruction as "<?target data?>".
     *
     * @param \DOMProcessingInstruction $ele The processing instruction to write.
     */
    public function processorInstruction($ele)
    {
        $this->wr('<?')
            ->wr($ele->target)
            ->wr(' ')
            ->wr($ele->data)
            ->wr('?>');
    }

    /**
     * Write the namespace attributes.
     *
     * Declarations whose URI is listed in $implicitNamespaces are skipped.
     *
     * @param \DOMNode $ele The element being written.
     */
    protected function namespaceAttrs($ele)
    {
        $xpath = $this->getXPath($ele);

        foreach ($xpath->query('namespace::*[not(.=../../namespace::*)]', $ele) as $nsNode) {
            if (in_array($nsNode->nodeValue, $this->implicitNamespaces, true)) {
                continue;
            }

            // The URI lands inside a double-quoted attribute value, so it gets
            // the attribute-mode escaping to keep the markup well formed.
            $this->wr(' ')
                ->wr($nsNode->nodeName)
                ->wr('="')
                ->wr($this->escape($nsNode->nodeValue, true))
                ->wr('"');
        }
    }

    /**
     * Write the opening tag.
     *
     * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
     * qualified name (8.3).
     *
     * @param \DOMNode $ele The element being written.
     */
    protected function openTag($ele)
    {
        $this->wr('<')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName);

        $this->attrs($ele);
        $this->namespaceAttrs($ele);

        // Outside of HTML mode we are in SVG, MathML, or XML embedded content,
        // where an element without children is written self closing.
        if ($this->outputMode != static::IM_IN_HTML && !$ele->hasChildNodes()) {
            $this->wr(' />');

            return;
        }

        $this->wr('>');
    }

    /**
     * Write the attributes of an element.
     *
     * An attribute is written without a value only when its encoded value is
     * empty and no non-boolean rule matches it.
     *
     * @param \DOMNode $ele The element whose attributes are written.
     *
     * @return $this
     */
    protected function attrs($ele)
    {
        // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
        if (!$ele->hasAttributes()) {
            return $this;
        }

        $map = $ele->attributes;
        $len = $map->length;
        for ($i = 0; $i < $len; ++$i) {
            $node = $map->item($i);
            $val = $this->enc($node->value, true);

            // XXX: The spec says that we need to ensure that anything in
            // the XML, XMLNS, or XLink NS's should use the canonical
            // prefix. It seems that DOM does this for us already, but there
            // may be exceptions.
            $name = $node->nodeName;

            // Special handling for attributes in SVG and MathML.
            // Using if/elseif instead of switch because it's faster in PHP.
            if ($this->outputMode == static::IM_IN_SVG) {
                $name = Elements::normalizeSvgAttribute($name);
            } elseif ($this->outputMode == static::IM_IN_MATHML) {
                $name = Elements::normalizeMathMlAttribute($name);
            }

            $this->wr(' ')->wr($name);

            if ((null !== $val && '' !== $val) || $this->nonBooleanAttribute($node)) {
                $this->wr('="')->wr($val)->wr('"');
            }
        }

        return $this;
    }

    /**
     * Check whether an attribute matches one of the non-boolean rules.
     *
     * @param \DOMAttr $attr The attribute to check.
     *
     * @return bool True when at least one rule in $nonBooleanAttributes matches.
     */
    protected function nonBooleanAttribute(\DOMAttr $attr)
    {
        $ele = $attr->ownerElement;
        foreach ($this->nonBooleanAttributes as $rule) {
            if (isset($rule['nodeNamespace']) && $rule['nodeNamespace'] !== $ele->namespaceURI) {
                continue;
            }

            // "attrNamespace" is the documented key; "attNamespace" is the
            // spelling this method historically read, so both are honoured.
            $attrNamespace = null;
            if (isset($rule['attrNamespace'])) {
                $attrNamespace = $rule['attrNamespace'];
            } elseif (isset($rule['attNamespace'])) {
                $attrNamespace = $rule['attNamespace'];
            }
            if (null !== $attrNamespace && $attrNamespace !== $attr->namespaceURI) {
                continue;
            }

            if (isset($rule['nodeName']) && !is_array($rule['nodeName']) && $rule['nodeName'] !== $ele->localName) {
                continue;
            }
            if (isset($rule['nodeName']) && is_array($rule['nodeName']) && !in_array($ele->localName, $rule['nodeName'], true)) {
                continue;
            }
            if (isset($rule['attrName']) && !is_array($rule['attrName']) && $rule['attrName'] !== $attr->localName) {
                continue;
            }
            if (isset($rule['attrName']) && is_array($rule['attrName']) && !in_array($attr->localName, $rule['attrName'], true)) {
                continue;
            }
            if (isset($rule['xpath'])) {
                $xp = $this->getXPath($attr);
                if (isset($rule['prefixes'])) {
                    foreach ($rule['prefixes'] as $nsPrefix => $ns) {
                        $xp->registerNamespace($nsPrefix, $ns);
                    }
                }
                if (!$xp->evaluate($rule['xpath'], $attr)) {
                    continue;
                }
            }

            return true;
        }

        return false;
    }

    /**
     * Get an XPath helper bound to the document that owns the given node.
     *
     * The helper is cached, but rebuilt whenever the node belongs to another
     * document than the cached helper, so an instance that serializes several
     * documents never queries one document with a helper bound to another.
     *
     * @param \DOMNode $node A node attached to the document to query.
     *
     * @return \DOMXPath
     */
    private function getXPath(\DOMNode $node)
    {
        if (!$this->xpath || $this->xpath->document !== $node->ownerDocument) {
            $this->xpath = new \DOMXPath($node->ownerDocument);
        }

        return $this->xpath;
    }

    /**
     * Write the closing tag.
     *
     * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
     * qualified name (8.3).
     *
     * Outside of HTML mode an element without children was already written
     * self closing by openTag(), so nothing is written for it here.
     *
     * @param \DOMNode $ele The element being written.
     */
    protected function closeTag($ele)
    {
        if ($this->outputMode == static::IM_IN_HTML || $ele->hasChildNodes()) {
            $this->wr('</')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName)->wr('>');
        }
    }

    /**
     * Write to the output.
     *
     * @param string $text The string to put into the output
     *
     * @return $this
     */
    protected function wr($text)
    {
        fwrite($this->out, $text);

        return $this;
    }

    /**
     * Write a new line character.
     *
     * @return $this
     */
    protected function nl()
    {
        fwrite($this->out, PHP_EOL);

        return $this;
    }

    /**
     * Encode text.
     *
     * When encode is set to false, the default value, the text passed in is
     * escaped per section 8.3 of the html5 spec. For details on how text is
     * escaped see the escape() method.
     *
     * When encoding is set to true the text is converted to named character
     * references where appropriate. Section 8.1.4 Character references of the
     * html5 spec refers to using named character references. This is useful for
     * characters that can't otherwise legally be used in the text.
     *
     * The named character references are listed in section 8.5.
     *
     * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references True encoding will turn all named character references into their entities.
     *      This includes such characters as +.# and many other common ones. By default
     *      encoding here will just escape &'<>".
     *
     *      Note, PHP 5.4+ has better html5 encoding.
     *
     * @todo Use the Entities class in php 5.3 to have html5 entities.
     *
     * @param string $text      Text to encode.
     * @param bool   $attribute True if we are encoding an attribute, false otherwise.
     *
     * @return string The encoded text.
     */
    protected function enc($text, $attribute = false)
    {
        // Escape the text rather than convert to named character references.
        if (!$this->encode) {
            return $this->escape($text, $attribute);
        }

        // If ENT_HTML5 is available (PHP 5.4+) the native html5 entity
        // functionality converts the named character references.
        if ($this->hasHTML5) {
            return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false);
        }

        // Earlier versions do not entirely handle html5 entities, so fall
        // back to the bundled character map.
        return strtr($text, HTML5Entities::$map);
    }

    /**
     * Escape text.
     *
     * According to the html5 spec section 8.3 Serializing HTML fragments, text
     * within tags that are not style, script, xmp, iframe, noembed, and noframes
     * need to be properly escaped.
     *
     * The & should be converted to &amp;, no breaking space unicode characters
     * converted to &nbsp;, when in attribute mode the " should be converted to
     * &quot;, and when not in attribute mode the < and > should be converted to
     * &lt; and &gt;.
     *
     * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
     *
     * @param string $text      Text to escape.
     * @param bool   $attribute True if we are escaping an attribute, false otherwise.
     *
     * @return string The escaped text.
     */
    protected function escape($text, $attribute = false)
    {
        // Not using htmlspecialchars because, while it does escaping, it doesn't
        // match the rules described above. For example, it doesn't handle
        // non-breaking spaces.
        if ($attribute) {
            $replace = array(
                '"' => '&quot;',
                '&' => '&amp;',
                "\xc2\xa0" => '&nbsp;',
            );
        } else {
            $replace = array(
                '<' => '&lt;',
                '>' => '&gt;',
                '&' => '&amp;',
                "\xc2\xa0" => '&nbsp;',
            );
        }

        return strtr($text, $replace);
    }
}
PK      ��Z�l�E�  �    Serializer/Traverser.phpnu W+A��        <?php

namespace Masterminds\HTML5\Serializer;

/**
 * Traverser for walking a DOM tree.
 *
 * This is a concrete traverser designed to convert a DOM tree into an
 * HTML5 document. It is not intended to be a generic DOMTreeWalker
 * implementation.
 *
 * @see http://www.w3.org/TR/2012/CR-html5-20121217/syntax.html#serializing-html-fragments
 */
class Traverser
{
    /**
     * Namespace URIs whose elements count as "local" to HTML5.
     *
     * Only the keys are consulted, by isLocalElement().
     */
    protected static $local_ns = array(
        'http://www.w3.org/1999/xhtml' => 'html',
        'http://www.w3.org/1998/Math/MathML' => 'math',
        'http://www.w3.org/2000/svg' => 'svg',
    );

    protected $dom;

    protected $options;

    protected $encode = false;

    protected $rules;

    protected $out;

    /**
     * Build a traverser and register it with the given rules.
     *
     * @param \DOMNode|\DOMNodeList $dom     The document, fragment, node list or node to walk.
     * @param resource              $out     A writable stream; it is stored and handed back by walk().
     * @param RulesInterface        $rules   The rules that receive each visited node.
     * @param array                 $options Options as key/value pairs; stored on the traverser as given.
     */
    public function __construct($dom, $out, RulesInterface $rules, $options = array())
    {
        $this->dom = $dom;
        $this->out = $out;
        $this->options = $options;
        $this->rules = $rules;

        // Let the rules call back into this traverser for child nodes.
        $rules->setTraverser($this);
    }

    /**
     * Walk the DOM handed to the constructor.
     *
     * @return resource The output stream handed to the constructor.
     */
    public function walk()
    {
        $subject = $this->dom;

        if ($subject instanceof \DOMDocument) {
            $this->rules->document($subject);
        } elseif ($subject instanceof \DOMDocumentFragment) {
            // For a fragment only the children are serialized.
            if ($subject->hasChildNodes()) {
                $this->children($subject->childNodes);
            }
        } elseif ($subject instanceof \DOMNodeList) {
            // A NodeList of DOMDocuments will not work here.
            $this->children($subject);
        } else {
            // Anything else is treated as a single DOMNode-like structure.
            $this->node($subject);
        }

        return $this->out;
    }

    /**
     * Hand a single node to the rule method matching its node type.
     *
     * Node types without a matching rule (for example DTDs) are skipped.
     *
     * @param mixed $node A node implementing \DOMNode.
     */
    public function node($node)
    {
        // A listing of types is at http://php.net/manual/en/dom.constants.php
        $type = $node->nodeType;

        if (XML_ELEMENT_NODE == $type) {
            $this->rules->element($node);
        } elseif (XML_TEXT_NODE == $type) {
            $this->rules->text($node);
        } elseif (XML_CDATA_SECTION_NODE == $type) {
            $this->rules->cdata($node);
        } elseif (XML_PI_NODE == $type) {
            $this->rules->processorInstruction($node);
        } elseif (XML_COMMENT_NODE == $type) {
            $this->rules->comment($node);
        }
    }

    /**
     * Visit every node of a node list in order.
     *
     * @param \DOMNodeList $nl The nodes to walk through.
     */
    public function children($nl)
    {
        foreach ($nl as $child) {
            $this->node($child);
        }
    }

    /**
     * Tell whether an element lives in one of the local namespaces.
     *
     * @param mixed $ele An element that implements \DOMNode.
     *
     * @return bool True when the element's namespace URI is non-empty and
     *              listed in $local_ns, false otherwise.
     */
    public function isLocalElement($ele)
    {
        $namespace = $ele->namespaceURI;

        return !empty($namespace) && isset(static::$local_ns[$namespace]);
    }
}
PK      ��Z	�n��  �    Serializer/HTML5Entities.phpnu W+A��        <?php
/**
 * @file
 * This contains HTML5 entities to use with serializing.
 *
 * The list here is mildly different from the list at Entities because
 * that list was generated from the w3c. It contains some entities that are
 * not entirely proper such as &am; which maps to &. This list is meant to be
 * a fallback for PHP versions prior to PHP 5.4 when dealing with encoding.
 */

namespace Masterminds\HTML5\Serializer;

/**
 * A mapping of characters to their HTML5 named character references.
 *
 * Used as a fallback on older PHP versions that don't have the mapping.
 */
class HTML5Entities
{
    public static $map = array(
        '	' => '&Tab;',
        "\n" => '&NewLine;',
        '!' => '&excl;',
        '"' => '&quot;',
        '#' => '&num;',
        '$' => '&dollar;',
        '%' => '&percnt;',
        '&' => '&amp;',
        '\'' => '&apos;',
        '(' => '&lpar;',
        ')' => '&rpar;',
        '*' => '&ast;',
        '+' => '&plus;',
        ',' => '&comma;',
        '.' => '&period;',
        '/' => '&sol;',
        ':' => '&colon;',
        ';' => '&semi;',
        '<' => '&lt;',
        '<⃒' => '&nvlt',
        '=' => '&equals;',
        '=⃥' => '&bne',
        '>' => '&gt;',
        '>⃒' => '&nvgt',
        '?' => '&quest;',
        '@' => '&commat;',
        '[' => '&lbrack;',
        '\\' => '&bsol;',
        ']' => '&rsqb;',
        '^' => '&Hat;',
        '_' => '&lowbar;',
        '`' => '&grave;',
        'fj' => '&fjlig',
        '{' => '&lbrace;',
        '|' => '&vert;',
        '}' => '&rcub;',
        ' ' => '&nbsp;',
        '¡' => '&iexcl;',
        '¢' => '&cent;',
        '£' => '&pound;',
        '¤' => '&curren;',
        '¥' => '&yen;',
        '¦' => '&brvbar;',
        '§' => '&sect;',
        '¨' => '&DoubleDot;',
        '©' => '&copy;',
        'ª' => '&ordf;',
        '«' => '&laquo;',
        '¬' => '&not;',
        '­' => '&shy;',
        '®' => '&reg;',
        '¯' => '&macr;',
        '°' => '&deg;',
        '±' => '&plusmn;',
        '²' => '&sup2;',
        '³' => '&sup3;',
        '´' => '&DiacriticalAcute;',
        'µ' => '&micro;',
        '¶' => '&para;',
        '·' => '&CenterDot;',
        '¸' => '&Cedilla;',
        '¹' => '&sup1;',
        'º' => '&ordm;',
        '»' => '&raquo;',
        '¼' => '&frac14;',
        '½' => '&half;',
        '¾' => '&frac34;',
        '¿' => '&iquest;',
        'À' => '&Agrave;',
        'Á' => '&Aacute;',
        'Â' => '&Acirc;',
        'Ã' => '&Atilde;',
        'Ä' => '&Auml;',
        'Å' => '&Aring;',
        'Æ' => '&AElig;',
        'Ç' => '&Ccedil;',
        'È' => '&Egrave;',
        'É' => '&Eacute;',
        'Ê' => '&Ecirc;',
        'Ë' => '&Euml;',
        'Ì' => '&Igrave;',
        'Í' => '&Iacute;',
        'Î' => '&Icirc;',
        'Ï' => '&Iuml;',
        'Ð' => '&ETH;',
        'Ñ' => '&Ntilde;',
        'Ò' => '&Ograve;',
        'Ó' => '&Oacute;',
        'Ô' => '&Ocirc;',
        'Õ' => '&Otilde;',
        'Ö' => '&Ouml;',
        '×' => '&times;',
        'Ø' => '&Oslash;',
        'Ù' => '&Ugrave;',
        'Ú' => '&Uacute;',
        'Û' => '&Ucirc;',
        'Ü' => '&Uuml;',
        'Ý' => '&Yacute;',
        'Þ' => '&THORN;',
        'ß' => '&szlig;',
        'à' => '&agrave;',
        'á' => '&aacute;',
        'â' => '&acirc;',
        'ã' => '&atilde;',
        'ä' => '&auml;',
        'å' => '&aring;',
        'æ' => '&aelig;',
        'ç' => '&ccedil;',
        'è' => '&egrave;',
        'é' => '&eacute;',
        'ê' => '&ecirc;',
        'ë' => '&euml;',
        'ì' => '&igrave;',
        'í' => '&iacute;',
        'î' => '&icirc;',
        'ï' => '&iuml;',
        'ð' => '&eth;',
        'ñ' => '&ntilde;',
        'ò' => '&ograve;',
        'ó' => '&oacute;',
        'ô' => '&ocirc;',
        'õ' => '&otilde;',
        'ö' => '&ouml;',
        '÷' => '&divide;',
        'ø' => '&oslash;',
        'ù' => '&ugrave;',
        'ú' => '&uacute;',
        'û' => '&ucirc;',
        'ü' => '&uuml;',
        'ý' => '&yacute;',
        'þ' => '&thorn;',
        'ÿ' => '&yuml;',
        'Ā' => '&Amacr;',
        'ā' => '&amacr;',
        'Ă' => '&Abreve;',
        'ă' => '&abreve;',
        'Ą' => '&Aogon;',
        'ą' => '&aogon;',
        'Ć' => '&Cacute;',
        'ć' => '&cacute;',
        'Ĉ' => '&Ccirc;',
        'ĉ' => '&ccirc;',
        'Ċ' => '&Cdot;',
        'ċ' => '&cdot;',
        'Č' => '&Ccaron;',
        'č' => '&ccaron;',
        'Ď' => '&Dcaron;',
        'ď' => '&dcaron;',
        'Đ' => '&Dstrok;',
        'đ' => '&dstrok;',
        'Ē' => '&Emacr;',
        'ē' => '&emacr;',
        'Ė' => '&Edot;',
        'ė' => '&edot;',
        'Ę' => '&Eogon;',
        'ę' => '&eogon;',
        'Ě' => '&Ecaron;',
        'ě' => '&ecaron;',
        'Ĝ' => '&Gcirc;',
        'ĝ' => '&gcirc;',
        'Ğ' => '&Gbreve;',
        'ğ' => '&gbreve;',
        'Ġ' => '&Gdot;',
        'ġ' => '&gdot;',
        'Ģ' => '&Gcedil;',
        'Ĥ' => '&Hcirc;',
        'ĥ' => '&hcirc;',
        'Ħ' => '&Hstrok;',
        'ħ' => '&hstrok;',
        'Ĩ' => '&Itilde;',
        'ĩ' => '&itilde;',
        'Ī' => '&Imacr;',
        'ī' => '&imacr;',
        'Į' => '&Iogon;',
        'į' => '&iogon;',
        'İ' => '&Idot;',
        'ı' => '&inodot;',
        'Ĳ' => '&IJlig;',
        'ĳ' => '&ijlig;',
        'Ĵ' => '&Jcirc;',
        'ĵ' => '&jcirc;',
        'Ķ' => '&Kcedil;',
        'ķ' => '&kcedil;',
        'ĸ' => '&kgreen;',
        'Ĺ' => '&Lacute;',
        'ĺ' => '&lacute;',
        'Ļ' => '&Lcedil;',
        'ļ' => '&lcedil;',
        'Ľ' => '&Lcaron;',
        'ľ' => '&lcaron;',
        'Ŀ' => '&Lmidot;',
        'ŀ' => '&lmidot;',
        'Ł' => '&Lstrok;',
        'ł' => '&lstrok;',
        'Ń' => '&Nacute;',
        'ń' => '&nacute;',
        'Ņ' => '&Ncedil;',
        'ņ' => '&ncedil;',
        'Ň' => '&Ncaron;',
        'ň' => '&ncaron;',
        'ŉ' => '&napos;',
        'Ŋ' => '&ENG;',
        'ŋ' => '&eng;',
        'Ō' => '&Omacr;',
        'ō' => '&omacr;',
        'Ő' => '&Odblac;',
        'ő' => '&odblac;',
        'Œ' => '&OElig;',
        'œ' => '&oelig;',
        'Ŕ' => '&Racute;',
        'ŕ' => '&racute;',
        'Ŗ' => '&Rcedil;',
        'ŗ' => '&rcedil;',
        'Ř' => '&Rcaron;',
        'ř' => '&rcaron;',
        'Ś' => '&Sacute;',
        'ś' => '&sacute;',
        'Ŝ' => '&Scirc;',
        'ŝ' => '&scirc;',
        'Ş' => '&Scedil;',
        'ş' => '&scedil;',
        'Š' => '&Scaron;',
        'š' => '&scaron;',
        'Ţ' => '&Tcedil;',
        'ţ' => '&tcedil;',
        'Ť' => '&Tcaron;',
        'ť' => '&tcaron;',
        'Ŧ' => '&Tstrok;',
        'ŧ' => '&tstrok;',
        'Ũ' => '&Utilde;',
        'ũ' => '&utilde;',
        'Ū' => '&Umacr;',
        'ū' => '&umacr;',
        'Ŭ' => '&Ubreve;',
        'ŭ' => '&ubreve;',
        'Ů' => '&Uring;',
        'ů' => '&uring;',
        'Ű' => '&Udblac;',
        'ű' => '&udblac;',
        'Ų' => '&Uogon;',
        'ų' => '&uogon;',
        'Ŵ' => '&Wcirc;',
        'ŵ' => '&wcirc;',
        'Ŷ' => '&Ycirc;',
        'ŷ' => '&ycirc;',
        'Ÿ' => '&Yuml;',
        'Ź' => '&Zacute;',
        'ź' => '&zacute;',
        'Ż' => '&Zdot;',
        'ż' => '&zdot;',
        'Ž' => '&Zcaron;',
        'ž' => '&zcaron;',
        'ƒ' => '&fnof;',
        'Ƶ' => '&imped;',
        'ǵ' => '&gacute;',
        'ȷ' => '&jmath;',
        'ˆ' => '&circ;',
        'ˇ' => '&Hacek;',
        '˘' => '&Breve;',
        '˙' => '&dot;',
        '˚' => '&ring;',
        '˛' => '&ogon;',
        '˜' => '&DiacriticalTilde;',
        '˝' => '&DiacriticalDoubleAcute;',
        '̑' => '&DownBreve;',
        'Α' => '&Alpha;',
        'Β' => '&Beta;',
        'Γ' => '&Gamma;',
        'Δ' => '&Delta;',
        'Ε' => '&Epsilon;',
        'Ζ' => '&Zeta;',
        'Η' => '&Eta;',
        'Θ' => '&Theta;',
        'Ι' => '&Iota;',
        'Κ' => '&Kappa;',
        'Λ' => '&Lambda;',
        'Μ' => '&Mu;',
        'Ν' => '&Nu;',
        'Ξ' => '&Xi;',
        'Ο' => '&Omicron;',
        'Π' => '&Pi;',
        'Ρ' => '&Rho;',
        'Σ' => '&Sigma;',
        'Τ' => '&Tau;',
        'Υ' => '&Upsilon;',
        'Φ' => '&Phi;',
        'Χ' => '&Chi;',
        'Ψ' => '&Psi;',
        'Ω' => '&Omega;',
        'α' => '&alpha;',
        'β' => '&beta;',
        'γ' => '&gamma;',
        'δ' => '&delta;',
        'ε' => '&epsi;',
        'ζ' => '&zeta;',
        'η' => '&eta;',
        'θ' => '&theta;',
        'ι' => '&iota;',
        'κ' => '&kappa;',
        'λ' => '&lambda;',
        'μ' => '&mu;',
        'ν' => '&nu;',
        'ξ' => '&xi;',
        'ο' => '&omicron;',
        'π' => '&pi;',
        'ρ' => '&rho;',
        'ς' => '&sigmav;',
        'σ' => '&sigma;',
        'τ' => '&tau;',
        'υ' => '&upsi;',
        'φ' => '&phi;',
        'χ' => '&chi;',
        'ψ' => '&psi;',
        'ω' => '&omega;',
        'ϑ' => '&thetasym;',
        'ϒ' => '&upsih;',
        'ϕ' => '&straightphi;',
        'ϖ' => '&piv;',
        'Ϝ' => '&Gammad;',
        'ϝ' => '&gammad;',
        'ϰ' => '&varkappa;',
        'ϱ' => '&rhov;',
        'ϵ' => '&straightepsilon;',
        '϶' => '&backepsilon;',
        'Ё' => '&IOcy;',
        'Ђ' => '&DJcy;',
        'Ѓ' => '&GJcy;',
        'Є' => '&Jukcy;',
        'Ѕ' => '&DScy;',
        'І' => '&Iukcy;',
        'Ї' => '&YIcy;',
        'Ј' => '&Jsercy;',
        'Љ' => '&LJcy;',
        'Њ' => '&NJcy;',
        'Ћ' => '&TSHcy;',
        'Ќ' => '&KJcy;',
        'Ў' => '&Ubrcy;',
        'Џ' => '&DZcy;',
        'А' => '&Acy;',
        'Б' => '&Bcy;',
        'В' => '&Vcy;',
        'Г' => '&Gcy;',
        'Д' => '&Dcy;',
        'Е' => '&IEcy;',
        'Ж' => '&ZHcy;',
        'З' => '&Zcy;',
        'И' => '&Icy;',
        'Й' => '&Jcy;',
        'К' => '&Kcy;',
        'Л' => '&Lcy;',
        'М' => '&Mcy;',
        'Н' => '&Ncy;',
        'О' => '&Ocy;',
        'П' => '&Pcy;',
        'Р' => '&Rcy;',
        'С' => '&Scy;',
        'Т' => '&Tcy;',
        'У' => '&Ucy;',
        'Ф' => '&Fcy;',
        'Х' => '&KHcy;',
        'Ц' => '&TScy;',
        'Ч' => '&CHcy;',
        'Ш' => '&SHcy;',
        'Щ' => '&SHCHcy;',
        'Ъ' => '&HARDcy;',
        'Ы' => '&Ycy;',
        'Ь' => '&SOFTcy;',
        'Э' => '&Ecy;',
        'Ю' => '&YUcy;',
        'Я' => '&YAcy;',
        'а' => '&acy;',
        'б' => '&bcy;',
        'в' => '&vcy;',
        'г' => '&gcy;',
        'д' => '&dcy;',
        'е' => '&iecy;',
        'ж' => '&zhcy;',
        'з' => '&zcy;',
        'и' => '&icy;',
        'й' => '&jcy;',
        'к' => '&kcy;',
        'л' => '&lcy;',
        'м' => '&mcy;',
        'н' => '&ncy;',
        'о' => '&ocy;',
        'п' => '&pcy;',
        'р' => '&rcy;',
        'с' => '&scy;',
        'т' => '&tcy;',
        'у' => '&ucy;',
        'ф' => '&fcy;',
        'х' => '&khcy;',
        'ц' => '&tscy;',
        'ч' => '&chcy;',
        'ш' => '&shcy;',
        'щ' => '&shchcy;',
        'ъ' => '&hardcy;',
        'ы' => '&ycy;',
        'ь' => '&softcy;',
        'э' => '&ecy;',
        'ю' => '&yucy;',
        'я' => '&yacy;',
        'ё' => '&iocy;',
        'ђ' => '&djcy;',
        'ѓ' => '&gjcy;',
        'є' => '&jukcy;',
        'ѕ' => '&dscy;',
        'і' => '&iukcy;',
        'ї' => '&yicy;',
        'ј' => '&jsercy;',
        'љ' => '&ljcy;',
        'њ' => '&njcy;',
        'ћ' => '&tshcy;',
        'ќ' => '&kjcy;',
        'ў' => '&ubrcy;',
        'џ' => '&dzcy;',
        ' ' => '&ensp;',
        ' ' => '&emsp;',
        ' ' => '&emsp13;',
        ' ' => '&emsp14;',
        ' ' => '&numsp;',
        ' ' => '&puncsp;',
        ' ' => '&ThinSpace;',
        ' ' => '&hairsp;',
        '​' => '&ZeroWidthSpace;',
        '‌' => '&zwnj;',
        '‍' => '&zwj;',
        '‎' => '&lrm;',
        '‏' => '&rlm;',
        '‐' => '&hyphen;',
        '–' => '&ndash;',
        '—' => '&mdash;',
        '―' => '&horbar;',
        '‖' => '&Verbar;',
        '‘' => '&OpenCurlyQuote;',
        '’' => '&rsquo;',
        '‚' => '&sbquo;',
        '“' => '&OpenCurlyDoubleQuote;',
        '”' => '&rdquo;',
        '„' => '&bdquo;',
        '†' => '&dagger;',
        '‡' => '&Dagger;',
        '•' => '&bull;',
        '‥' => '&nldr;',
        '…' => '&hellip;',
        '‰' => '&permil;',
        '‱' => '&pertenk;',
        '′' => '&prime;',
        '″' => '&Prime;',
        '‴' => '&tprime;',
        '‵' => '&backprime;',
        '‹' => '&lsaquo;',
        '›' => '&rsaquo;',
        '‾' => '&oline;',
        '⁁' => '&caret;',
        '⁃' => '&hybull;',
        '⁄' => '&frasl;',
        '⁏' => '&bsemi;',
        '⁗' => '&qprime;',
        ' ' => '&MediumSpace;',
        '  ' => '&ThickSpace',
        '⁠' => '&NoBreak;',
        '⁡' => '&af;',
        '⁢' => '&InvisibleTimes;',
        '⁣' => '&ic;',
        '€' => '&euro;',
        '⃛' => '&TripleDot;',
        '⃜' => '&DotDot;',
        'ℂ' => '&complexes;',
        '℅' => '&incare;',
        'ℊ' => '&gscr;',
        'ℋ' => '&HilbertSpace;',
        'ℌ' => '&Hfr;',
        'ℍ' => '&Hopf;',
        'ℎ' => '&planckh;',
        'ℏ' => '&planck;',
        'ℐ' => '&imagline;',
        'ℑ' => '&Ifr;',
        'ℒ' => '&lagran;',
        'ℓ' => '&ell;',
        'ℕ' => '&naturals;',
        '№' => '&numero;',
        '℗' => '&copysr;',
        '℘' => '&wp;',
        'ℙ' => '&primes;',
        'ℚ' => '&rationals;',
        'ℛ' => '&realine;',
        'ℜ' => '&Rfr;',
        'ℝ' => '&Ropf;',
        '℞' => '&rx;',
        '™' => '&trade;',
        'ℤ' => '&Zopf;',
        '℧' => '&mho;',
        'ℨ' => '&Zfr;',
        '℩' => '&iiota;',
        'ℬ' => '&Bscr;',
        'ℭ' => '&Cfr;',
        'ℯ' => '&escr;',
        'ℰ' => '&expectation;',
        'ℱ' => '&Fouriertrf;',
        'ℳ' => '&Mellintrf;',
        'ℴ' => '&orderof;',
        'ℵ' => '&aleph;',
        'ℶ' => '&beth;',
        'ℷ' => '&gimel;',
        'ℸ' => '&daleth;',
        'ⅅ' => '&CapitalDifferentialD;',
        'ⅆ' => '&DifferentialD;',
        'ⅇ' => '&exponentiale;',
        'ⅈ' => '&ImaginaryI;',
        '⅓' => '&frac13;',
        '⅔' => '&frac23;',
        '⅕' => '&frac15;',
        '⅖' => '&frac25;',
        '⅗' => '&frac35;',
        '⅘' => '&frac45;',
        '⅙' => '&frac16;',
        '⅚' => '&frac56;',
        '⅛' => '&frac18;',
        '⅜' => '&frac38;',
        '⅝' => '&frac58;',
        '⅞' => '&frac78;',
        '←' => '&larr;',
        '↑' => '&uarr;',
        '→' => '&srarr;',
        '↓' => '&darr;',
        '↔' => '&harr;',
        '↕' => '&UpDownArrow;',
        '↖' => '&nwarrow;',
        '↗' => '&UpperRightArrow;',
        '↘' => '&LowerRightArrow;',
        '↙' => '&swarr;',
        '↚' => '&nleftarrow;',
        '↛' => '&nrarr;',
        '↝' => '&rarrw;',
        '↝̸' => '&nrarrw',
        '↞' => '&Larr;',
        '↟' => '&Uarr;',
        '↠' => '&twoheadrightarrow;',
        '↡' => '&Darr;',
        '↢' => '&larrtl;',
        '↣' => '&rarrtl;',
        '↤' => '&LeftTeeArrow;',
        '↥' => '&UpTeeArrow;',
        '↦' => '&map;',
        '↧' => '&DownTeeArrow;',
        '↩' => '&larrhk;',
        '↪' => '&rarrhk;',
        '↫' => '&larrlp;',
        '↬' => '&looparrowright;',
        '↭' => '&harrw;',
        '↮' => '&nleftrightarrow;',
        '↰' => '&Lsh;',
        '↱' => '&rsh;',
        '↲' => '&ldsh;',
        '↳' => '&rdsh;',
        '↵' => '&crarr;',
        '↶' => '&curvearrowleft;',
        '↷' => '&curarr;',
        '↺' => '&olarr;',
        '↻' => '&orarr;',
        '↼' => '&leftharpoonup;',
        '↽' => '&leftharpoondown;',
        '↾' => '&RightUpVector;',
        '↿' => '&uharl;',
        '⇀' => '&rharu;',
        '⇁' => '&rhard;',
        '⇂' => '&RightDownVector;',
        '⇃' => '&dharl;',
        '⇄' => '&rightleftarrows;',
        '⇅' => '&udarr;',
        '⇆' => '&lrarr;',
        '⇇' => '&llarr;',
        '⇈' => '&upuparrows;',
        '⇉' => '&rrarr;',
        '⇊' => '&downdownarrows;',
        '⇋' => '&leftrightharpoons;',
        '⇌' => '&rightleftharpoons;',
        '⇍' => '&nLeftarrow;',
        '⇎' => '&nhArr;',
        '⇏' => '&nrArr;',
        '⇐' => '&DoubleLeftArrow;',
        '⇑' => '&DoubleUpArrow;',
        '⇒' => '&Implies;',
        '⇓' => '&Downarrow;',
        '⇔' => '&hArr;',
        '⇕' => '&Updownarrow;',
        '⇖' => '&nwArr;',
        '⇗' => '&neArr;',
        '⇘' => '&seArr;',
        '⇙' => '&swArr;',
        '⇚' => '&lAarr;',
        '⇛' => '&rAarr;',
        '⇝' => '&zigrarr;',
        '⇤' => '&LeftArrowBar;',
        '⇥' => '&RightArrowBar;',
        '⇵' => '&DownArrowUpArrow;',
        '⇽' => '&loarr;',
        '⇾' => '&roarr;',
        '⇿' => '&hoarr;',
        '∀' => '&forall;',
        '∁' => '&comp;',
        '∂' => '&part;',
        '∂̸' => '&npart',
        '∃' => '&Exists;',
        '∄' => '&nexist;',
        '∅' => '&empty;',
        '∇' => '&nabla;',
        '∈' => '&isinv;',
        '∉' => '&notin;',
        '∋' => '&ReverseElement;',
        '∌' => '&notniva;',
        '∏' => '&prod;',
        '∐' => '&Coproduct;',
        '∑' => '&sum;',
        '−' => '&minus;',
        '∓' => '&MinusPlus;',
        '∔' => '&plusdo;',
        '∖' => '&ssetmn;',
        '∗' => '&lowast;',
        '∘' => '&compfn;',
        '√' => '&Sqrt;',
        '∝' => '&prop;',
        '∞' => '&infin;',
        '∟' => '&angrt;',
        '∠' => '&angle;',
        '∠⃒' => '&nang',
        '∡' => '&angmsd;',
        '∢' => '&angsph;',
        '∣' => '&mid;',
        '∤' => '&nshortmid;',
        '∥' => '&shortparallel;',
        '∦' => '&nparallel;',
        '∧' => '&and;',
        '∨' => '&or;',
        '∩' => '&cap;',
        '∩︀' => '&caps',
        '∪' => '&cup;',
        '∪︀' => '&cups',
        '∫' => '&Integral;',
        '∬' => '&Int;',
        '∭' => '&tint;',
        '∮' => '&ContourIntegral;',
        '∯' => '&DoubleContourIntegral;',
        '∰' => '&Cconint;',
        '∱' => '&cwint;',
        '∲' => '&cwconint;',
        '∳' => '&awconint;',
        '∴' => '&there4;',
        '∵' => '&Because;',
        '∶' => '&ratio;',
        '∷' => '&Colon;',
        '∸' => '&minusd;',
        '∺' => '&mDDot;',
        '∻' => '&homtht;',
        '∼' => '&sim;',
        '∼⃒' => '&nvsim',
        '∽' => '&bsim;',
        '∽̱' => '&race',
        '∾' => '&ac;',
        '∾̳' => '&acE',
        '∿' => '&acd;',
        '≀' => '&wr;',
        '≁' => '&NotTilde;',
        '≂' => '&esim;',
        '≂̸' => '&nesim',
        '≃' => '&simeq;',
        '≄' => '&nsime;',
        '≅' => '&TildeFullEqual;',
        '≆' => '&simne;',
        '≇' => '&ncong;',
        '≈' => '&approx;',
        '≉' => '&napprox;',
        '≊' => '&ape;',
        '≋' => '&apid;',
        '≋̸' => '&napid',
        '≌' => '&bcong;',
        '≍' => '&CupCap;',
        '≍⃒' => '&nvap',
        '≎' => '&bump;',
        '≎̸' => '&nbump',
        '≏' => '&HumpEqual;',
        '≏̸' => '&nbumpe',
        '≐' => '&esdot;',
        '≐̸' => '&nedot',
        '≑' => '&doteqdot;',
        '≒' => '&fallingdotseq;',
        '≓' => '&risingdotseq;',
        '≔' => '&coloneq;',
        '≕' => '&eqcolon;',
        '≖' => '&ecir;',
        '≗' => '&circeq;',
        '≙' => '&wedgeq;',
        '≚' => '&veeeq;',
        '≜' => '&triangleq;',
        '≟' => '&equest;',
        '≠' => '&NotEqual;',
        '≡' => '&Congruent;',
        '≡⃥' => '&bnequiv',
        '≢' => '&NotCongruent;',
        '≤' => '&leq;',
        '≤⃒' => '&nvle',
        '≥' => '&ge;',
        '≥⃒' => '&nvge',
        '≦' => '&lE;',
        '≦̸' => '&nlE',
        '≧' => '&geqq;',
        '≧̸' => '&NotGreaterFullEqual',
        '≨' => '&lneqq;',
        '≨︀' => '&lvertneqq',
        '≩' => '&gneqq;',
        '≩︀' => '&gvertneqq',
        '≪' => '&ll;',
        '≪̸' => '&nLtv',
        '≪⃒' => '&nLt',
        '≫' => '&gg;',
        '≫̸' => '&NotGreaterGreater',
        '≫⃒' => '&nGt',
        '≬' => '&between;',
        '≭' => '&NotCupCap;',
        '≮' => '&NotLess;',
        '≯' => '&ngtr;',
        '≰' => '&NotLessEqual;',
        '≱' => '&ngeq;',
        '≲' => '&LessTilde;',
        '≳' => '&GreaterTilde;',
        '≴' => '&nlsim;',
        '≵' => '&ngsim;',
        '≶' => '&lessgtr;',
        '≷' => '&gl;',
        '≸' => '&ntlg;',
        '≹' => '&NotGreaterLess;',
        '≺' => '&prec;',
        '≻' => '&succ;',
        '≼' => '&PrecedesSlantEqual;',
        '≽' => '&succcurlyeq;',
        '≾' => '&precsim;',
        '≿' => '&SucceedsTilde;',
        '≿̸' => '&NotSucceedsTilde',
        '⊀' => '&npr;',
        '⊁' => '&NotSucceeds;',
        '⊂' => '&sub;',
        '⊂⃒' => '&vnsub',
        '⊃' => '&sup;',
        '⊃⃒' => '&nsupset',
        '⊄' => '&nsub;',
        '⊅' => '&nsup;',
        '⊆' => '&SubsetEqual;',
        '⊇' => '&supe;',
        '⊈' => '&NotSubsetEqual;',
        '⊉' => '&NotSupersetEqual;',
        '⊊' => '&subsetneq;',
        '⊊︀' => '&vsubne',
        '⊋' => '&supsetneq;',
        '⊋︀' => '&vsupne',
        '⊍' => '&cupdot;',
        '⊎' => '&UnionPlus;',
        '⊏' => '&sqsub;',
        '⊏̸' => '&NotSquareSubset',
        '⊐' => '&sqsupset;',
        '⊐̸' => '&NotSquareSuperset',
        '⊑' => '&SquareSubsetEqual;',
        '⊒' => '&SquareSupersetEqual;',
        '⊓' => '&sqcap;',
        '⊓︀' => '&sqcaps',
        '⊔' => '&sqcup;',
        '⊔︀' => '&sqcups',
        '⊕' => '&CirclePlus;',
        '⊖' => '&ominus;',
        '⊗' => '&CircleTimes;',
        '⊘' => '&osol;',
        '⊙' => '&CircleDot;',
        '⊚' => '&ocir;',
        '⊛' => '&oast;',
        '⊝' => '&odash;',
        '⊞' => '&boxplus;',
        '⊟' => '&boxminus;',
        '⊠' => '&timesb;',
        '⊡' => '&sdotb;',
        '⊢' => '&vdash;',
        '⊣' => '&dashv;',
        '⊤' => '&DownTee;',
        '⊥' => '&perp;',
        '⊧' => '&models;',
        '⊨' => '&DoubleRightTee;',
        '⊩' => '&Vdash;',
        '⊪' => '&Vvdash;',
        '⊫' => '&VDash;',
        '⊬' => '&nvdash;',
        '⊭' => '&nvDash;',
        '⊮' => '&nVdash;',
        '⊯' => '&nVDash;',
        '⊰' => '&prurel;',
        '⊲' => '&vartriangleleft;',
        '⊳' => '&vrtri;',
        '⊴' => '&LeftTriangleEqual;',
        '⊴⃒' => '&nvltrie',
        '⊵' => '&RightTriangleEqual;',
        '⊵⃒' => '&nvrtrie',
        '⊶' => '&origof;',
        '⊷' => '&imof;',
        '⊸' => '&mumap;',
        '⊹' => '&hercon;',
        '⊺' => '&intcal;',
        '⊻' => '&veebar;',
        '⊽' => '&barvee;',
        '⊾' => '&angrtvb;',
        '⊿' => '&lrtri;',
        '⋀' => '&xwedge;',
        '⋁' => '&xvee;',
        '⋂' => '&bigcap;',
        '⋃' => '&bigcup;',
        '⋄' => '&diamond;',
        '⋅' => '&sdot;',
        '⋆' => '&Star;',
        '⋇' => '&divonx;',
        '⋈' => '&bowtie;',
        '⋉' => '&ltimes;',
        '⋊' => '&rtimes;',
        '⋋' => '&lthree;',
        '⋌' => '&rthree;',
        '⋍' => '&backsimeq;',
        '⋎' => '&curlyvee;',
        '⋏' => '&curlywedge;',
        '⋐' => '&Sub;',
        '⋑' => '&Supset;',
        '⋒' => '&Cap;',
        '⋓' => '&Cup;',
        '⋔' => '&pitchfork;',
        '⋕' => '&epar;',
        '⋖' => '&lessdot;',
        '⋗' => '&gtrdot;',
        '⋘' => '&Ll;',
        '⋘̸' => '&nLl',
        '⋙' => '&Gg;',
        '⋙̸' => '&nGg',
        '⋚' => '&lesseqgtr;',
        '⋚︀' => '&lesg',
        '⋛' => '&gtreqless;',
        '⋛︀' => '&gesl',
        '⋞' => '&curlyeqprec;',
        '⋟' => '&cuesc;',
        '⋠' => '&NotPrecedesSlantEqual;',
        '⋡' => '&NotSucceedsSlantEqual;',
        '⋢' => '&NotSquareSubsetEqual;',
        '⋣' => '&NotSquareSupersetEqual;',
        '⋦' => '&lnsim;',
        '⋧' => '&gnsim;',
        '⋨' => '&precnsim;',
        '⋩' => '&scnsim;',
        '⋪' => '&nltri;',
        '⋫' => '&ntriangleright;',
        '⋬' => '&nltrie;',
        '⋭' => '&NotRightTriangleEqual;',
        '⋮' => '&vellip;',
        '⋯' => '&ctdot;',
        '⋰' => '&utdot;',
        '⋱' => '&dtdot;',
        '⋲' => '&disin;',
        '⋳' => '&isinsv;',
        '⋴' => '&isins;',
        '⋵' => '&isindot;',
        '⋵̸' => '&notindot',
        '⋶' => '&notinvc;',
        '⋷' => '&notinvb;',
        '⋹' => '&isinE;',
        '⋹̸' => '&notinE',
        '⋺' => '&nisd;',
        '⋻' => '&xnis;',
        '⋼' => '&nis;',
        '⋽' => '&notnivc;',
        '⋾' => '&notnivb;',
        '⌅' => '&barwed;',
        '⌆' => '&doublebarwedge;',
        '⌈' => '&lceil;',
        '⌉' => '&RightCeiling;',
        '⌊' => '&LeftFloor;',
        '⌋' => '&RightFloor;',
        '⌌' => '&drcrop;',
        '⌍' => '&dlcrop;',
        '⌎' => '&urcrop;',
        '⌏' => '&ulcrop;',
        '⌐' => '&bnot;',
        '⌒' => '&profline;',
        '⌓' => '&profsurf;',
        '⌕' => '&telrec;',
        '⌖' => '&target;',
        '⌜' => '&ulcorner;',
        '⌝' => '&urcorner;',
        '⌞' => '&llcorner;',
        '⌟' => '&drcorn;',
        '⌢' => '&frown;',
        '⌣' => '&smile;',
        '⌭' => '&cylcty;',
        '⌮' => '&profalar;',
        '⌶' => '&topbot;',
        '⌽' => '&ovbar;',
        '⌿' => '&solbar;',
        '⍼' => '&angzarr;',
        '⎰' => '&lmoust;',
        '⎱' => '&rmoust;',
        '⎴' => '&OverBracket;',
        '⎵' => '&bbrk;',
        '⎶' => '&bbrktbrk;',
        '⏜' => '&OverParenthesis;',
        '⏝' => '&UnderParenthesis;',
        '⏞' => '&OverBrace;',
        '⏟' => '&UnderBrace;',
        '⏢' => '&trpezium;',
        '⏧' => '&elinters;',
        '␣' => '&blank;',
        'Ⓢ' => '&oS;',
        '─' => '&HorizontalLine;',
        '│' => '&boxv;',
        '┌' => '&boxdr;',
        '┐' => '&boxdl;',
        '└' => '&boxur;',
        '┘' => '&boxul;',
        '├' => '&boxvr;',
        '┤' => '&boxvl;',
        '┬' => '&boxhd;',
        '┴' => '&boxhu;',
        '┼' => '&boxvh;',
        '═' => '&boxH;',
        '║' => '&boxV;',
        '╒' => '&boxdR;',
        '╓' => '&boxDr;',
        '╔' => '&boxDR;',
        '╕' => '&boxdL;',
        '╖' => '&boxDl;',
        '╗' => '&boxDL;',
        '╘' => '&boxuR;',
        '╙' => '&boxUr;',
        '╚' => '&boxUR;',
        '╛' => '&boxuL;',
        '╜' => '&boxUl;',
        '╝' => '&boxUL;',
        '╞' => '&boxvR;',
        '╟' => '&boxVr;',
        '╠' => '&boxVR;',
        '╡' => '&boxvL;',
        '╢' => '&boxVl;',
        '╣' => '&boxVL;',
        '╤' => '&boxHd;',
        '╥' => '&boxhD;',
        '╦' => '&boxHD;',
        '╧' => '&boxHu;',
        '╨' => '&boxhU;',
        '╩' => '&boxHU;',
        '╪' => '&boxvH;',
        '╫' => '&boxVh;',
        '╬' => '&boxVH;',
        '▀' => '&uhblk;',
        '▄' => '&lhblk;',
        '█' => '&block;',
        '░' => '&blk14;',
        '▒' => '&blk12;',
        '▓' => '&blk34;',
        '□' => '&Square;',
        '▪' => '&squarf;',
        '▫' => '&EmptyVerySmallSquare;',
        '▭' => '&rect;',
        '▮' => '&marker;',
        '▱' => '&fltns;',
        '△' => '&bigtriangleup;',
        '▴' => '&blacktriangle;',
        '▵' => '&triangle;',
        '▸' => '&blacktriangleright;',
        '▹' => '&rtri;',
        '▽' => '&bigtriangledown;',
        '▾' => '&blacktriangledown;',
        '▿' => '&triangledown;',
        '◂' => '&blacktriangleleft;',
        '◃' => '&ltri;',
        '◊' => '&lozenge;',
        '○' => '&cir;',
        '◬' => '&tridot;',
        '◯' => '&bigcirc;',
        '◸' => '&ultri;',
        '◹' => '&urtri;',
        '◺' => '&lltri;',
        '◻' => '&EmptySmallSquare;',
        '◼' => '&FilledSmallSquare;',
        '★' => '&starf;',
        '☆' => '&star;',
        '☎' => '&phone;',
        '♀' => '&female;',
        '♂' => '&male;',
        '♠' => '&spadesuit;',
        '♣' => '&clubs;',
        '♥' => '&hearts;',
        '♦' => '&diamondsuit;',
        '♪' => '&sung;',
        '♭' => '&flat;',
        '♮' => '&natur;',
        '♯' => '&sharp;',
        '✓' => '&check;',
        '✗' => '&cross;',
        '✠' => '&maltese;',
        '✶' => '&sext;',
        '❘' => '&VerticalSeparator;',
        '❲' => '&lbbrk;',
        '❳' => '&rbbrk;',
        '⟈' => '&bsolhsub;',
        '⟉' => '&suphsol;',
        '⟦' => '&LeftDoubleBracket;',
        '⟧' => '&RightDoubleBracket;',
        '⟨' => '&langle;',
        '⟩' => '&RightAngleBracket;',
        '⟪' => '&Lang;',
        '⟫' => '&Rang;',
        '⟬' => '&loang;',
        '⟭' => '&roang;',
        '⟵' => '&longleftarrow;',
        '⟶' => '&LongRightArrow;',
        '⟷' => '&LongLeftRightArrow;',
        '⟸' => '&xlArr;',
        '⟹' => '&DoubleLongRightArrow;',
        '⟺' => '&xhArr;',
        '⟼' => '&xmap;',
        '⟿' => '&dzigrarr;',
        '⤂' => '&nvlArr;',
        '⤃' => '&nvrArr;',
        '⤄' => '&nvHarr;',
        '⤅' => '&Map;',
        '⤌' => '&lbarr;',
        '⤍' => '&bkarow;',
        '⤎' => '&lBarr;',
        '⤏' => '&dbkarow;',
        '⤐' => '&drbkarow;',
        '⤑' => '&DDotrahd;',
        '⤒' => '&UpArrowBar;',
        '⤓' => '&DownArrowBar;',
        '⤖' => '&Rarrtl;',
        '⤙' => '&latail;',
        '⤚' => '&ratail;',
        '⤛' => '&lAtail;',
        '⤜' => '&rAtail;',
        '⤝' => '&larrfs;',
        '⤞' => '&rarrfs;',
        '⤟' => '&larrbfs;',
        '⤠' => '&rarrbfs;',
        '⤣' => '&nwarhk;',
        '⤤' => '&nearhk;',
        '⤥' => '&searhk;',
        '⤦' => '&swarhk;',
        '⤧' => '&nwnear;',
        '⤨' => '&toea;',
        '⤩' => '&seswar;',
        '⤪' => '&swnwar;',
        '⤳' => '&rarrc;',
        '⤳̸' => '&nrarrc',
        '⤵' => '&cudarrr;',
        '⤶' => '&ldca;',
        '⤷' => '&rdca;',
        '⤸' => '&cudarrl;',
        '⤹' => '&larrpl;',
        '⤼' => '&curarrm;',
        '⤽' => '&cularrp;',
        '⥅' => '&rarrpl;',
        '⥈' => '&harrcir;',
        '⥉' => '&Uarrocir;',
        '⥊' => '&lurdshar;',
        '⥋' => '&ldrushar;',
        '⥎' => '&LeftRightVector;',
        '⥏' => '&RightUpDownVector;',
        '⥐' => '&DownLeftRightVector;',
        '⥑' => '&LeftUpDownVector;',
        '⥒' => '&LeftVectorBar;',
        '⥓' => '&RightVectorBar;',
        '⥔' => '&RightUpVectorBar;',
        '⥕' => '&RightDownVectorBar;',
        '⥖' => '&DownLeftVectorBar;',
        '⥗' => '&DownRightVectorBar;',
        '⥘' => '&LeftUpVectorBar;',
        '⥙' => '&LeftDownVectorBar;',
        '⥚' => '&LeftTeeVector;',
        '⥛' => '&RightTeeVector;',
        '⥜' => '&RightUpTeeVector;',
        '⥝' => '&RightDownTeeVector;',
        '⥞' => '&DownLeftTeeVector;',
        '⥟' => '&DownRightTeeVector;',
        '⥠' => '&LeftUpTeeVector;',
        '⥡' => '&LeftDownTeeVector;',
        '⥢' => '&lHar;',
        '⥣' => '&uHar;',
        '⥤' => '&rHar;',
        '⥥' => '&dHar;',
        '⥦' => '&luruhar;',
        '⥧' => '&ldrdhar;',
        '⥨' => '&ruluhar;',
        '⥩' => '&rdldhar;',
        '⥪' => '&lharul;',
        '⥫' => '&llhard;',
        '⥬' => '&rharul;',
        '⥭' => '&lrhard;',
        '⥮' => '&udhar;',
        '⥯' => '&ReverseUpEquilibrium;',
        '⥰' => '&RoundImplies;',
        '⥱' => '&erarr;',
        '⥲' => '&simrarr;',
        '⥳' => '&larrsim;',
        '⥴' => '&rarrsim;',
        '⥵' => '&rarrap;',
        '⥶' => '&ltlarr;',
        '⥸' => '&gtrarr;',
        '⥹' => '&subrarr;',
        '⥻' => '&suplarr;',
        '⥼' => '&lfisht;',
        '⥽' => '&rfisht;',
        '⥾' => '&ufisht;',
        '⥿' => '&dfisht;',
        '⦅' => '&lopar;',
        '⦆' => '&ropar;',
        '⦋' => '&lbrke;',
        '⦌' => '&rbrke;',
        '⦍' => '&lbrkslu;',
        '⦎' => '&rbrksld;',
        '⦏' => '&lbrksld;',
        '⦐' => '&rbrkslu;',
        '⦑' => '&langd;',
        '⦒' => '&rangd;',
        '⦓' => '&lparlt;',
        '⦔' => '&rpargt;',
        '⦕' => '&gtlPar;',
        '⦖' => '&ltrPar;',
        '⦚' => '&vzigzag;',
        '⦜' => '&vangrt;',
        '⦝' => '&angrtvbd;',
        '⦤' => '&ange;',
        '⦥' => '&range;',
        '⦦' => '&dwangle;',
        '⦧' => '&uwangle;',
        '⦨' => '&angmsdaa;',
        '⦩' => '&angmsdab;',
        '⦪' => '&angmsdac;',
        '⦫' => '&angmsdad;',
        '⦬' => '&angmsdae;',
        '⦭' => '&angmsdaf;',
        '⦮' => '&angmsdag;',
        '⦯' => '&angmsdah;',
        '⦰' => '&bemptyv;',
        '⦱' => '&demptyv;',
        '⦲' => '&cemptyv;',
        '⦳' => '&raemptyv;',
        '⦴' => '&laemptyv;',
        '⦵' => '&ohbar;',
        '⦶' => '&omid;',
        '⦷' => '&opar;',
        '⦹' => '&operp;',
        '⦻' => '&olcross;',
        '⦼' => '&odsold;',
        '⦾' => '&olcir;',
        '⦿' => '&ofcir;',
        '⧀' => '&olt;',
        '⧁' => '&ogt;',
        '⧂' => '&cirscir;',
        '⧃' => '&cirE;',
        '⧄' => '&solb;',
        '⧅' => '&bsolb;',
        '⧉' => '&boxbox;',
        '⧍' => '&trisb;',
        '⧎' => '&rtriltri;',
        '⧏' => '&LeftTriangleBar;',
        '⧏̸' => '&NotLeftTriangleBar',
        '⧐' => '&RightTriangleBar;',
        '⧐̸' => '&NotRightTriangleBar',
        '⧜' => '&iinfin;',
        '⧝' => '&infintie;',
        '⧞' => '&nvinfin;',
        '⧣' => '&eparsl;',
        '⧤' => '&smeparsl;',
        '⧥' => '&eqvparsl;',
        '⧫' => '&lozf;',
        '⧴' => '&RuleDelayed;',
        '⧶' => '&dsol;',
        '⨀' => '&xodot;',
        '⨁' => '&bigoplus;',
        '⨂' => '&bigotimes;',
        '⨄' => '&biguplus;',
        '⨆' => '&bigsqcup;',
        '⨌' => '&iiiint;',
        '⨍' => '&fpartint;',
        '⨐' => '&cirfnint;',
        '⨑' => '&awint;',
        '⨒' => '&rppolint;',
        '⨓' => '&scpolint;',
        '⨔' => '&npolint;',
        '⨕' => '&pointint;',
        '⨖' => '&quatint;',
        '⨗' => '&intlarhk;',
        '⨢' => '&pluscir;',
        '⨣' => '&plusacir;',
        '⨤' => '&simplus;',
        '⨥' => '&plusdu;',
        '⨦' => '&plussim;',
        '⨧' => '&plustwo;',
        '⨩' => '&mcomma;',
        '⨪' => '&minusdu;',
        '⨭' => '&loplus;',
        '⨮' => '&roplus;',
        '⨯' => '&Cross;',
        '⨰' => '&timesd;',
        '⨱' => '&timesbar;',
        '⨳' => '&smashp;',
        '⨴' => '&lotimes;',
        '⨵' => '&rotimes;',
        '⨶' => '&otimesas;',
        '⨷' => '&Otimes;',
        '⨸' => '&odiv;',
        '⨹' => '&triplus;',
        '⨺' => '&triminus;',
        '⨻' => '&tritime;',
        '⨼' => '&iprod;',
        '⨿' => '&amalg;',
        '⩀' => '&capdot;',
        '⩂' => '&ncup;',
        '⩃' => '&ncap;',
        '⩄' => '&capand;',
        '⩅' => '&cupor;',
        '⩆' => '&cupcap;',
        '⩇' => '&capcup;',
        '⩈' => '&cupbrcap;',
        '⩉' => '&capbrcup;',
        '⩊' => '&cupcup;',
        '⩋' => '&capcap;',
        '⩌' => '&ccups;',
        '⩍' => '&ccaps;',
        '⩐' => '&ccupssm;',
        '⩓' => '&And;',
        '⩔' => '&Or;',
        '⩕' => '&andand;',
        '⩖' => '&oror;',
        '⩗' => '&orslope;',
        '⩘' => '&andslope;',
        '⩚' => '&andv;',
        '⩛' => '&orv;',
        '⩜' => '&andd;',
        '⩝' => '&ord;',
        '⩟' => '&wedbar;',
        '⩦' => '&sdote;',
        '⩪' => '&simdot;',
        '⩭' => '&congdot;',
        '⩭̸' => '&ncongdot',
        '⩮' => '&easter;',
        '⩯' => '&apacir;',
        '⩰' => '&apE;',
        '⩰̸' => '&napE',
        '⩱' => '&eplus;',
        '⩲' => '&pluse;',
        '⩳' => '&Esim;',
        '⩴' => '&Colone;',
        '⩵' => '&Equal;',
        '⩷' => '&ddotseq;',
        '⩸' => '&equivDD;',
        '⩹' => '&ltcir;',
        '⩺' => '&gtcir;',
        '⩻' => '&ltquest;',
        '⩼' => '&gtquest;',
        '⩽' => '&les;',
        '⩽̸' => '&nles',
        '⩾' => '&ges;',
        '⩾̸' => '&nges',
        '⩿' => '&lesdot;',
        '⪀' => '&gesdot;',
        '⪁' => '&lesdoto;',
        '⪂' => '&gesdoto;',
        '⪃' => '&lesdotor;',
        '⪄' => '&gesdotol;',
        '⪅' => '&lap;',
        '⪆' => '&gap;',
        '⪇' => '&lne;',
        '⪈' => '&gne;',
        '⪉' => '&lnap;',
        '⪊' => '&gnap;',
        '⪋' => '&lesseqqgtr;',
        '⪌' => '&gEl;',
        '⪍' => '&lsime;',
        '⪎' => '&gsime;',
        '⪏' => '&lsimg;',
        '⪐' => '&gsiml;',
        '⪑' => '&lgE;',
        '⪒' => '&glE;',
        '⪓' => '&lesges;',
        '⪔' => '&gesles;',
        '⪕' => '&els;',
        '⪖' => '&egs;',
        '⪗' => '&elsdot;',
        '⪘' => '&egsdot;',
        '⪙' => '&el;',
        '⪚' => '&eg;',
        '⪝' => '&siml;',
        '⪞' => '&simg;',
        '⪟' => '&simlE;',
        '⪠' => '&simgE;',
        '⪡' => '&LessLess;',
        '⪡̸' => '&NotNestedLessLess',
        '⪢' => '&GreaterGreater;',
        '⪢̸' => '&NotNestedGreaterGreater',
        '⪤' => '&glj;',
        '⪥' => '&gla;',
        '⪦' => '&ltcc;',
        '⪧' => '&gtcc;',
        '⪨' => '&lescc;',
        '⪩' => '&gescc;',
        '⪪' => '&smt;',
        '⪫' => '&lat;',
        '⪬' => '&smte;',
        '⪬︀' => '&smtes',
        '⪭' => '&late;',
        '⪭︀' => '&lates',
        '⪮' => '&bumpE;',
        '⪯' => '&preceq;',
        '⪯̸' => '&NotPrecedesEqual',
        '⪰' => '&SucceedsEqual;',
        '⪰̸' => '&NotSucceedsEqual',
        '⪳' => '&prE;',
        '⪴' => '&scE;',
        '⪵' => '&precneqq;',
        '⪶' => '&scnE;',
        '⪷' => '&precapprox;',
        '⪸' => '&succapprox;',
        '⪹' => '&precnapprox;',
        '⪺' => '&succnapprox;',
        '⪻' => '&Pr;',
        '⪼' => '&Sc;',
        '⪽' => '&subdot;',
        '⪾' => '&supdot;',
        '⪿' => '&subplus;',
        '⫀' => '&supplus;',
        '⫁' => '&submult;',
        '⫂' => '&supmult;',
        '⫃' => '&subedot;',
        '⫄' => '&supedot;',
        '⫅' => '&subE;',
        '⫅̸' => '&nsubE',
        '⫆' => '&supseteqq;',
        '⫆̸' => '&nsupseteqq',
        '⫇' => '&subsim;',
        '⫈' => '&supsim;',
        '⫋' => '&subsetneqq;',
        '⫋︀' => '&vsubnE',
        '⫌' => '&supnE;',
        '⫌︀' => '&varsupsetneqq',
        '⫏' => '&csub;',
        '⫐' => '&csup;',
        '⫑' => '&csube;',
        '⫒' => '&csupe;',
        '⫓' => '&subsup;',
        '⫔' => '&supsub;',
        '⫕' => '&subsub;',
        '⫖' => '&supsup;',
        '⫗' => '&suphsub;',
        '⫘' => '&supdsub;',
        '⫙' => '&forkv;',
        '⫚' => '&topfork;',
        '⫛' => '&mlcp;',
        '⫤' => '&Dashv;',
        '⫦' => '&Vdashl;',
        '⫧' => '&Barv;',
        '⫨' => '&vBar;',
        '⫩' => '&vBarv;',
        '⫫' => '&Vbar;',
        '⫬' => '&Not;',
        '⫭' => '&bNot;',
        '⫮' => '&rnmid;',
        '⫯' => '&cirmid;',
        '⫰' => '&midcir;',
        '⫱' => '&topcir;',
        '⫲' => '&nhpar;',
        '⫳' => '&parsim;',
        '⫽︀' => '&varsupsetneqq',
        'ﬀ' => '&fflig;',
        'ﬁ' => '&filig;',
        'ﬂ' => '&fllig;',
        'ﬃ' => '&ffilig;',
        'ﬄ' => '&ffllig;',
        '𝒜' => '&Ascr;',
        '𝒞' => '&Cscr;',
        '𝒟' => '&Dscr;',
        '𝒢' => '&Gscr;',
        '𝒥' => '&Jscr;',
        '𝒦' => '&Kscr;',
        '𝒩' => '&Nscr;',
        '𝒪' => '&Oscr;',
        '𝒫' => '&Pscr;',
        '𝒬' => '&Qscr;',
        '𝒮' => '&Sscr;',
        '𝒯' => '&Tscr;',
        '𝒰' => '&Uscr;',
        '𝒱' => '&Vscr;',
        '𝒲' => '&Wscr;',
        '𝒳' => '&Xscr;',
        '𝒴' => '&Yscr;',
        '𝒵' => '&Zscr;',
        '𝒶' => '&ascr;',
        '𝒷' => '&bscr;',
        '𝒸' => '&cscr;',
        '𝒹' => '&dscr;',
        '𝒻' => '&fscr;',
        '𝒽' => '&hscr;',
        '𝒾' => '&iscr;',
        '𝒿' => '&jscr;',
        '𝓀' => '&kscr;',
        '𝓁' => '&lscr;',
        '𝓂' => '&mscr;',
        '𝓃' => '&nscr;',
        '𝓅' => '&pscr;',
        '𝓆' => '&qscr;',
        '𝓇' => '&rscr;',
        '𝓈' => '&sscr;',
        '𝓉' => '&tscr;',
        '𝓊' => '&uscr;',
        '𝓋' => '&vscr;',
        '𝓌' => '&wscr;',
        '𝓍' => '&xscr;',
        '𝓎' => '&yscr;',
        '𝓏' => '&zscr;',
        '𝔄' => '&Afr;',
        '𝔅' => '&Bfr;',
        '𝔇' => '&Dfr;',
        '𝔈' => '&Efr;',
        '𝔉' => '&Ffr;',
        '𝔊' => '&Gfr;',
        '𝔍' => '&Jfr;',
        '𝔎' => '&Kfr;',
        '𝔏' => '&Lfr;',
        '𝔐' => '&Mfr;',
        '𝔑' => '&Nfr;',
        '𝔒' => '&Ofr;',
        '𝔓' => '&Pfr;',
        '𝔔' => '&Qfr;',
        '𝔖' => '&Sfr;',
        '𝔗' => '&Tfr;',
        '𝔘' => '&Ufr;',
        '𝔙' => '&Vfr;',
        '𝔚' => '&Wfr;',
        '𝔛' => '&Xfr;',
        '𝔜' => '&Yfr;',
        '𝔞' => '&afr;',
        '𝔟' => '&bfr;',
        '𝔠' => '&cfr;',
        '𝔡' => '&dfr;',
        '𝔢' => '&efr;',
        '𝔣' => '&ffr;',
        '𝔤' => '&gfr;',
        '𝔥' => '&hfr;',
        '𝔦' => '&ifr;',
        '𝔧' => '&jfr;',
        '𝔨' => '&kfr;',
        '𝔩' => '&lfr;',
        '𝔪' => '&mfr;',
        '𝔫' => '&nfr;',
        '𝔬' => '&ofr;',
        '𝔭' => '&pfr;',
        '𝔮' => '&qfr;',
        '𝔯' => '&rfr;',
        '𝔰' => '&sfr;',
        '𝔱' => '&tfr;',
        '𝔲' => '&ufr;',
        '𝔳' => '&vfr;',
        '𝔴' => '&wfr;',
        '𝔵' => '&xfr;',
        '𝔶' => '&yfr;',
        '𝔷' => '&zfr;',
        '𝔸' => '&Aopf;',
        '𝔹' => '&Bopf;',
        '𝔻' => '&Dopf;',
        '𝔼' => '&Eopf;',
        '𝔽' => '&Fopf;',
        '𝔾' => '&Gopf;',
        '𝕀' => '&Iopf;',
        '𝕁' => '&Jopf;',
        '𝕂' => '&Kopf;',
        '𝕃' => '&Lopf;',
        '𝕄' => '&Mopf;',
        '𝕆' => '&Oopf;',
        '𝕊' => '&Sopf;',
        '𝕋' => '&Topf;',
        '𝕌' => '&Uopf;',
        '𝕍' => '&Vopf;',
        '𝕎' => '&Wopf;',
        '𝕏' => '&Xopf;',
        '𝕐' => '&Yopf;',
        '𝕒' => '&aopf;',
        '𝕓' => '&bopf;',
        '𝕔' => '&copf;',
        '𝕕' => '&dopf;',
        '𝕖' => '&eopf;',
        '𝕗' => '&fopf;',
        '𝕘' => '&gopf;',
        '𝕙' => '&hopf;',
        '𝕚' => '&iopf;',
        '𝕛' => '&jopf;',
        '𝕜' => '&kopf;',
        '𝕝' => '&lopf;',
        '𝕞' => '&mopf;',
        '𝕟' => '&nopf;',
        '𝕠' => '&oopf;',
        '𝕡' => '&popf;',
        '𝕢' => '&qopf;',
        '𝕣' => '&ropf;',
        '𝕤' => '&sopf;',
        '𝕥' => '&topf;',
        '𝕦' => '&uopf;',
        '𝕧' => '&vopf;',
        '𝕨' => '&wopf;',
        '𝕩' => '&xopf;',
        '𝕪' => '&yopf;',
        '𝕫' => '&zopf;',
    );
}
PK      ��Ze%ա  �    Serializer/README.mdnu W+A��        # The Serializer (Writer) Model

The serializer roughly follows sections _8.1 Writing HTML documents_ and
_8.3 Serializing HTML fragments_ of the HTML5 specification by converting
DOMDocument, DOMDocumentFragment, and DOMNodeList into HTML5.

       [ HTML5 ]   // Interface for saving.
          ||
     [ Traverser ]   // Walk the DOM
          ||
       [ Rules ]     // Convert DOM elements into strings.
          ||
       [ HTML5 ]     // HTML5 document or fragment in text.


## HTML5 Class

Provides the top level interface for saving.

## The Traverser

Walks the DOM finding each element and passing it off to the output rules to
convert to HTML5.

## Output Rules

The output rules are defined by RulesInterface, which can have multiple
implementations. Currently, OutputRules is the default implementation; it
converts a DOM as-is into HTML5.

## HTML5 String

The output of the process it HTML5 as a string or saved to a file.PK      ��Z{�7
  7
    Serializer/RulesInterface.phpnu W+A��        <?php
/**
 * @file
 * The interface definition for Rules to generate output.
 */

namespace Masterminds\HTML5\Serializer;

/**
 * Contract for a rule set that writes serialized output.
 *
 * To create a new rule set for writing output, implement this interface.
 * The resulting class can then be specified in the options under the key
 * "rules".
 *
 * Every write method below sends its result to the output stream handed to
 * the constructor instead of returning it.
 *
 * For an example implementation see Serializer\OutputRules.
 */
interface RulesInterface
{
    /**
     * The class constructor.
     *
     * Note: before the rules can be used, a traverser must be registered
     * (see setTraverser()).
     *
     * @param mixed $output  The output stream to write output to.
     * @param array $options An array of options.
     */
    public function __construct($output, $options = array());

    /**
     * Register the traverser used by the rules.
     *
     * Note: only one traverser can be used by the rules.
     *
     * @param Traverser $traverser The traverser used in the rules.
     *
     * @return RulesInterface $this for the current object.
     */
    public function setTraverser(Traverser $traverser);

    /**
     * Write a document element (\DOMDocument).
     *
     * Instead of returning the result, write it to the output stream ($output)
     * that was passed into the constructor.
     *
     * @param \DOMDocument $dom The document to write.
     */
    public function document($dom);

    /**
     * Write an element.
     *
     * Instead of returning the result, write it to the output stream ($output)
     * that was passed into the constructor.
     *
     * @param mixed $ele The element to write.
     */
    public function element($ele);

    /**
     * Write a text node.
     *
     * Instead of returning the result, write it to the output stream ($output)
     * that was passed into the constructor.
     *
     * @param mixed $ele The text node to write.
     */
    public function text($ele);

    /**
     * Write a CDATA node.
     *
     * Instead of returning the result, write it to the output stream ($output)
     * that was passed into the constructor.
     *
     * @param mixed $ele The CDATA node to write.
     */
    public function cdata($ele);

    /**
     * Write a comment node.
     *
     * Instead of returning the result, write it to the output stream ($output)
     * that was passed into the constructor.
     *
     * @param mixed $ele The comment node to write.
     */
    public function comment($ele);

    /**
     * Write a processor (processing) instruction.
     *
     * To learn about processor instructions see InstructionProcessor.
     *
     * Instead of returning the result, write it to the output stream ($output)
     * that was passed into the constructor.
     *
     * @param mixed $ele The processor instruction node to write.
     */
    public function processorInstruction($ele);
}
PK      ��Z��e�   �     Exception.phpnu W+A��        <?php

namespace Masterminds\HTML5;

/**
 * The base exception for the HTML5 project.
 *
 * Extends the global \Exception without adding any members of its own.
 * NOTE(review): presumably exists so library errors can be caught by a
 * single project-specific type — confirm against the throwing code.
 */
class Exception extends \Exception
{
}
PK      ��ZȍRx�  �    Entities.phpnu W+A��        <?php

namespace Masterminds\HTML5;

/**
 * Entity lookup tables.
 * This class is automatically generated.
 */
class Entities
{
    public static $byName = array(
        'Aacute' => 'Á',
        'Aacut' => 'Á',
        'aacute' => 'á',
        'aacut' => 'á',
        'Abreve' => 'Ă',
        'abreve' => 'ă',
        'ac' => '∾',
        'acd' => '∿',
        'acE' => '∾̳',
        'Acirc' => 'Â',
        'Acir' => 'Â',
        'acirc' => 'â',
        'acir' => 'â',
        'acute' => '´',
        'acut' => '´',
        'Acy' => 'А',
        'acy' => 'а',
        'AElig' => 'Æ',
        'AEli' => 'Æ',
        'aelig' => 'æ',
        'aeli' => 'æ',
        'af' => '⁡',
        'Afr' => '𝔄',
        'afr' => '𝔞',
        'Agrave' => 'À',
        'Agrav' => 'À',
        'agrave' => 'à',
        'agrav' => 'à',
        'alefsym' => 'ℵ',
        'aleph' => 'ℵ',
        'Alpha' => 'Α',
        'alpha' => 'α',
        'Amacr' => 'Ā',
        'amacr' => 'ā',
        'amalg' => '⨿',
        'AMP' => '&',
        'AM' => '&',
        'amp' => '&',
        'am' => '&',
        'And' => '⩓',
        'and' => '∧',
        'andand' => '⩕',
        'andd' => '⩜',
        'andslope' => '⩘',
        'andv' => '⩚',
        'ang' => '∠',
        'ange' => '⦤',
        'angle' => '∠',
        'angmsd' => '∡',
        'angmsdaa' => '⦨',
        'angmsdab' => '⦩',
        'angmsdac' => '⦪',
        'angmsdad' => '⦫',
        'angmsdae' => '⦬',
        'angmsdaf' => '⦭',
        'angmsdag' => '⦮',
        'angmsdah' => '⦯',
        'angrt' => '∟',
        'angrtvb' => '⊾',
        'angrtvbd' => '⦝',
        'angsph' => '∢',
        'angst' => 'Å',
        'angzarr' => '⍼',
        'Aogon' => 'Ą',
        'aogon' => 'ą',
        'Aopf' => '𝔸',
        'aopf' => '𝕒',
        'ap' => '≈',
        'apacir' => '⩯',
        'apE' => '⩰',
        'ape' => '≊',
        'apid' => '≋',
        'apos' => '\'',
        'ApplyFunction' => '⁡',
        'approx' => '≈',
        'approxeq' => '≊',
        'Aring' => 'Å',
        'Arin' => 'Å',
        'aring' => 'å',
        'arin' => 'å',
        'Ascr' => '𝒜',
        'ascr' => '𝒶',
        'Assign' => '≔',
        'ast' => '*',
        'asymp' => '≈',
        'asympeq' => '≍',
        'Atilde' => 'Ã',
        'Atild' => 'Ã',
        'atilde' => 'ã',
        'atild' => 'ã',
        'Auml' => 'Ä',
        'Aum' => 'Ä',
        'auml' => 'ä',
        'aum' => 'ä',
        'awconint' => '∳',
        'awint' => '⨑',
        'backcong' => '≌',
        'backepsilon' => '϶',
        'backprime' => '‵',
        'backsim' => '∽',
        'backsimeq' => '⋍',
        'Backslash' => '∖',
        'Barv' => '⫧',
        'barvee' => '⊽',
        'Barwed' => '⌆',
        'barwed' => '⌅',
        'barwedge' => '⌅',
        'bbrk' => '⎵',
        'bbrktbrk' => '⎶',
        'bcong' => '≌',
        'Bcy' => 'Б',
        'bcy' => 'б',
        'bdquo' => '„',
        'becaus' => '∵',
        'Because' => '∵',
        'because' => '∵',
        'bemptyv' => '⦰',
        'bepsi' => '϶',
        'bernou' => 'ℬ',
        'Bernoullis' => 'ℬ',
        'Beta' => 'Β',
        'beta' => 'β',
        'beth' => 'ℶ',
        'between' => '≬',
        'Bfr' => '𝔅',
        'bfr' => '𝔟',
        'bigcap' => '⋂',
        'bigcirc' => '◯',
        'bigcup' => '⋃',
        'bigodot' => '⨀',
        'bigoplus' => '⨁',
        'bigotimes' => '⨂',
        'bigsqcup' => '⨆',
        'bigstar' => '★',
        'bigtriangledown' => '▽',
        'bigtriangleup' => '△',
        'biguplus' => '⨄',
        'bigvee' => '⋁',
        'bigwedge' => '⋀',
        'bkarow' => '⤍',
        'blacklozenge' => '⧫',
        'blacksquare' => '▪',
        'blacktriangle' => '▴',
        'blacktriangledown' => '▾',
        'blacktriangleleft' => '◂',
        'blacktriangleright' => '▸',
        'blank' => '␣',
        'blk12' => '▒',
        'blk14' => '░',
        'blk34' => '▓',
        'block' => '█',
        'bne' => '=⃥',
        'bnequiv' => '≡⃥',
        'bNot' => '⫭',
        'bnot' => '⌐',
        'Bopf' => '𝔹',
        'bopf' => '𝕓',
        'bot' => '⊥',
        'bottom' => '⊥',
        'bowtie' => '⋈',
        'boxbox' => '⧉',
        'boxDL' => '╗',
        'boxDl' => '╖',
        'boxdL' => '╕',
        'boxdl' => '┐',
        'boxDR' => '╔',
        'boxDr' => '╓',
        'boxdR' => '╒',
        'boxdr' => '┌',
        'boxH' => '═',
        'boxh' => '─',
        'boxHD' => '╦',
        'boxHd' => '╤',
        'boxhD' => '╥',
        'boxhd' => '┬',
        'boxHU' => '╩',
        'boxHu' => '╧',
        'boxhU' => '╨',
        'boxhu' => '┴',
        'boxminus' => '⊟',
        'boxplus' => '⊞',
        'boxtimes' => '⊠',
        'boxUL' => '╝',
        'boxUl' => '╜',
        'boxuL' => '╛',
        'boxul' => '┘',
        'boxUR' => '╚',
        'boxUr' => '╙',
        'boxuR' => '╘',
        'boxur' => '└',
        'boxV' => '║',
        'boxv' => '│',
        'boxVH' => '╬',
        'boxVh' => '╫',
        'boxvH' => '╪',
        'boxvh' => '┼',
        'boxVL' => '╣',
        'boxVl' => '╢',
        'boxvL' => '╡',
        'boxvl' => '┤',
        'boxVR' => '╠',
        'boxVr' => '╟',
        'boxvR' => '╞',
        'boxvr' => '├',
        'bprime' => '‵',
        'Breve' => '˘',
        'breve' => '˘',
        'brvbar' => '¦',
        'brvba' => '¦',
        'Bscr' => 'ℬ',
        'bscr' => '𝒷',
        'bsemi' => '⁏',
        'bsim' => '∽',
        'bsime' => '⋍',
        'bsol' => '\\',
        'bsolb' => '⧅',
        'bsolhsub' => '⟈',
        'bull' => '•',
        'bullet' => '•',
        'bump' => '≎',
        'bumpE' => '⪮',
        'bumpe' => '≏',
        'Bumpeq' => '≎',
        'bumpeq' => '≏',
        'Cacute' => 'Ć',
        'cacute' => 'ć',
        'Cap' => '⋒',
        'cap' => '∩',
        'capand' => '⩄',
        'capbrcup' => '⩉',
        'capcap' => '⩋',
        'capcup' => '⩇',
        'capdot' => '⩀',
        'CapitalDifferentialD' => 'ⅅ',
        'caps' => '∩︀',
        'caret' => '⁁',
        'caron' => 'ˇ',
        'Cayleys' => 'ℭ',
        'ccaps' => '⩍',
        'Ccaron' => 'Č',
        'ccaron' => 'č',
        'Ccedil' => 'Ç',
        'Ccedi' => 'Ç',
        'ccedil' => 'ç',
        'ccedi' => 'ç',
        'Ccirc' => 'Ĉ',
        'ccirc' => 'ĉ',
        'Cconint' => '∰',
        'ccups' => '⩌',
        'ccupssm' => '⩐',
        'Cdot' => 'Ċ',
        'cdot' => 'ċ',
        'cedil' => '¸',
        'cedi' => '¸',
        'Cedilla' => '¸',
        'cemptyv' => '⦲',
        'cent' => '¢',
        'cen' => '¢',
        'CenterDot' => '·',
        'centerdot' => '·',
        'Cfr' => 'ℭ',
        'cfr' => '𝔠',
        'CHcy' => 'Ч',
        'chcy' => 'ч',
        'check' => '✓',
        'checkmark' => '✓',
        'Chi' => 'Χ',
        'chi' => 'χ',
        'cir' => '○',
        'circ' => 'ˆ',
        'circeq' => '≗',
        'circlearrowleft' => '↺',
        'circlearrowright' => '↻',
        'circledast' => '⊛',
        'circledcirc' => '⊚',
        'circleddash' => '⊝',
        'CircleDot' => '⊙',
        'circledR' => '®',
        'circledS' => 'Ⓢ',
        'CircleMinus' => '⊖',
        'CirclePlus' => '⊕',
        'CircleTimes' => '⊗',
        'cirE' => '⧃',
        'cire' => '≗',
        'cirfnint' => '⨐',
        'cirmid' => '⫯',
        'cirscir' => '⧂',
        'ClockwiseContourIntegral' => '∲',
        'CloseCurlyDoubleQuote' => '”',
        'CloseCurlyQuote' => '’',
        'clubs' => '♣',
        'clubsuit' => '♣',
        'Colon' => '∷',
        'colon' => ':',
        'Colone' => '⩴',
        'colone' => '≔',
        'coloneq' => '≔',
        'comma' => ',',
        'commat' => '@',
        'comp' => '∁',
        'compfn' => '∘',
        'complement' => '∁',
        'complexes' => 'ℂ',
        'cong' => '≅',
        'congdot' => '⩭',
        'Congruent' => '≡',
        'Conint' => '∯',
        'conint' => '∮',
        'ContourIntegral' => '∮',
        'Copf' => 'ℂ',
        'copf' => '𝕔',
        'coprod' => '∐',
        'Coproduct' => '∐',
        'COPY' => '©',
        'COP' => '©',
        'copy' => '©',
        'cop' => '©',
        'copysr' => '℗',
        'CounterClockwiseContourIntegral' => '∳',
        'crarr' => '↵',
        'Cross' => '⨯',
        'cross' => '✗',
        'Cscr' => '𝒞',
        'cscr' => '𝒸',
        'csub' => '⫏',
        'csube' => '⫑',
        'csup' => '⫐',
        'csupe' => '⫒',
        'ctdot' => '⋯',
        'cudarrl' => '⤸',
        'cudarrr' => '⤵',
        'cuepr' => '⋞',
        'cuesc' => '⋟',
        'cularr' => '↶',
        'cularrp' => '⤽',
        'Cup' => '⋓',
        'cup' => '∪',
        'cupbrcap' => '⩈',
        'CupCap' => '≍',
        'cupcap' => '⩆',
        'cupcup' => '⩊',
        'cupdot' => '⊍',
        'cupor' => '⩅',
        'cups' => '∪︀',
        'curarr' => '↷',
        'curarrm' => '⤼',
        'curlyeqprec' => '⋞',
        'curlyeqsucc' => '⋟',
        'curlyvee' => '⋎',
        'curlywedge' => '⋏',
        'curren' => '¤',
        'curre' => '¤',
        'curvearrowleft' => '↶',
        'curvearrowright' => '↷',
        'cuvee' => '⋎',
        'cuwed' => '⋏',
        'cwconint' => '∲',
        'cwint' => '∱',
        'cylcty' => '⌭',
        'Dagger' => '‡',
        'dagger' => '†',
        'daleth' => 'ℸ',
        'Darr' => '↡',
        'dArr' => '⇓',
        'darr' => '↓',
        'dash' => '‐',
        'Dashv' => '⫤',
        'dashv' => '⊣',
        'dbkarow' => '⤏',
        'dblac' => '˝',
        'Dcaron' => 'Ď',
        'dcaron' => 'ď',
        'Dcy' => 'Д',
        'dcy' => 'д',
        'DD' => 'ⅅ',
        'dd' => 'ⅆ',
        'ddagger' => '‡',
        'ddarr' => '⇊',
        'DDotrahd' => '⤑',
        'ddotseq' => '⩷',
        'deg' => '°',
        'de' => '°',
        'Del' => '∇',
        'Delta' => 'Δ',
        'delta' => 'δ',
        'demptyv' => '⦱',
        'dfisht' => '⥿',
        'Dfr' => '𝔇',
        'dfr' => '𝔡',
        'dHar' => '⥥',
        'dharl' => '⇃',
        'dharr' => '⇂',
        'DiacriticalAcute' => '´',
        'DiacriticalDot' => '˙',
        'DiacriticalDoubleAcute' => '˝',
        'DiacriticalGrave' => '`',
        'DiacriticalTilde' => '˜',
        'diam' => '⋄',
        'Diamond' => '⋄',
        'diamond' => '⋄',
        'diamondsuit' => '♦',
        'diams' => '♦',
        'die' => '¨',
        'DifferentialD' => 'ⅆ',
        'digamma' => 'ϝ',
        'disin' => '⋲',
        'div' => '÷',
        'divide' => '÷',
        'divid' => '÷',
        'divideontimes' => '⋇',
        'divonx' => '⋇',
        'DJcy' => 'Ђ',
        'djcy' => 'ђ',
        'dlcorn' => '⌞',
        'dlcrop' => '⌍',
        'dollar' => '$',
        'Dopf' => '𝔻',
        'dopf' => '𝕕',
        'Dot' => '¨',
        'dot' => '˙',
        'DotDot' => '⃜',
        'doteq' => '≐',
        'doteqdot' => '≑',
        'DotEqual' => '≐',
        'dotminus' => '∸',
        'dotplus' => '∔',
        'dotsquare' => '⊡',
        'doublebarwedge' => '⌆',
        'DoubleContourIntegral' => '∯',
        'DoubleDot' => '¨',
        'DoubleDownArrow' => '⇓',
        'DoubleLeftArrow' => '⇐',
        'DoubleLeftRightArrow' => '⇔',
        'DoubleLeftTee' => '⫤',
        'DoubleLongLeftArrow' => '⟸',
        'DoubleLongLeftRightArrow' => '⟺',
        'DoubleLongRightArrow' => '⟹',
        'DoubleRightArrow' => '⇒',
        'DoubleRightTee' => '⊨',
        'DoubleUpArrow' => '⇑',
        'DoubleUpDownArrow' => '⇕',
        'DoubleVerticalBar' => '∥',
        'DownArrow' => '↓',
        'Downarrow' => '⇓',
        'downarrow' => '↓',
        'DownArrowBar' => '⤓',
        'DownArrowUpArrow' => '⇵',
        'DownBreve' => '̑',
        'downdownarrows' => '⇊',
        'downharpoonleft' => '⇃',
        'downharpoonright' => '⇂',
        'DownLeftRightVector' => '⥐',
        'DownLeftTeeVector' => '⥞',
        'DownLeftVector' => '↽',
        'DownLeftVectorBar' => '⥖',
        'DownRightTeeVector' => '⥟',
        'DownRightVector' => '⇁',
        'DownRightVectorBar' => '⥗',
        'DownTee' => '⊤',
        'DownTeeArrow' => '↧',
        'drbkarow' => '⤐',
        'drcorn' => '⌟',
        'drcrop' => '⌌',
        'Dscr' => '𝒟',
        'dscr' => '𝒹',
        'DScy' => 'Ѕ',
        'dscy' => 'ѕ',
        'dsol' => '⧶',
        'Dstrok' => 'Đ',
        'dstrok' => 'đ',
        'dtdot' => '⋱',
        'dtri' => '▿',
        'dtrif' => '▾',
        'duarr' => '⇵',
        'duhar' => '⥯',
        'dwangle' => '⦦',
        'DZcy' => 'Џ',
        'dzcy' => 'џ',
        'dzigrarr' => '⟿',
        'Eacute' => 'É',
        'Eacut' => 'É',
        'eacute' => 'é',
        'eacut' => 'é',
        'easter' => '⩮',
        'Ecaron' => 'Ě',
        'ecaron' => 'ě',
        'ecir' => 'ê',
        'Ecirc' => 'Ê',
        'Ecir' => 'Ê',
        'ecirc' => 'ê',
        'ecolon' => '≕',
        'Ecy' => 'Э',
        'ecy' => 'э',
        'eDDot' => '⩷',
        'Edot' => 'Ė',
        'eDot' => '≑',
        'edot' => 'ė',
        'ee' => 'ⅇ',
        'efDot' => '≒',
        'Efr' => '𝔈',
        'efr' => '𝔢',
        'eg' => '⪚',
        'Egrave' => 'È',
        'Egrav' => 'È',
        'egrave' => 'è',
        'egrav' => 'è',
        'egs' => '⪖',
        'egsdot' => '⪘',
        'el' => '⪙',
        'Element' => '∈',
        'elinters' => '⏧',
        'ell' => 'ℓ',
        'els' => '⪕',
        'elsdot' => '⪗',
        'Emacr' => 'Ē',
        'emacr' => 'ē',
        'empty' => '∅',
        'emptyset' => '∅',
        'EmptySmallSquare' => '◻',
        'emptyv' => '∅',
        'EmptyVerySmallSquare' => '▫',
        'emsp' => ' ',
        'emsp13' => ' ',
        'emsp14' => ' ',
        'ENG' => 'Ŋ',
        'eng' => 'ŋ',
        'ensp' => ' ',
        'Eogon' => 'Ę',
        'eogon' => 'ę',
        'Eopf' => '𝔼',
        'eopf' => '𝕖',
        'epar' => '⋕',
        'eparsl' => '⧣',
        'eplus' => '⩱',
        'epsi' => 'ε',
        'Epsilon' => 'Ε',
        'epsilon' => 'ε',
        'epsiv' => 'ϵ',
        'eqcirc' => '≖',
        'eqcolon' => '≕',
        'eqsim' => '≂',
        'eqslantgtr' => '⪖',
        'eqslantless' => '⪕',
        'Equal' => '⩵',
        'equals' => '=',
        'EqualTilde' => '≂',
        'equest' => '≟',
        'Equilibrium' => '⇌',
        'equiv' => '≡',
        'equivDD' => '⩸',
        'eqvparsl' => '⧥',
        'erarr' => '⥱',
        'erDot' => '≓',
        'Escr' => 'ℰ',
        'escr' => 'ℯ',
        'esdot' => '≐',
        'Esim' => '⩳',
        'esim' => '≂',
        'Eta' => 'Η',
        'eta' => 'η',
        'ETH' => 'Ð',
        'ET' => 'Ð',
        'eth' => 'ð',
        'et' => 'ð',
        'Euml' => 'Ë',
        'Eum' => 'Ë',
        'euml' => 'ë',
        'eum' => 'ë',
        'euro' => '€',
        'excl' => '!',
        'exist' => '∃',
        'Exists' => '∃',
        'expectation' => 'ℰ',
        'ExponentialE' => 'ⅇ',
        'exponentiale' => 'ⅇ',
        'fallingdotseq' => '≒',
        'Fcy' => 'Ф',
        'fcy' => 'ф',
        'female' => '♀',
        'ffilig' => 'ﬃ',
        'fflig' => 'ﬀ',
        'ffllig' => 'ﬄ',
        'Ffr' => '𝔉',
        'ffr' => '𝔣',
        'filig' => 'ﬁ',
        'FilledSmallSquare' => '◼',
        'FilledVerySmallSquare' => '▪',
        'fjlig' => 'fj',
        'flat' => '♭',
        'fllig' => 'ﬂ',
        'fltns' => '▱',
        'fnof' => 'ƒ',
        'Fopf' => '𝔽',
        'fopf' => '𝕗',
        'ForAll' => '∀',
        'forall' => '∀',
        'fork' => '⋔',
        'forkv' => '⫙',
        'Fouriertrf' => 'ℱ',
        'fpartint' => '⨍',
        'frac12' => '½',
        'frac1' => '¼',
        'frac13' => '⅓',
        'frac14' => '¼',
        'frac15' => '⅕',
        'frac16' => '⅙',
        'frac18' => '⅛',
        'frac23' => '⅔',
        'frac25' => '⅖',
        'frac34' => '¾',
        'frac3' => '¾',
        'frac35' => '⅗',
        'frac38' => '⅜',
        'frac45' => '⅘',
        'frac56' => '⅚',
        'frac58' => '⅝',
        'frac78' => '⅞',
        'frasl' => '⁄',
        'frown' => '⌢',
        'Fscr' => 'ℱ',
        'fscr' => '𝒻',
        'gacute' => 'ǵ',
        'Gamma' => 'Γ',
        'gamma' => 'γ',
        'Gammad' => 'Ϝ',
        'gammad' => 'ϝ',
        'gap' => '⪆',
        'Gbreve' => 'Ğ',
        'gbreve' => 'ğ',
        'Gcedil' => 'Ģ',
        'Gcirc' => 'Ĝ',
        'gcirc' => 'ĝ',
        'Gcy' => 'Г',
        'gcy' => 'г',
        'Gdot' => 'Ġ',
        'gdot' => 'ġ',
        'gE' => '≧',
        'ge' => '≥',
        'gEl' => '⪌',
        'gel' => '⋛',
        'geq' => '≥',
        'geqq' => '≧',
        'geqslant' => '⩾',
        'ges' => '⩾',
        'gescc' => '⪩',
        'gesdot' => '⪀',
        'gesdoto' => '⪂',
        'gesdotol' => '⪄',
        'gesl' => '⋛︀',
        'gesles' => '⪔',
        'Gfr' => '𝔊',
        'gfr' => '𝔤',
        'Gg' => '⋙',
        'gg' => '≫',
        'ggg' => '⋙',
        'gimel' => 'ℷ',
        'GJcy' => 'Ѓ',
        'gjcy' => 'ѓ',
        'gl' => '≷',
        'gla' => '⪥',
        'glE' => '⪒',
        'glj' => '⪤',
        'gnap' => '⪊',
        'gnapprox' => '⪊',
        'gnE' => '≩',
        'gne' => '⪈',
        'gneq' => '⪈',
        'gneqq' => '≩',
        'gnsim' => '⋧',
        'Gopf' => '𝔾',
        'gopf' => '𝕘',
        'grave' => '`',
        'GreaterEqual' => '≥',
        'GreaterEqualLess' => '⋛',
        'GreaterFullEqual' => '≧',
        'GreaterGreater' => '⪢',
        'GreaterLess' => '≷',
        'GreaterSlantEqual' => '⩾',
        'GreaterTilde' => '≳',
        'Gscr' => '𝒢',
        'gscr' => 'ℊ',
        'gsim' => '≳',
        'gsime' => '⪎',
        'gsiml' => '⪐',
        'GT' => '>',
        'G' => '>',
        'Gt' => '≫',
        'gt' => '>',
        'g' => '>',
        'gtcc' => '⪧',
        'gtcir' => '⩺',
        'gtdot' => '⋗',
        'gtlPar' => '⦕',
        'gtquest' => '⩼',
        'gtrapprox' => '⪆',
        'gtrarr' => '⥸',
        'gtrdot' => '⋗',
        'gtreqless' => '⋛',
        'gtreqqless' => '⪌',
        'gtrless' => '≷',
        'gtrsim' => '≳',
        'gvertneqq' => '≩︀',
        'gvnE' => '≩︀',
        'Hacek' => 'ˇ',
        'hairsp' => ' ',
        'half' => '½',
        'hamilt' => 'ℋ',
        'HARDcy' => 'Ъ',
        'hardcy' => 'ъ',
        'hArr' => '⇔',
        'harr' => '↔',
        'harrcir' => '⥈',
        'harrw' => '↭',
        'Hat' => '^',
        'hbar' => 'ℏ',
        'Hcirc' => 'Ĥ',
        'hcirc' => 'ĥ',
        'hearts' => '♥',
        'heartsuit' => '♥',
        'hellip' => '…',
        'hercon' => '⊹',
        'Hfr' => 'ℌ',
        'hfr' => '𝔥',
        'HilbertSpace' => 'ℋ',
        'hksearow' => '⤥',
        'hkswarow' => '⤦',
        'hoarr' => '⇿',
        'homtht' => '∻',
        'hookleftarrow' => '↩',
        'hookrightarrow' => '↪',
        'Hopf' => 'ℍ',
        'hopf' => '𝕙',
        'horbar' => '―',
        'HorizontalLine' => '─',
        'Hscr' => 'ℋ',
        'hscr' => '𝒽',
        'hslash' => 'ℏ',
        'Hstrok' => 'Ħ',
        'hstrok' => 'ħ',
        'HumpDownHump' => '≎',
        'HumpEqual' => '≏',
        'hybull' => '⁃',
        'hyphen' => '‐',
        'Iacute' => 'Í',
        'Iacut' => 'Í',
        'iacute' => 'í',
        'iacut' => 'í',
        'ic' => '⁣',
        'Icirc' => 'Î',
        'Icir' => 'Î',
        'icirc' => 'î',
        'icir' => 'î',
        'Icy' => 'И',
        'icy' => 'и',
        'Idot' => 'İ',
        'IEcy' => 'Е',
        'iecy' => 'е',
        'iexcl' => '¡',
        'iexc' => '¡',
        'iff' => '⇔',
        'Ifr' => 'ℑ',
        'ifr' => '𝔦',
        'Igrave' => 'Ì',
        'Igrav' => 'Ì',
        'igrave' => 'ì',
        'igrav' => 'ì',
        'ii' => 'ⅈ',
        'iiiint' => '⨌',
        'iiint' => '∭',
        'iinfin' => '⧜',
        'iiota' => '℩',
        'IJlig' => 'Ĳ',
        'ijlig' => 'ĳ',
        'Im' => 'ℑ',
        'Imacr' => 'Ī',
        'imacr' => 'ī',
        'image' => 'ℑ',
        'ImaginaryI' => 'ⅈ',
        'imagline' => 'ℐ',
        'imagpart' => 'ℑ',
        'imath' => 'ı',
        'imof' => '⊷',
        'imped' => 'Ƶ',
        'Implies' => '⇒',
        'in' => '∈',
        'incare' => '℅',
        'infin' => '∞',
        'infintie' => '⧝',
        'inodot' => 'ı',
        'Int' => '∬',
        'int' => '∫',
        'intcal' => '⊺',
        'integers' => 'ℤ',
        'Integral' => '∫',
        'intercal' => '⊺',
        'Intersection' => '⋂',
        'intlarhk' => '⨗',
        'intprod' => '⨼',
        'InvisibleComma' => '⁣',
        'InvisibleTimes' => '⁢',
        'IOcy' => 'Ё',
        'iocy' => 'ё',
        'Iogon' => 'Į',
        'iogon' => 'į',
        'Iopf' => '𝕀',
        'iopf' => '𝕚',
        'Iota' => 'Ι',
        'iota' => 'ι',
        'iprod' => '⨼',
        'iquest' => '¿',
        'iques' => '¿',
        'Iscr' => 'ℐ',
        'iscr' => '𝒾',
        'isin' => '∈',
        'isindot' => '⋵',
        'isinE' => '⋹',
        'isins' => '⋴',
        'isinsv' => '⋳',
        'isinv' => '∈',
        'it' => '⁢',
        'Itilde' => 'Ĩ',
        'itilde' => 'ĩ',
        'Iukcy' => 'І',
        'iukcy' => 'і',
        'Iuml' => 'Ï',
        'Ium' => 'Ï',
        'iuml' => 'ï',
        'ium' => 'ï',
        'Jcirc' => 'Ĵ',
        'jcirc' => 'ĵ',
        'Jcy' => 'Й',
        'jcy' => 'й',
        'Jfr' => '𝔍',
        'jfr' => '𝔧',
        'jmath' => 'ȷ',
        'Jopf' => '𝕁',
        'jopf' => '𝕛',
        'Jscr' => '𝒥',
        'jscr' => '𝒿',
        'Jsercy' => 'Ј',
        'jsercy' => 'ј',
        'Jukcy' => 'Є',
        'jukcy' => 'є',
        'Kappa' => 'Κ',
        'kappa' => 'κ',
        'kappav' => 'ϰ',
        'Kcedil' => 'Ķ',
        'kcedil' => 'ķ',
        'Kcy' => 'К',
        'kcy' => 'к',
        'Kfr' => '𝔎',
        'kfr' => '𝔨',
        'kgreen' => 'ĸ',
        'KHcy' => 'Х',
        'khcy' => 'х',
        'KJcy' => 'Ќ',
        'kjcy' => 'ќ',
        'Kopf' => '𝕂',
        'kopf' => '𝕜',
        'Kscr' => '𝒦',
        'kscr' => '𝓀',
        'lAarr' => '⇚',
        'Lacute' => 'Ĺ',
        'lacute' => 'ĺ',
        'laemptyv' => '⦴',
        'lagran' => 'ℒ',
        'Lambda' => 'Λ',
        'lambda' => 'λ',
        'Lang' => '⟪',
        'lang' => '⟨',
        'langd' => '⦑',
        'langle' => '⟨',
        'lap' => '⪅',
        'Laplacetrf' => 'ℒ',
        'laquo' => '«',
        'laqu' => '«',
        'Larr' => '↞',
        'lArr' => '⇐',
        'larr' => '←',
        'larrb' => '⇤',
        'larrbfs' => '⤟',
        'larrfs' => '⤝',
        'larrhk' => '↩',
        'larrlp' => '↫',
        'larrpl' => '⤹',
        'larrsim' => '⥳',
        'larrtl' => '↢',
        'lat' => '⪫',
        'lAtail' => '⤛',
        'latail' => '⤙',
        'late' => '⪭',
        'lates' => '⪭︀',
        'lBarr' => '⤎',
        'lbarr' => '⤌',
        'lbbrk' => '❲',
        'lbrace' => '{',
        'lbrack' => '[',
        'lbrke' => '⦋',
        'lbrksld' => '⦏',
        'lbrkslu' => '⦍',
        'Lcaron' => 'Ľ',
        'lcaron' => 'ľ',
        'Lcedil' => 'Ļ',
        'lcedil' => 'ļ',
        'lceil' => '⌈',
        'lcub' => '{',
        'Lcy' => 'Л',
        'lcy' => 'л',
        'ldca' => '⤶',
        'ldquo' => '“',
        'ldquor' => '„',
        'ldrdhar' => '⥧',
        'ldrushar' => '⥋',
        'ldsh' => '↲',
        'lE' => '≦',
        'le' => '≤',
        'LeftAngleBracket' => '⟨',
        'LeftArrow' => '←',
        'Leftarrow' => '⇐',
        'leftarrow' => '←',
        'LeftArrowBar' => '⇤',
        'LeftArrowRightArrow' => '⇆',
        'leftarrowtail' => '↢',
        'LeftCeiling' => '⌈',
        'LeftDoubleBracket' => '⟦',
        'LeftDownTeeVector' => '⥡',
        'LeftDownVector' => '⇃',
        'LeftDownVectorBar' => '⥙',
        'LeftFloor' => '⌊',
        'leftharpoondown' => '↽',
        'leftharpoonup' => '↼',
        'leftleftarrows' => '⇇',
        'LeftRightArrow' => '↔',
        'Leftrightarrow' => '⇔',
        'leftrightarrow' => '↔',
        'leftrightarrows' => '⇆',
        'leftrightharpoons' => '⇋',
        'leftrightsquigarrow' => '↭',
        'LeftRightVector' => '⥎',
        'LeftTee' => '⊣',
        'LeftTeeArrow' => '↤',
        'LeftTeeVector' => '⥚',
        'leftthreetimes' => '⋋',
        'LeftTriangle' => '⊲',
        'LeftTriangleBar' => '⧏',
        'LeftTriangleEqual' => '⊴',
        'LeftUpDownVector' => '⥑',
        'LeftUpTeeVector' => '⥠',
        'LeftUpVector' => '↿',
        'LeftUpVectorBar' => '⥘',
        'LeftVector' => '↼',
        'LeftVectorBar' => '⥒',
        'lEg' => '⪋',
        'leg' => '⋚',
        'leq' => '≤',
        'leqq' => '≦',
        'leqslant' => '⩽',
        'les' => '⩽',
        'lescc' => '⪨',
        'lesdot' => '⩿',
        'lesdoto' => '⪁',
        'lesdotor' => '⪃',
        'lesg' => '⋚︀',
        'lesges' => '⪓',
        'lessapprox' => '⪅',
        'lessdot' => '⋖',
        'lesseqgtr' => '⋚',
        'lesseqqgtr' => '⪋',
        'LessEqualGreater' => '⋚',
        'LessFullEqual' => '≦',
        'LessGreater' => '≶',
        'lessgtr' => '≶',
        'LessLess' => '⪡',
        'lesssim' => '≲',
        'LessSlantEqual' => '⩽',
        'LessTilde' => '≲',
        'lfisht' => '⥼',
        'lfloor' => '⌊',
        'Lfr' => '𝔏',
        'lfr' => '𝔩',
        'lg' => '≶',
        'lgE' => '⪑',
        'lHar' => '⥢',
        'lhard' => '↽',
        'lharu' => '↼',
        'lharul' => '⥪',
        'lhblk' => '▄',
        'LJcy' => 'Љ',
        'ljcy' => 'љ',
        'Ll' => '⋘',
        'll' => '≪',
        'llarr' => '⇇',
        'llcorner' => '⌞',
        'Lleftarrow' => '⇚',
        'llhard' => '⥫',
        'lltri' => '◺',
        'Lmidot' => 'Ŀ',
        'lmidot' => 'ŀ',
        'lmoust' => '⎰',
        'lmoustache' => '⎰',
        'lnap' => '⪉',
        'lnapprox' => '⪉',
        'lnE' => '≨',
        'lne' => '⪇',
        'lneq' => '⪇',
        'lneqq' => '≨',
        'lnsim' => '⋦',
        'loang' => '⟬',
        'loarr' => '⇽',
        'lobrk' => '⟦',
        'LongLeftArrow' => '⟵',
        'Longleftarrow' => '⟸',
        'longleftarrow' => '⟵',
        'LongLeftRightArrow' => '⟷',
        'Longleftrightarrow' => '⟺',
        'longleftrightarrow' => '⟷',
        'longmapsto' => '⟼',
        'LongRightArrow' => '⟶',
        'Longrightarrow' => '⟹',
        'longrightarrow' => '⟶',
        'looparrowleft' => '↫',
        'looparrowright' => '↬',
        'lopar' => '⦅',
        'Lopf' => '𝕃',
        'lopf' => '𝕝',
        'loplus' => '⨭',
        'lotimes' => '⨴',
        'lowast' => '∗',
        'lowbar' => '_',
        'LowerLeftArrow' => '↙',
        'LowerRightArrow' => '↘',
        'loz' => '◊',
        'lozenge' => '◊',
        'lozf' => '⧫',
        'lpar' => '(',
        'lparlt' => '⦓',
        'lrarr' => '⇆',
        'lrcorner' => '⌟',
        'lrhar' => '⇋',
        'lrhard' => '⥭',
        'lrm' => '‎',
        'lrtri' => '⊿',
        'lsaquo' => '‹',
        'Lscr' => 'ℒ',
        'lscr' => '𝓁',
        'Lsh' => '↰',
        'lsh' => '↰',
        'lsim' => '≲',
        'lsime' => '⪍',
        'lsimg' => '⪏',
        'lsqb' => '[',
        'lsquo' => '‘',
        'lsquor' => '‚',
        'Lstrok' => 'Ł',
        'lstrok' => 'ł',
        'LT' => '<',
        'L' => '<',
        'Lt' => '≪',
        'lt' => '<',
        'l' => '<',
        'ltcc' => '⪦',
        'ltcir' => '⩹',
        'ltdot' => '⋖',
        'lthree' => '⋋',
        'ltimes' => '⋉',
        'ltlarr' => '⥶',
        'ltquest' => '⩻',
        'ltri' => '◃',
        'ltrie' => '⊴',
        'ltrif' => '◂',
        'ltrPar' => '⦖',
        'lurdshar' => '⥊',
        'luruhar' => '⥦',
        'lvertneqq' => '≨︀',
        'lvnE' => '≨︀',
        'macr' => '¯',
        'mac' => '¯',
        'male' => '♂',
        'malt' => '✠',
        'maltese' => '✠',
        'Map' => '⤅',
        'map' => '↦',
        'mapsto' => '↦',
        'mapstodown' => '↧',
        'mapstoleft' => '↤',
        'mapstoup' => '↥',
        'marker' => '▮',
        'mcomma' => '⨩',
        'Mcy' => 'М',
        'mcy' => 'м',
        'mdash' => '—',
        'mDDot' => '∺',
        'measuredangle' => '∡',
        'MediumSpace' => ' ',
        'Mellintrf' => 'ℳ',
        'Mfr' => '𝔐',
        'mfr' => '𝔪',
        'mho' => '℧',
        'micro' => 'µ',
        'micr' => 'µ',
        'mid' => '∣',
        'midast' => '*',
        'midcir' => '⫰',
        'middot' => '·',
        'middo' => '·',
        'minus' => '−',
        'minusb' => '⊟',
        'minusd' => '∸',
        'minusdu' => '⨪',
        'MinusPlus' => '∓',
        'mlcp' => '⫛',
        'mldr' => '…',
        'mnplus' => '∓',
        'models' => '⊧',
        'Mopf' => '𝕄',
        'mopf' => '𝕞',
        'mp' => '∓',
        'Mscr' => 'ℳ',
        'mscr' => '𝓂',
        'mstpos' => '∾',
        'Mu' => 'Μ',
        'mu' => 'μ',
        'multimap' => '⊸',
        'mumap' => '⊸',
        'nabla' => '∇',
        'Nacute' => 'Ń',
        'nacute' => 'ń',
        'nang' => '∠⃒',
        'nap' => '≉',
        'napE' => '⩰̸',
        'napid' => '≋̸',
        'napos' => 'ŉ',
        'napprox' => '≉',
        'natur' => '♮',
        'natural' => '♮',
        'naturals' => 'ℕ',
        'nbsp' => ' ',
        'nbs' => ' ',
        'nbump' => '≎̸',
        'nbumpe' => '≏̸',
        'ncap' => '⩃',
        'Ncaron' => 'Ň',
        'ncaron' => 'ň',
        'Ncedil' => 'Ņ',
        'ncedil' => 'ņ',
        'ncong' => '≇',
        'ncongdot' => '⩭̸',
        'ncup' => '⩂',
        'Ncy' => 'Н',
        'ncy' => 'н',
        'ndash' => '–',
        'ne' => '≠',
        'nearhk' => '⤤',
        'neArr' => '⇗',
        'nearr' => '↗',
        'nearrow' => '↗',
        'nedot' => '≐̸',
        'NegativeMediumSpace' => '​',
        'NegativeThickSpace' => '​',
        'NegativeThinSpace' => '​',
        'NegativeVeryThinSpace' => '​',
        'nequiv' => '≢',
        'nesear' => '⤨',
        'nesim' => '≂̸',
        'NestedGreaterGreater' => '≫',
        'NestedLessLess' => '≪',
        'NewLine' => '
',
        'nexist' => '∄',
        'nexists' => '∄',
        'Nfr' => '𝔑',
        'nfr' => '𝔫',
        'ngE' => '≧̸',
        'nge' => '≱',
        'ngeq' => '≱',
        'ngeqq' => '≧̸',
        'ngeqslant' => '⩾̸',
        'nges' => '⩾̸',
        'nGg' => '⋙̸',
        'ngsim' => '≵',
        'nGt' => '≫⃒',
        'ngt' => '≯',
        'ngtr' => '≯',
        'nGtv' => '≫̸',
        'nhArr' => '⇎',
        'nharr' => '↮',
        'nhpar' => '⫲',
        'ni' => '∋',
        'nis' => '⋼',
        'nisd' => '⋺',
        'niv' => '∋',
        'NJcy' => 'Њ',
        'njcy' => 'њ',
        'nlArr' => '⇍',
        'nlarr' => '↚',
        'nldr' => '‥',
        'nlE' => '≦̸',
        'nle' => '≰',
        'nLeftarrow' => '⇍',
        'nleftarrow' => '↚',
        'nLeftrightarrow' => '⇎',
        'nleftrightarrow' => '↮',
        'nleq' => '≰',
        'nleqq' => '≦̸',
        'nleqslant' => '⩽̸',
        'nles' => '⩽̸',
        'nless' => '≮',
        'nLl' => '⋘̸',
        'nlsim' => '≴',
        'nLt' => '≪⃒',
        'nlt' => '≮',
        'nltri' => '⋪',
        'nltrie' => '⋬',
        'nLtv' => '≪̸',
        'nmid' => '∤',
        'NoBreak' => '⁠',
        'NonBreakingSpace' => ' ',
        'Nopf' => 'ℕ',
        'nopf' => '𝕟',
        'Not' => '⫬',
        'not' => '¬',
        'no' => '¬',
        'NotCongruent' => '≢',
        'NotCupCap' => '≭',
        'NotDoubleVerticalBar' => '∦',
        'NotElement' => '∉',
        'NotEqual' => '≠',
        'NotEqualTilde' => '≂̸',
        'NotExists' => '∄',
        'NotGreater' => '≯',
        'NotGreaterEqual' => '≱',
        'NotGreaterFullEqual' => '≧̸',
        'NotGreaterGreater' => '≫̸',
        'NotGreaterLess' => '≹',
        'NotGreaterSlantEqual' => '⩾̸',
        'NotGreaterTilde' => '≵',
        'NotHumpDownHump' => '≎̸',
        'NotHumpEqual' => '≏̸',
        'notin' => '∉',
        'notindot' => '⋵̸',
        'notinE' => '⋹̸',
        'notinva' => '∉',
        'notinvb' => '⋷',
        'notinvc' => '⋶',
        'NotLeftTriangle' => '⋪',
        'NotLeftTriangleBar' => '⧏̸',
        'NotLeftTriangleEqual' => '⋬',
        'NotLess' => '≮',
        'NotLessEqual' => '≰',
        'NotLessGreater' => '≸',
        'NotLessLess' => '≪̸',
        'NotLessSlantEqual' => '⩽̸',
        'NotLessTilde' => '≴',
        'NotNestedGreaterGreater' => '⪢̸',
        'NotNestedLessLess' => '⪡̸',
        'notni' => '∌',
        'notniva' => '∌',
        'notnivb' => '⋾',
        'notnivc' => '⋽',
        'NotPrecedes' => '⊀',
        'NotPrecedesEqual' => '⪯̸',
        'NotPrecedesSlantEqual' => '⋠',
        'NotReverseElement' => '∌',
        'NotRightTriangle' => '⋫',
        'NotRightTriangleBar' => '⧐̸',
        'NotRightTriangleEqual' => '⋭',
        'NotSquareSubset' => '⊏̸',
        'NotSquareSubsetEqual' => '⋢',
        'NotSquareSuperset' => '⊐̸',
        'NotSquareSupersetEqual' => '⋣',
        'NotSubset' => '⊂⃒',
        'NotSubsetEqual' => '⊈',
        'NotSucceeds' => '⊁',
        'NotSucceedsEqual' => '⪰̸',
        'NotSucceedsSlantEqual' => '⋡',
        'NotSucceedsTilde' => '≿̸',
        'NotSuperset' => '⊃⃒',
        'NotSupersetEqual' => '⊉',
        'NotTilde' => '≁',
        'NotTildeEqual' => '≄',
        'NotTildeFullEqual' => '≇',
        'NotTildeTilde' => '≉',
        'NotVerticalBar' => '∤',
        'npar' => '∦',
        'nparallel' => '∦',
        'nparsl' => '⫽⃥',
        'npart' => '∂̸',
        'npolint' => '⨔',
        'npr' => '⊀',
        'nprcue' => '⋠',
        'npre' => '⪯̸',
        'nprec' => '⊀',
        'npreceq' => '⪯̸',
        'nrArr' => '⇏',
        'nrarr' => '↛',
        'nrarrc' => '⤳̸',
        'nrarrw' => '↝̸',
        'nRightarrow' => '⇏',
        'nrightarrow' => '↛',
        'nrtri' => '⋫',
        'nrtrie' => '⋭',
        'nsc' => '⊁',
        'nsccue' => '⋡',
        'nsce' => '⪰̸',
        'Nscr' => '𝒩',
        'nscr' => '𝓃',
        'nshortmid' => '∤',
        'nshortparallel' => '∦',
        'nsim' => '≁',
        'nsime' => '≄',
        'nsimeq' => '≄',
        'nsmid' => '∤',
        'nspar' => '∦',
        'nsqsube' => '⋢',
        'nsqsupe' => '⋣',
        'nsub' => '⊄',
        'nsubE' => '⫅̸',
        'nsube' => '⊈',
        'nsubset' => '⊂⃒',
        'nsubseteq' => '⊈',
        'nsubseteqq' => '⫅̸',
        'nsucc' => '⊁',
        'nsucceq' => '⪰̸',
        'nsup' => '⊅',
        'nsupE' => '⫆̸',
        'nsupe' => '⊉',
        'nsupset' => '⊃⃒',
        'nsupseteq' => '⊉',
        'nsupseteqq' => '⫆̸',
        'ntgl' => '≹',
        'Ntilde' => 'Ñ',
        'Ntild' => 'Ñ',
        'ntilde' => 'ñ',
        'ntild' => 'ñ',
        'ntlg' => '≸',
        'ntriangleleft' => '⋪',
        'ntrianglelefteq' => '⋬',
        'ntriangleright' => '⋫',
        'ntrianglerighteq' => '⋭',
        'Nu' => 'Ν',
        'nu' => 'ν',
        'num' => '#',
        'numero' => '№',
        'numsp' => ' ',
        'nvap' => '≍⃒',
        'nVDash' => '⊯',
        'nVdash' => '⊮',
        'nvDash' => '⊭',
        'nvdash' => '⊬',
        'nvge' => '≥⃒',
        'nvgt' => '>⃒',
        'nvHarr' => '⤄',
        'nvinfin' => '⧞',
        'nvlArr' => '⤂',
        'nvle' => '≤⃒',
        'nvlt' => '<⃒',
        'nvltrie' => '⊴⃒',
        'nvrArr' => '⤃',
        'nvrtrie' => '⊵⃒',
        'nvsim' => '∼⃒',
        'nwarhk' => '⤣',
        'nwArr' => '⇖',
        'nwarr' => '↖',
        'nwarrow' => '↖',
        'nwnear' => '⤧',
        'Oacute' => 'Ó',
        'Oacut' => 'Ó',
        'oacute' => 'ó',
        'oacut' => 'ó',
        'oast' => '⊛',
        'ocir' => 'ô',
        'Ocirc' => 'Ô',
        'Ocir' => 'Ô',
        'ocirc' => 'ô',
        'Ocy' => 'О',
        'ocy' => 'о',
        'odash' => '⊝',
        'Odblac' => 'Ő',
        'odblac' => 'ő',
        'odiv' => '⨸',
        'odot' => '⊙',
        'odsold' => '⦼',
        'OElig' => 'Œ',
        'oelig' => 'œ',
        'ofcir' => '⦿',
        'Ofr' => '𝔒',
        'ofr' => '𝔬',
        'ogon' => '˛',
        'Ograve' => 'Ò',
        'Ograv' => 'Ò',
        'ograve' => 'ò',
        'ograv' => 'ò',
        'ogt' => '⧁',
        'ohbar' => '⦵',
        'ohm' => 'Ω',
        'oint' => '∮',
        'olarr' => '↺',
        'olcir' => '⦾',
        'olcross' => '⦻',
        'oline' => '‾',
        'olt' => '⧀',
        'Omacr' => 'Ō',
        'omacr' => 'ō',
        'Omega' => 'Ω',
        'omega' => 'ω',
        'Omicron' => 'Ο',
        'omicron' => 'ο',
        'omid' => '⦶',
        'ominus' => '⊖',
        'Oopf' => '𝕆',
        'oopf' => '𝕠',
        'opar' => '⦷',
        'OpenCurlyDoubleQuote' => '“',
        'OpenCurlyQuote' => '‘',
        'operp' => '⦹',
        'oplus' => '⊕',
        'Or' => '⩔',
        'or' => '∨',
        'orarr' => '↻',
        'ord' => 'º',
        'order' => 'ℴ',
        'orderof' => 'ℴ',
        'ordf' => 'ª',
        'ordm' => 'º',
        'origof' => '⊶',
        'oror' => '⩖',
        'orslope' => '⩗',
        'orv' => '⩛',
        'oS' => 'Ⓢ',
        'Oscr' => '𝒪',
        'oscr' => 'ℴ',
        'Oslash' => 'Ø',
        'Oslas' => 'Ø',
        'oslash' => 'ø',
        'oslas' => 'ø',
        'osol' => '⊘',
        'Otilde' => 'Õ',
        'Otild' => 'Õ',
        'otilde' => 'õ',
        'otild' => 'õ',
        'Otimes' => '⨷',
        'otimes' => '⊗',
        'otimesas' => '⨶',
        'Ouml' => 'Ö',
        'Oum' => 'Ö',
        'ouml' => 'ö',
        'oum' => 'ö',
        'ovbar' => '⌽',
        'OverBar' => '‾',
        'OverBrace' => '⏞',
        'OverBracket' => '⎴',
        'OverParenthesis' => '⏜',
        'par' => '¶',
        'para' => '¶',
        'parallel' => '∥',
        'parsim' => '⫳',
        'parsl' => '⫽',
        'part' => '∂',
        'PartialD' => '∂',
        'Pcy' => 'П',
        'pcy' => 'п',
        'percnt' => '%',
        'period' => '.',
        'permil' => '‰',
        'perp' => '⊥',
        'pertenk' => '‱',
        'Pfr' => '𝔓',
        'pfr' => '𝔭',
        'Phi' => 'Φ',
        'phi' => 'φ',
        'phiv' => 'ϕ',
        'phmmat' => 'ℳ',
        'phone' => '☎',
        'Pi' => 'Π',
        'pi' => 'π',
        'pitchfork' => '⋔',
        'piv' => 'ϖ',
        'planck' => 'ℏ',
        'planckh' => 'ℎ',
        'plankv' => 'ℏ',
        'plus' => '+',
        'plusacir' => '⨣',
        'plusb' => '⊞',
        'pluscir' => '⨢',
        'plusdo' => '∔',
        'plusdu' => '⨥',
        'pluse' => '⩲',
        'PlusMinus' => '±',
        'plusmn' => '±',
        'plusm' => '±',
        'plussim' => '⨦',
        'plustwo' => '⨧',
        'pm' => '±',
        'Poincareplane' => 'ℌ',
        'pointint' => '⨕',
        'Popf' => 'ℙ',
        'popf' => '𝕡',
        'pound' => '£',
        'poun' => '£',
        'Pr' => '⪻',
        'pr' => '≺',
        'prap' => '⪷',
        'prcue' => '≼',
        'prE' => '⪳',
        'pre' => '⪯',
        'prec' => '≺',
        'precapprox' => '⪷',
        'preccurlyeq' => '≼',
        'Precedes' => '≺',
        'PrecedesEqual' => '⪯',
        'PrecedesSlantEqual' => '≼',
        'PrecedesTilde' => '≾',
        'preceq' => '⪯',
        'precnapprox' => '⪹',
        'precneqq' => '⪵',
        'precnsim' => '⋨',
        'precsim' => '≾',
        'Prime' => '″',
        'prime' => '′',
        'primes' => 'ℙ',
        'prnap' => '⪹',
        'prnE' => '⪵',
        'prnsim' => '⋨',
        'prod' => '∏',
        'Product' => '∏',
        'profalar' => '⌮',
        'profline' => '⌒',
        'profsurf' => '⌓',
        'prop' => '∝',
        'Proportion' => '∷',
        'Proportional' => '∝',
        'propto' => '∝',
        'prsim' => '≾',
        'prurel' => '⊰',
        'Pscr' => '𝒫',
        'pscr' => '𝓅',
        'Psi' => 'Ψ',
        'psi' => 'ψ',
        'puncsp' => ' ',
        'Qfr' => '𝔔',
        'qfr' => '𝔮',
        'qint' => '⨌',
        'Qopf' => 'ℚ',
        'qopf' => '𝕢',
        'qprime' => '⁗',
        'Qscr' => '𝒬',
        'qscr' => '𝓆',
        'quaternions' => 'ℍ',
        'quatint' => '⨖',
        'quest' => '?',
        'questeq' => '≟',
        'QUOT' => '"',
        'QUO' => '"',
        'quot' => '"',
        'quo' => '"',
        'rAarr' => '⇛',
        'race' => '∽̱',
        'Racute' => 'Ŕ',
        'racute' => 'ŕ',
        'radic' => '√',
        'raemptyv' => '⦳',
        'Rang' => '⟫',
        'rang' => '⟩',
        'rangd' => '⦒',
        'range' => '⦥',
        'rangle' => '⟩',
        'raquo' => '»',
        'raqu' => '»',
        'Rarr' => '↠',
        'rArr' => '⇒',
        'rarr' => '→',
        'rarrap' => '⥵',
        'rarrb' => '⇥',
        'rarrbfs' => '⤠',
        'rarrc' => '⤳',
        'rarrfs' => '⤞',
        'rarrhk' => '↪',
        'rarrlp' => '↬',
        'rarrpl' => '⥅',
        'rarrsim' => '⥴',
        'Rarrtl' => '⤖',
        'rarrtl' => '↣',
        'rarrw' => '↝',
        'rAtail' => '⤜',
        'ratail' => '⤚',
        'ratio' => '∶',
        'rationals' => 'ℚ',
        'RBarr' => '⤐',
        'rBarr' => '⤏',
        'rbarr' => '⤍',
        'rbbrk' => '❳',
        'rbrace' => '}',
        'rbrack' => ']',
        'rbrke' => '⦌',
        'rbrksld' => '⦎',
        'rbrkslu' => '⦐',
        'Rcaron' => 'Ř',
        'rcaron' => 'ř',
        'Rcedil' => 'Ŗ',
        'rcedil' => 'ŗ',
        'rceil' => '⌉',
        'rcub' => '}',
        'Rcy' => 'Р',
        'rcy' => 'р',
        'rdca' => '⤷',
        'rdldhar' => '⥩',
        'rdquo' => '”',
        'rdquor' => '”',
        'rdsh' => '↳',
        'Re' => 'ℜ',
        'real' => 'ℜ',
        'realine' => 'ℛ',
        'realpart' => 'ℜ',
        'reals' => 'ℝ',
        'rect' => '▭',
        'REG' => '®',
        'RE' => '®',
        'reg' => '®',
        're' => '®',
        'ReverseElement' => '∋',
        'ReverseEquilibrium' => '⇋',
        'ReverseUpEquilibrium' => '⥯',
        'rfisht' => '⥽',
        'rfloor' => '⌋',
        'Rfr' => 'ℜ',
        'rfr' => '𝔯',
        'rHar' => '⥤',
        'rhard' => '⇁',
        'rharu' => '⇀',
        'rharul' => '⥬',
        'Rho' => 'Ρ',
        'rho' => 'ρ',
        'rhov' => 'ϱ',
        'RightAngleBracket' => '⟩',
        'RightArrow' => '→',
        'Rightarrow' => '⇒',
        'rightarrow' => '→',
        'RightArrowBar' => '⇥',
        'RightArrowLeftArrow' => '⇄',
        'rightarrowtail' => '↣',
        'RightCeiling' => '⌉',
        'RightDoubleBracket' => '⟧',
        'RightDownTeeVector' => '⥝',
        'RightDownVector' => '⇂',
        'RightDownVectorBar' => '⥕',
        'RightFloor' => '⌋',
        'rightharpoondown' => '⇁',
        'rightharpoonup' => '⇀',
        'rightleftarrows' => '⇄',
        'rightleftharpoons' => '⇌',
        'rightrightarrows' => '⇉',
        'rightsquigarrow' => '↝',
        'RightTee' => '⊢',
        'RightTeeArrow' => '↦',
        'RightTeeVector' => '⥛',
        'rightthreetimes' => '⋌',
        'RightTriangle' => '⊳',
        'RightTriangleBar' => '⧐',
        'RightTriangleEqual' => '⊵',
        'RightUpDownVector' => '⥏',
        'RightUpTeeVector' => '⥜',
        'RightUpVector' => '↾',
        'RightUpVectorBar' => '⥔',
        'RightVector' => '⇀',
        'RightVectorBar' => '⥓',
        'ring' => '˚',
        'risingdotseq' => '≓',
        'rlarr' => '⇄',
        'rlhar' => '⇌',
        'rlm' => '‏',
        'rmoust' => '⎱',
        'rmoustache' => '⎱',
        'rnmid' => '⫮',
        'roang' => '⟭',
        'roarr' => '⇾',
        'robrk' => '⟧',
        'ropar' => '⦆',
        'Ropf' => 'ℝ',
        'ropf' => '𝕣',
        'roplus' => '⨮',
        'rotimes' => '⨵',
        'RoundImplies' => '⥰',
        'rpar' => ')',
        'rpargt' => '⦔',
        'rppolint' => '⨒',
        'rrarr' => '⇉',
        'Rrightarrow' => '⇛',
        'rsaquo' => '›',
        'Rscr' => 'ℛ',
        'rscr' => '𝓇',
        'Rsh' => '↱',
        'rsh' => '↱',
        'rsqb' => ']',
        'rsquo' => '’',
        'rsquor' => '’',
        'rthree' => '⋌',
        'rtimes' => '⋊',
        'rtri' => '▹',
        'rtrie' => '⊵',
        'rtrif' => '▸',
        'rtriltri' => '⧎',
        'RuleDelayed' => '⧴',
        'ruluhar' => '⥨',
        'rx' => '℞',
        'Sacute' => 'Ś',
        'sacute' => 'ś',
        'sbquo' => '‚',
        'Sc' => '⪼',
        'sc' => '≻',
        'scap' => '⪸',
        'Scaron' => 'Š',
        'scaron' => 'š',
        'sccue' => '≽',
        'scE' => '⪴',
        'sce' => '⪰',
        'Scedil' => 'Ş',
        'scedil' => 'ş',
        'Scirc' => 'Ŝ',
        'scirc' => 'ŝ',
        'scnap' => '⪺',
        'scnE' => '⪶',
        'scnsim' => '⋩',
        'scpolint' => '⨓',
        'scsim' => '≿',
        'Scy' => 'С',
        'scy' => 'с',
        'sdot' => '⋅',
        'sdotb' => '⊡',
        'sdote' => '⩦',
        'searhk' => '⤥',
        'seArr' => '⇘',
        'searr' => '↘',
        'searrow' => '↘',
        'sect' => '§',
        'sec' => '§',
        'semi' => ';',
        'seswar' => '⤩',
        'setminus' => '∖',
        'setmn' => '∖',
        'sext' => '✶',
        'Sfr' => '𝔖',
        'sfr' => '𝔰',
        'sfrown' => '⌢',
        'sharp' => '♯',
        'SHCHcy' => 'Щ',
        'shchcy' => 'щ',
        'SHcy' => 'Ш',
        'shcy' => 'ш',
        'ShortDownArrow' => '↓',
        'ShortLeftArrow' => '←',
        'shortmid' => '∣',
        'shortparallel' => '∥',
        'ShortRightArrow' => '→',
        'ShortUpArrow' => '↑',
        'shy' => '­',
        'sh' => '­',
        'Sigma' => 'Σ',
        'sigma' => 'σ',
        'sigmaf' => 'ς',
        'sigmav' => 'ς',
        'sim' => '∼',
        'simdot' => '⩪',
        'sime' => '≃',
        'simeq' => '≃',
        'simg' => '⪞',
        'simgE' => '⪠',
        'siml' => '⪝',
        'simlE' => '⪟',
        'simne' => '≆',
        'simplus' => '⨤',
        'simrarr' => '⥲',
        'slarr' => '←',
        'SmallCircle' => '∘',
        'smallsetminus' => '∖',
        'smashp' => '⨳',
        'smeparsl' => '⧤',
        'smid' => '∣',
        'smile' => '⌣',
        'smt' => '⪪',
        'smte' => '⪬',
        'smtes' => '⪬︀',
        'SOFTcy' => 'Ь',
        'softcy' => 'ь',
        'sol' => '/',
        'solb' => '⧄',
        'solbar' => '⌿',
        'Sopf' => '𝕊',
        'sopf' => '𝕤',
        'spades' => '♠',
        'spadesuit' => '♠',
        'spar' => '∥',
        'sqcap' => '⊓',
        'sqcaps' => '⊓︀',
        'sqcup' => '⊔',
        'sqcups' => '⊔︀',
        'Sqrt' => '√',
        'sqsub' => '⊏',
        'sqsube' => '⊑',
        'sqsubset' => '⊏',
        'sqsubseteq' => '⊑',
        'sqsup' => '⊐',
        'sqsupe' => '⊒',
        'sqsupset' => '⊐',
        'sqsupseteq' => '⊒',
        'squ' => '□',
        'Square' => '□',
        'square' => '□',
        'SquareIntersection' => '⊓',
        'SquareSubset' => '⊏',
        'SquareSubsetEqual' => '⊑',
        'SquareSuperset' => '⊐',
        'SquareSupersetEqual' => '⊒',
        'SquareUnion' => '⊔',
        'squarf' => '▪',
        'squf' => '▪',
        'srarr' => '→',
        'Sscr' => '𝒮',
        'sscr' => '𝓈',
        'ssetmn' => '∖',
        'ssmile' => '⌣',
        'sstarf' => '⋆',
        'Star' => '⋆',
        'star' => '☆',
        'starf' => '★',
        'straightepsilon' => 'ϵ',
        'straightphi' => 'ϕ',
        'strns' => '¯',
        'Sub' => '⋐',
        'sub' => '⊂',
        'subdot' => '⪽',
        'subE' => '⫅',
        'sube' => '⊆',
        'subedot' => '⫃',
        'submult' => '⫁',
        'subnE' => '⫋',
        'subne' => '⊊',
        'subplus' => '⪿',
        'subrarr' => '⥹',
        'Subset' => '⋐',
        'subset' => '⊂',
        'subseteq' => '⊆',
        'subseteqq' => '⫅',
        'SubsetEqual' => '⊆',
        'subsetneq' => '⊊',
        'subsetneqq' => '⫋',
        'subsim' => '⫇',
        'subsub' => '⫕',
        'subsup' => '⫓',
        'succ' => '≻',
        'succapprox' => '⪸',
        'succcurlyeq' => '≽',
        'Succeeds' => '≻',
        'SucceedsEqual' => '⪰',
        'SucceedsSlantEqual' => '≽',
        'SucceedsTilde' => '≿',
        'succeq' => '⪰',
        'succnapprox' => '⪺',
        'succneqq' => '⪶',
        'succnsim' => '⋩',
        'succsim' => '≿',
        'SuchThat' => '∋',
        'Sum' => '∑',
        'sum' => '∑',
        'sung' => '♪',
        'Sup' => '⋑',
        'sup' => '³',
        'sup1' => '¹',
        'sup2' => '²',
        'sup3' => '³',
        'supdot' => '⪾',
        'supdsub' => '⫘',
        'supE' => '⫆',
        'supe' => '⊇',
        'supedot' => '⫄',
        'Superset' => '⊃',
        'SupersetEqual' => '⊇',
        'suphsol' => '⟉',
        'suphsub' => '⫗',
        'suplarr' => '⥻',
        'supmult' => '⫂',
        'supnE' => '⫌',
        'supne' => '⊋',
        'supplus' => '⫀',
        'Supset' => '⋑',
        'supset' => '⊃',
        'supseteq' => '⊇',
        'supseteqq' => '⫆',
        'supsetneq' => '⊋',
        'supsetneqq' => '⫌',
        'supsim' => '⫈',
        'supsub' => '⫔',
        'supsup' => '⫖',
        'swarhk' => '⤦',
        'swArr' => '⇙',
        'swarr' => '↙',
        'swarrow' => '↙',
        'swnwar' => '⤪',
        'szlig' => 'ß',
        'szli' => 'ß',
        'Tab' => '	',
        'target' => '⌖',
        'Tau' => 'Τ',
        'tau' => 'τ',
        'tbrk' => '⎴',
        'Tcaron' => 'Ť',
        'tcaron' => 'ť',
        'Tcedil' => 'Ţ',
        'tcedil' => 'ţ',
        'Tcy' => 'Т',
        'tcy' => 'т',
        'tdot' => '⃛',
        'telrec' => '⌕',
        'Tfr' => '𝔗',
        'tfr' => '𝔱',
        'there4' => '∴',
        'Therefore' => '∴',
        'therefore' => '∴',
        'Theta' => 'Θ',
        'theta' => 'θ',
        'thetasym' => 'ϑ',
        'thetav' => 'ϑ',
        'thickapprox' => '≈',
        'thicksim' => '∼',
        'ThickSpace' => '  ',
        'thinsp' => ' ',
        'ThinSpace' => ' ',
        'thkap' => '≈',
        'thksim' => '∼',
        'THORN' => 'Þ',
        'THOR' => 'Þ',
        'thorn' => 'þ',
        'thor' => 'þ',
        'Tilde' => '∼',
        'tilde' => '˜',
        'TildeEqual' => '≃',
        'TildeFullEqual' => '≅',
        'TildeTilde' => '≈',
        'times' => '×',
        'time' => '×',
        'timesb' => '⊠',
        'timesbar' => '⨱',
        'timesd' => '⨰',
        'tint' => '∭',
        'toea' => '⤨',
        'top' => '⊤',
        'topbot' => '⌶',
        'topcir' => '⫱',
        'Topf' => '𝕋',
        'topf' => '𝕥',
        'topfork' => '⫚',
        'tosa' => '⤩',
        'tprime' => '‴',
        'TRADE' => '™',
        'trade' => '™',
        'triangle' => '▵',
        'triangledown' => '▿',
        'triangleleft' => '◃',
        'trianglelefteq' => '⊴',
        'triangleq' => '≜',
        'triangleright' => '▹',
        'trianglerighteq' => '⊵',
        'tridot' => '◬',
        'trie' => '≜',
        'triminus' => '⨺',
        'TripleDot' => '⃛',
        'triplus' => '⨹',
        'trisb' => '⧍',
        'tritime' => '⨻',
        'trpezium' => '⏢',
        'Tscr' => '𝒯',
        'tscr' => '𝓉',
        'TScy' => 'Ц',
        'tscy' => 'ц',
        'TSHcy' => 'Ћ',
        'tshcy' => 'ћ',
        'Tstrok' => 'Ŧ',
        'tstrok' => 'ŧ',
        'twixt' => '≬',
        'twoheadleftarrow' => '↞',
        'twoheadrightarrow' => '↠',
        'Uacute' => 'Ú',
        'Uacut' => 'Ú',
        'uacute' => 'ú',
        'uacut' => 'ú',
        'Uarr' => '↟',
        'uArr' => '⇑',
        'uarr' => '↑',
        'Uarrocir' => '⥉',
        'Ubrcy' => 'Ў',
        'ubrcy' => 'ў',
        'Ubreve' => 'Ŭ',
        'ubreve' => 'ŭ',
        'Ucirc' => 'Û',
        'Ucir' => 'Û',
        'ucirc' => 'û',
        'ucir' => 'û',
        'Ucy' => 'У',
        'ucy' => 'у',
        'udarr' => '⇅',
        'Udblac' => 'Ű',
        'udblac' => 'ű',
        'udhar' => '⥮',
        'ufisht' => '⥾',
        'Ufr' => '𝔘',
        'ufr' => '𝔲',
        'Ugrave' => 'Ù',
        'Ugrav' => 'Ù',
        'ugrave' => 'ù',
        'ugrav' => 'ù',
        'uHar' => '⥣',
        'uharl' => '↿',
        'uharr' => '↾',
        'uhblk' => '▀',
        'ulcorn' => '⌜',
        'ulcorner' => '⌜',
        'ulcrop' => '⌏',
        'ultri' => '◸',
        'Umacr' => 'Ū',
        'umacr' => 'ū',
        'uml' => '¨',
        'um' => '¨',
        'UnderBar' => '_',
        'UnderBrace' => '⏟',
        'UnderBracket' => '⎵',
        'UnderParenthesis' => '⏝',
        'Union' => '⋃',
        'UnionPlus' => '⊎',
        'Uogon' => 'Ų',
        'uogon' => 'ų',
        'Uopf' => '𝕌',
        'uopf' => '𝕦',
        'UpArrow' => '↑',
        'Uparrow' => '⇑',
        'uparrow' => '↑',
        'UpArrowBar' => '⤒',
        'UpArrowDownArrow' => '⇅',
        'UpDownArrow' => '↕',
        'Updownarrow' => '⇕',
        'updownarrow' => '↕',
        'UpEquilibrium' => '⥮',
        'upharpoonleft' => '↿',
        'upharpoonright' => '↾',
        'uplus' => '⊎',
        'UpperLeftArrow' => '↖',
        'UpperRightArrow' => '↗',
        'Upsi' => 'ϒ',
        'upsi' => 'υ',
        'upsih' => 'ϒ',
        'Upsilon' => 'Υ',
        'upsilon' => 'υ',
        'UpTee' => '⊥',
        'UpTeeArrow' => '↥',
        'upuparrows' => '⇈',
        'urcorn' => '⌝',
        'urcorner' => '⌝',
        'urcrop' => '⌎',
        'Uring' => 'Ů',
        'uring' => 'ů',
        'urtri' => '◹',
        'Uscr' => '𝒰',
        'uscr' => '𝓊',
        'utdot' => '⋰',
        'Utilde' => 'Ũ',
        'utilde' => 'ũ',
        'utri' => '▵',
        'utrif' => '▴',
        'uuarr' => '⇈',
        'Uuml' => 'Ü',
        'Uum' => 'Ü',
        'uuml' => 'ü',
        'uum' => 'ü',
        'uwangle' => '⦧',
        'vangrt' => '⦜',
        'varepsilon' => 'ϵ',
        'varkappa' => 'ϰ',
        'varnothing' => '∅',
        'varphi' => 'ϕ',
        'varpi' => 'ϖ',
        'varpropto' => '∝',
        'vArr' => '⇕',
        'varr' => '↕',
        'varrho' => 'ϱ',
        'varsigma' => 'ς',
        'varsubsetneq' => '⊊︀',
        'varsubsetneqq' => '⫋︀',
        'varsupsetneq' => '⊋︀',
        'varsupsetneqq' => '⫌︀',
        'vartheta' => 'ϑ',
        'vartriangleleft' => '⊲',
        'vartriangleright' => '⊳',
        'Vbar' => '⫫',
        'vBar' => '⫨',
        'vBarv' => '⫩',
        'Vcy' => 'В',
        'vcy' => 'в',
        'VDash' => '⊫',
        'Vdash' => '⊩',
        'vDash' => '⊨',
        'vdash' => '⊢',
        'Vdashl' => '⫦',
        'Vee' => '⋁',
        'vee' => '∨',
        'veebar' => '⊻',
        'veeeq' => '≚',
        'vellip' => '⋮',
        'Verbar' => '‖',
        'verbar' => '|',
        'Vert' => '‖',
        'vert' => '|',
        'VerticalBar' => '∣',
        'VerticalLine' => '|',
        'VerticalSeparator' => '❘',
        'VerticalTilde' => '≀',
        'VeryThinSpace' => ' ',
        'Vfr' => '𝔙',
        'vfr' => '𝔳',
        'vltri' => '⊲',
        'vnsub' => '⊂⃒',
        'vnsup' => '⊃⃒',
        'Vopf' => '𝕍',
        'vopf' => '𝕧',
        'vprop' => '∝',
        'vrtri' => '⊳',
        'Vscr' => '𝒱',
        'vscr' => '𝓋',
        'vsubnE' => '⫋︀',
        'vsubne' => '⊊︀',
        'vsupnE' => '⫌︀',
        'vsupne' => '⊋︀',
        'Vvdash' => '⊪',
        'vzigzag' => '⦚',
        'Wcirc' => 'Ŵ',
        'wcirc' => 'ŵ',
        'wedbar' => '⩟',
        'Wedge' => '⋀',
        'wedge' => '∧',
        'wedgeq' => '≙',
        'weierp' => '℘',
        'Wfr' => '𝔚',
        'wfr' => '𝔴',
        'Wopf' => '𝕎',
        'wopf' => '𝕨',
        'wp' => '℘',
        'wr' => '≀',
        'wreath' => '≀',
        'Wscr' => '𝒲',
        'wscr' => '𝓌',
        'xcap' => '⋂',
        'xcirc' => '◯',
        'xcup' => '⋃',
        'xdtri' => '▽',
        'Xfr' => '𝔛',
        'xfr' => '𝔵',
        'xhArr' => '⟺',
        'xharr' => '⟷',
        'Xi' => 'Ξ',
        'xi' => 'ξ',
        'xlArr' => '⟸',
        'xlarr' => '⟵',
        'xmap' => '⟼',
        'xnis' => '⋻',
        'xodot' => '⨀',
        'Xopf' => '𝕏',
        'xopf' => '𝕩',
        'xoplus' => '⨁',
        'xotime' => '⨂',
        'xrArr' => '⟹',
        'xrarr' => '⟶',
        'Xscr' => '𝒳',
        'xscr' => '𝓍',
        'xsqcup' => '⨆',
        'xuplus' => '⨄',
        'xutri' => '△',
        'xvee' => '⋁',
        'xwedge' => '⋀',
        'Yacute' => 'Ý',
        'Yacut' => 'Ý',
        'yacute' => 'ý',
        'yacut' => 'ý',
        'YAcy' => 'Я',
        'yacy' => 'я',
        'Ycirc' => 'Ŷ',
        'ycirc' => 'ŷ',
        'Ycy' => 'Ы',
        'ycy' => 'ы',
        'yen' => '¥',
        'ye' => '¥',
        'Yfr' => '𝔜',
        'yfr' => '𝔶',
        'YIcy' => 'Ї',
        'yicy' => 'ї',
        'Yopf' => '𝕐',
        'yopf' => '𝕪',
        'Yscr' => '𝒴',
        'yscr' => '𝓎',
        'YUcy' => 'Ю',
        'yucy' => 'ю',
        'Yuml' => 'Ÿ',
        'yuml' => 'ÿ',
        'yum' => 'ÿ',
        'Zacute' => 'Ź',
        'zacute' => 'ź',
        'Zcaron' => 'Ž',
        'zcaron' => 'ž',
        'Zcy' => 'З',
        'zcy' => 'з',
        'Zdot' => 'Ż',
        'zdot' => 'ż',
        'zeetrf' => 'ℨ',
        'ZeroWidthSpace' => '​',
        'Zeta' => 'Ζ',
        'zeta' => 'ζ',
        'Zfr' => 'ℨ',
        'zfr' => '𝔷',
        'ZHcy' => 'Ж',
        'zhcy' => 'ж',
        'zigrarr' => '⇝',
        'Zopf' => 'ℤ',
        'zopf' => '𝕫',
        'Zscr' => '𝒵',
        'zscr' => '𝓏',
        'zwj' => '‍',
        'zwnj' => '‌',
    );
}
PK      ��ZP�k��  �    InstructionProcessor.phpnu W+A��        <?php
/**
 * A handler for processor instructions.
 */

namespace Masterminds\HTML5;

/**
 * Provide a processor to handle embedded instructions.
 *
 * XML defines a mechanism for inserting instructions (like PHP) into a
 * document. These are called "processing instructions." The HTML5 parser
 * provides an opportunity to handle these processing instructions during
 * the tree-building phase (before the DOM is constructed), which makes
 * it possible to alter the document as it is being created.
 *
 * One could, for example, use this mechanism to execute well-formed PHP
 * code embedded inside of an HTML5 document.
 */
interface InstructionProcessor
{
    /**
     * Process an individual processing instruction.
     *
     * The process() function is responsible for doing the following:
     * - Determining whether $name is an instruction type it can handle.
     * - Determining what to do with the data passed in.
     * - Making any subsequent modifications to the DOM by modifying the
     *   DOMElement or its attached DOM tree.
     *
     * @param \DOMElement $element The parent element for the current processing instruction.
     * @param string      $name    The instruction's name. E.g. `&lt;?php` has the name `php`.
     * @param string      $data    All of the data between the opening and closing PI marks.
     *
     * @return \DOMElement The element that should be considered "current". This may just be
     *                     the element passed in, but if the processor added more elements,
     *                     it may choose to reset the current element to one of the elements
     *                     it created. (When in doubt, return the element passed in.)
     */
    public function process(\DOMElement $element, $name, $data);
}
PK      ��Z::)�EM  EM    Elements.phpnu W+A��        <?php
/**
 * Provide general element functions.
 */

namespace Masterminds\HTML5;

/**
 * This class provides general information about HTML5 elements,
 * including syntactic and semantic issues.
 * Parsers and serializers can
 * use this class as a reference point for information about the rules
 * of various HTML5 elements.
 *
 * @todo Consider using a bitmask table lookup. There is enough overlap in
 *       naming that this could significantly shrink the size and maybe make it
 *       faster. See the Go team's implementation at https://code.google.com/p/go/source/browse/html/atom.
 */
class Elements
{
    /**
     * Indicates an element is described in the specification.
     */
    const KNOWN_ELEMENT = 1;

    // From section 8.1.2: "script", "style"
    // From 8.2.5.4.7 ("in body" insertion mode): "noembed"
    // From 8.4 "style", "xmp", "iframe", "noembed", "noframes"
    /**
     * Indicates the contained text should be processed as raw text.
     */
    const TEXT_RAW = 2;

    // From section 8.1.2: "textarea", "title"
    /**
     * Indicates the contained text should be processed as RCDATA.
     */
    const TEXT_RCDATA = 4;

    /**
     * Indicates the tag cannot have content.
     */
    const VOID_TAG = 8;

    // "address", "article", "aside", "blockquote", "center", "details", "dialog", "dir", "div", "dl",
    // "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "menu",
    // "nav", "ol", "p", "section", "summary", "ul"
    // "h1", "h2", "h3", "h4", "h5", "h6"
    // "pre", "listing"
    // "form"
    // "plaintext"
    /**
     * Indicates that if a previous event is for a P tag, that element
     * should be considered closed.
     */
    const AUTOCLOSE_P = 16;

    /**
     * Indicates that the text inside is plaintext (pre).
     */
    const TEXT_PLAINTEXT = 32;

    // See https://developer.mozilla.org/en-US/docs/HTML/Block-level_elements
    /**
     * Indicates that the tag is a block.
     */
    const BLOCK_TAG = 64;

    /**
     * Indicates that the tag allows only inline elements as child nodes.
     */
    const BLOCK_ONLY_INLINE = 128;

    /**
     * The HTML5 elements as defined in http://dev.w3.org/html5/markup/elements.html.
     *
     * Keys are lowercase element names; values are bitmasks built from the
     * constants declared on this class. In the inline comments below, "NORMAL"
     * stands for KNOWN_ELEMENT (1) and "RAW_TEXT" stands for TEXT_RAW (2).
     * Entries without a comment carry only the flags implied by their value
     * (1 is KNOWN_ELEMENT alone).
     *
     * @var array
     */
    public static $html5 = array(
        'a' => 1,
        'abbr' => 1,
        'address' => 65, // NORMAL | BLOCK_TAG
        'area' => 9, // NORMAL | VOID_TAG
        'article' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
        'aside' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
        'audio' => 1, // NORMAL
        'b' => 1,
        'base' => 9, // NORMAL | VOID_TAG
        'bdi' => 1,
        'bdo' => 1,
        'blockquote' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
        'body' => 1,
        'br' => 9, // NORMAL | VOID_TAG
        'button' => 1,
        'canvas' => 65, // NORMAL | BLOCK_TAG
        'caption' => 1,
        'cite' => 1,
        'code' => 1,
        'col' => 9, // NORMAL | VOID_TAG
        'colgroup' => 1,
        'command' => 9, // NORMAL | VOID_TAG
                        // "data" => 1, // This is highly experimental and only part of the whatwg spec (not w3c). See https://developer.mozilla.org/en-US/docs/HTML/Element/data
        'datalist' => 1,
        'dd' => 65, // NORMAL | BLOCK_TAG
        'del' => 1,
        'details' => 17, // NORMAL | AUTOCLOSE_P
        'dfn' => 1,
        'dialog' => 17, // NORMAL | AUTOCLOSE_P
        'div' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
        'dl' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
        'dt' => 1,
        'em' => 1,
        'embed' => 9, // NORMAL | VOID_TAG
        'fieldset' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
        'figcaption' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
        'figure' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
        'footer' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
        'form' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
        'h1' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
        'h2' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
        'h3' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
        'h4' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
        'h5' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
        'h6' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
        'head' => 1,
        'header' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
        'hgroup' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
        'hr' => 73, // NORMAL | VOID_TAG | BLOCK_TAG (1 + 8 + 64)
        'html' => 1,
        'i' => 1,
        'iframe' => 3, // NORMAL | TEXT_RAW
        'img' => 9, // NORMAL | VOID_TAG
        'input' => 9, // NORMAL | VOID_TAG
        'kbd' => 1,
        'ins' => 1,
        'keygen' => 9, // NORMAL | VOID_TAG
        'label' => 1,
        'legend' => 1,
        'li' => 1,
        'link' => 9, // NORMAL | VOID_TAG
        'map' => 1,
        'mark' => 1,
        'menu' => 17, // NORMAL | AUTOCLOSE_P
        'meta' => 9, // NORMAL | VOID_TAG
        'meter' => 1,
        'nav' => 17, // NORMAL | AUTOCLOSE_P
        'noscript' => 65, // NORMAL | BLOCK_TAG
        'object' => 1,
        'ol' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
        'optgroup' => 1,
        'option' => 1,
        'output' => 65, // NORMAL | BLOCK_TAG
        'p' => 209, // NORMAL | AUTOCLOSE_P | BLOCK_TAG | BLOCK_ONLY_INLINE
        'param' => 9, // NORMAL | VOID_TAG
        'pre' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
        'progress' => 1,
        'q' => 1,
        'rp' => 1,
        'rt' => 1,
        'ruby' => 1,
        's' => 1,
        'samp' => 1,
        'script' => 3, // NORMAL | TEXT_RAW
        'section' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
        'select' => 1,
        'small' => 1,
        'source' => 9, // NORMAL | VOID_TAG
        'span' => 1,
        'strong' => 1,
        'style' => 3, // NORMAL | TEXT_RAW
        'sub' => 1,
        'summary' => 17, // NORMAL | AUTOCLOSE_P
        'sup' => 1,
        'table' => 65, // NORMAL | BLOCK_TAG
        'tbody' => 1,
        'td' => 1,
        'textarea' => 5, // NORMAL | TEXT_RCDATA
        'tfoot' => 65, // NORMAL | BLOCK_TAG
        'th' => 1,
        'thead' => 1,
        'time' => 1,
        'title' => 5, // NORMAL | TEXT_RCDATA
        'tr' => 1,
        'track' => 9, // NORMAL | VOID_TAG
        'u' => 1,
        'ul' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
        'var' => 1,
        'video' => 1,
        'wbr' => 9, // NORMAL | VOID_TAG

        // Legacy?
        'basefont' => 8, // VOID_TAG
        'bgsound' => 8, // VOID_TAG
        'noframes' => 2, // RAW_TEXT
        'frame' => 9, // NORMAL | VOID_TAG
        'frameset' => 1,
        'center' => 16, // AUTOCLOSE_P
        'dir' => 16, // AUTOCLOSE_P
        'listing' => 16, // AUTOCLOSE_P
        'plaintext' => 48, // AUTOCLOSE_P | TEXT_PLAINTEXT
        'applet' => 0, // no flags set
        'marquee' => 0, // no flags set
        'isindex' => 8, // VOID_TAG
        // The value 20 decodes to AUTOCLOSE_P | TEXT_RCDATA (16 + 4).
        // NOTE(review): this entry used to be annotated "AUTOCLOSE_P | VOID_TAG | RAW_TEXT",
        // which would be 26, not 20 - confirm which mask is actually intended for xmp.
        'xmp' => 20,
        'noembed' => 2, // RAW_TEXT
        );

    /**
     * The MathML elements.
     * See http://www.w3.org/wiki/MathML/Elements.
     *
     * In our case we are only concerned with presentation MathML and not content
     * MathML. There is a nice list of this subset at https://developer.mozilla.org/en-US/docs/MathML/Element.
     *
     * @var array
     */
    public static $mathml = array(
        'maction' => 1,
        'maligngroup' => 1,
        'malignmark' => 1,
        'math' => 1,
        'menclose' => 1,
        'merror' => 1,
        'mfenced' => 1,
        'mfrac' => 1,
        'mglyph' => 1,
        'mi' => 1,
        'mlabeledtr' => 1,
        'mlongdiv' => 1,
        'mmultiscripts' => 1,
        'mn' => 1,
        'mo' => 1,
        'mover' => 1,
        'mpadded' => 1,
        'mphantom' => 1,
        'mroot' => 1,
        'mrow' => 1,
        'ms' => 1,
        'mscarries' => 1,
        'mscarry' => 1,
        'msgroup' => 1,
        'msline' => 1,
        'mspace' => 1,
        'msqrt' => 1,
        'msrow' => 1,
        'mstack' => 1,
        'mstyle' => 1,
        'msub' => 1,
        'msup' => 1,
        'msubsup' => 1,
        'mtable' => 1,
        'mtd' => 1,
        'mtext' => 1,
        'mtr' => 1,
        'munder' => 1,
        'munderover' => 1,
    );

    /**
     * The svg elements.
     *
     * The Mozilla documentation has a good list at https://developer.mozilla.org/en-US/docs/SVG/Element.
     * The w3c list appears to be lacking in some areas like filter effect elements.
     * That list can be found at http://www.w3.org/wiki/SVG/Elements.
     *
     * Note, FireFox appears to do a better job rendering filter effects than chrome.
     * While they are in the spec I'm not sure how widely implemented they are.
     *
     * @var array
     */
    public static $svg = array(
        'a' => 1,
        'altGlyph' => 1,
        'altGlyphDef' => 1,
        'altGlyphItem' => 1,
        'animate' => 1,
        'animateColor' => 1,
        'animateMotion' => 1,
        'animateTransform' => 1,
        'circle' => 1,
        'clipPath' => 1,
        'color-profile' => 1,
        'cursor' => 1,
        'defs' => 1,
        'desc' => 1,
        'ellipse' => 1,
        'feBlend' => 1,
        'feColorMatrix' => 1,
        'feComponentTransfer' => 1,
        'feComposite' => 1,
        'feConvolveMatrix' => 1,
        'feDiffuseLighting' => 1,
        'feDisplacementMap' => 1,
        'feDistantLight' => 1,
        'feFlood' => 1,
        'feFuncA' => 1,
        'feFuncB' => 1,
        'feFuncG' => 1,
        'feFuncR' => 1,
        'feGaussianBlur' => 1,
        'feImage' => 1,
        'feMerge' => 1,
        'feMergeNode' => 1,
        'feMorphology' => 1,
        'feOffset' => 1,
        'fePointLight' => 1,
        'feSpecularLighting' => 1,
        'feSpotLight' => 1,
        'feTile' => 1,
        'feTurbulence' => 1,
        'filter' => 1,
        'font' => 1,
        'font-face' => 1,
        'font-face-format' => 1,
        'font-face-name' => 1,
        'font-face-src' => 1,
        'font-face-uri' => 1,
        'foreignObject' => 1,
        'g' => 1,
        'glyph' => 1,
        'glyphRef' => 1,
        'hkern' => 1,
        'image' => 1,
        'line' => 1,
        'linearGradient' => 1,
        'marker' => 1,
        'mask' => 1,
        'metadata' => 1,
        'missing-glyph' => 1,
        'mpath' => 1,
        'path' => 1,
        'pattern' => 1,
        'polygon' => 1,
        'polyline' => 1,
        'radialGradient' => 1,
        'rect' => 1,
        'script' => 3, // NORMAL | RAW_TEXT
        'set' => 1,
        'stop' => 1,
        'style' => 3, // NORMAL | RAW_TEXT
        'svg' => 1,
        'switch' => 1,
        'symbol' => 1,
        'text' => 1,
        'textPath' => 1,
        'title' => 1,
        'tref' => 1,
        'tspan' => 1,
        'use' => 1,
        'view' => 1,
        'vkern' => 1,
    );

    /**
     * Some attributes in SVG are case sensitive.
     *
     * This map contains key/value pairs with the key as the lowercase attribute
     * name and the value with the correct casing.
     */
    public static $svgCaseSensitiveAttributeMap = array(
        'attributename' => 'attributeName',
        'attributetype' => 'attributeType',
        'basefrequency' => 'baseFrequency',
        'baseprofile' => 'baseProfile',
        'calcmode' => 'calcMode',
        'clippathunits' => 'clipPathUnits',
        'contentscripttype' => 'contentScriptType',
        'contentstyletype' => 'contentStyleType',
        'diffuseconstant' => 'diffuseConstant',
        'edgemode' => 'edgeMode',
        'externalresourcesrequired' => 'externalResourcesRequired',
        'filterres' => 'filterRes',
        'filterunits' => 'filterUnits',
        'glyphref' => 'glyphRef',
        'gradienttransform' => 'gradientTransform',
        'gradientunits' => 'gradientUnits',
        'kernelmatrix' => 'kernelMatrix',
        'kernelunitlength' => 'kernelUnitLength',
        'keypoints' => 'keyPoints',
        'keysplines' => 'keySplines',
        'keytimes' => 'keyTimes',
        'lengthadjust' => 'lengthAdjust',
        'limitingconeangle' => 'limitingConeAngle',
        'markerheight' => 'markerHeight',
        'markerunits' => 'markerUnits',
        'markerwidth' => 'markerWidth',
        'maskcontentunits' => 'maskContentUnits',
        'maskunits' => 'maskUnits',
        'numoctaves' => 'numOctaves',
        'pathlength' => 'pathLength',
        'patterncontentunits' => 'patternContentUnits',
        'patterntransform' => 'patternTransform',
        'patternunits' => 'patternUnits',
        'pointsatx' => 'pointsAtX',
        'pointsaty' => 'pointsAtY',
        'pointsatz' => 'pointsAtZ',
        'preservealpha' => 'preserveAlpha',
        'preserveaspectratio' => 'preserveAspectRatio',
        'primitiveunits' => 'primitiveUnits',
        'refx' => 'refX',
        'refy' => 'refY',
        'repeatcount' => 'repeatCount',
        'repeatdur' => 'repeatDur',
        'requiredextensions' => 'requiredExtensions',
        'requiredfeatures' => 'requiredFeatures',
        'specularconstant' => 'specularConstant',
        'specularexponent' => 'specularExponent',
        'spreadmethod' => 'spreadMethod',
        'startoffset' => 'startOffset',
        'stddeviation' => 'stdDeviation',
        'stitchtiles' => 'stitchTiles',
        'surfacescale' => 'surfaceScale',
        'systemlanguage' => 'systemLanguage',
        'tablevalues' => 'tableValues',
        'targetx' => 'targetX',
        'targety' => 'targetY',
        'textlength' => 'textLength',
        'viewbox' => 'viewBox',
        'viewtarget' => 'viewTarget',
        'xchannelselector' => 'xChannelSelector',
        'ychannelselector' => 'yChannelSelector',
        'zoomandpan' => 'zoomAndPan',
    );

    /**
     * Some SVG elements are case sensitive.
     * This map contains these.
     *
     * The map contains key/value store of the name is lowercase as the keys and
     * the correct casing as the value.
     */
    public static $svgCaseSensitiveElementMap = array(
        'altglyph' => 'altGlyph',
        'altglyphdef' => 'altGlyphDef',
        'altglyphitem' => 'altGlyphItem',
        'animatecolor' => 'animateColor',
        'animatemotion' => 'animateMotion',
        'animatetransform' => 'animateTransform',
        'clippath' => 'clipPath',
        'feblend' => 'feBlend',
        'fecolormatrix' => 'feColorMatrix',
        'fecomponenttransfer' => 'feComponentTransfer',
        'fecomposite' => 'feComposite',
        'feconvolvematrix' => 'feConvolveMatrix',
        'fediffuselighting' => 'feDiffuseLighting',
        'fedisplacementmap' => 'feDisplacementMap',
        'fedistantlight' => 'feDistantLight',
        'feflood' => 'feFlood',
        'fefunca' => 'feFuncA',
        'fefuncb' => 'feFuncB',
        'fefuncg' => 'feFuncG',
        'fefuncr' => 'feFuncR',
        'fegaussianblur' => 'feGaussianBlur',
        'feimage' => 'feImage',
        'femerge' => 'feMerge',
        'femergenode' => 'feMergeNode',
        'femorphology' => 'feMorphology',
        'feoffset' => 'feOffset',
        'fepointlight' => 'fePointLight',
        'fespecularlighting' => 'feSpecularLighting',
        'fespotlight' => 'feSpotLight',
        'fetile' => 'feTile',
        'feturbulence' => 'feTurbulence',
        'foreignobject' => 'foreignObject',
        'glyphref' => 'glyphRef',
        'lineargradient' => 'linearGradient',
        'radialgradient' => 'radialGradient',
        'textpath' => 'textPath',
    );

    /**
     * Tell whether an element carries every flag contained in a mask.
     *
     * The element's mask is looked up through element(); an unknown element
     * has the mask 0.
     *
     * Example:
     *
     * Elements::isA('script', Elements::TEXT_RAW); // Returns true.
     *
     * Elements::isA('script', Elements::TEXT_RCDATA); // Returns false.
     *
     * @param string $name The element name.
     * @param int    $mask One of the constants on this class.
     *
     * @return bool true if all bits of $mask are set for the element, false otherwise.
     */
    public static function isA($name, $mask)
    {
        $flags = static::element($name);
        $matched = $flags & $mask;

        return $mask === $matched;
    }

    /**
     * Tell whether a name is listed in the HTML5 element table.
     *
     * The lookup is case-insensitive: the name is lowercased before it is
     * checked against the table.
     *
     * @param string $name The name of the element.
     *
     * @return bool true if a html5 element and false otherwise.
     */
    public static function isHtml5Element($name)
    {
        // The table keys are lowercase, so normalise the input first.
        $lowered = strtolower($name);

        return isset(static::$html5[$lowered]);
    }

    /**
     * Tell whether a name is listed in the MathML presentation element table.
     *
     * The name is matched exactly as given; no case folding is applied.
     *
     * @param string $name The name of the element.
     *
     * @return bool true if a MathML name and false otherwise.
     */
    public static function isMathMLElement($name)
    {
        $known = isset(static::$mathml[$name]);

        return $known;
    }

    /**
     * Tell whether a name is listed in the SVG element table.
     *
     * The name is matched exactly as given; no case folding is applied, so
     * mixed-case names such as "clipPath" must be passed in their proper case.
     *
     * @param string $name The name of the element.
     *
     * @return bool true if a SVG element and false otherwise.
     */
    public static function isSvgElement($name)
    {
        $known = isset(static::$svg[$name]);

        return $known;
    }

    /**
     * Tell whether a name is a known element of any supported vocabulary.
     *
     * The HTML5 table is consulted first (case-insensitively), then the MathML
     * table and finally the SVG table (both case-sensitively).
     *
     * @param string $name The name of the element.
     *
     * @return bool true if valid and false otherwise.
     */
    public static function isElement($name)
    {
        if (static::isHtml5Element($name)) {
            return true;
        }

        if (static::isMathMLElement($name)) {
            return true;
        }

        return static::isSvgElement($name);
    }

    /**
     * Look up the flag mask registered for an element name.
     *
     * The tables are searched in the order HTML5, SVG, MathML and the first
     * hit wins. The name is used as given (no case folding).
     *
     * @param string $name The name of the element.
     *
     * @return int the element mask, or 0 when the name is in none of the tables.
     */
    public static function element($name)
    {
        $tables = array(static::$html5, static::$svg, static::$mathml);

        foreach ($tables as $table) {
            if (isset($table[$name])) {
                return $table[$name];
            }
        }

        return 0;
    }

    /**
     * Convert an SVG element name to its canonical spelling.
     *
     * The name is lowercased; if the lowercase form has a mixed-case
     * counterpart in the case-sensitive element map, that counterpart is
     * returned instead.
     *
     * @param string $name The name of the element.
     *
     * @return string the normalized form of the element name.
     */
    public static function normalizeSvgElement($name)
    {
        $lowered = strtolower($name);

        return isset(static::$svgCaseSensitiveElementMap[$lowered])
            ? static::$svgCaseSensitiveElementMap[$lowered]
            : $lowered;
    }

    /**
     * Convert an SVG attribute name to its canonical spelling.
     *
     * The name is lowercased; if the lowercase form has a mixed-case
     * counterpart in the case-sensitive attribute map, that counterpart is
     * returned instead.
     *
     * @param string $name The name of the attribute.
     *
     * @return string The normalized form of the attribute name.
     */
    public static function normalizeSvgAttribute($name)
    {
        $lowered = strtolower($name);

        return isset(static::$svgCaseSensitiveAttributeMap[$lowered])
            ? static::$svgCaseSensitiveAttributeMap[$lowered]
            : $lowered;
    }

    /**
     * Convert a MathML attribute name to its canonical spelling.
     *
     * The name is lowercased. The single exception is "definitionurl", which
     * is returned as "definitionURL".
     *
     * @param string $name The name of the attribute.
     *
     * @return string The normalized form of the attribute name.
     */
    public static function normalizeMathMlAttribute($name)
    {
        $lowered = strtolower($name);

        // definitionURL is the only attribute handled with a mixed-case form.
        return 'definitionurl' === $lowered ? 'definitionURL' : $lowered;
    }
}
PK      ��Z�� ��	  �	    Parser/InputStream.phpnu W+A��        <?php

namespace Masterminds\HTML5\Parser;

/**
 * Interface for stream readers.
 *
 * The parser only reads from streams. Various input sources can write
 * an adapter to this InputStream.
 *
 * Currently provided InputStream implementations include
 * FileInputStream and StringInputStream.
 *
 * @deprecated since 2.4, to remove in 3.0. Use a string in the scanner instead.
 */
interface InputStream extends \Iterator
{
    /**
     * Returns the current line that is being consumed.
     *
     * @todo Move this to the scanner.
     */
    public function currentLine();

    /**
     * Returns the current column of the current line that the tokenizer is at.
     *
     * Newlines are column 0. The first char after a newline is column 1.
     *
     * @todo Move this to the scanner.
     *
     * @return int The column number.
     */
    public function columnOffset();

    /**
     * Get all characters until EOF.
     *
     * This consumes characters until the EOF.
     */
    public function remainingChars();

    /**
     * Read to a particular match (or until $max bytes are consumed).
     *
     * This operates on byte sequences, not characters.
     *
     * Matches as far as possible until we reach a certain set of bytes
     * and returns the matched substring.
     *
     * NOTE(review): the description above speaks of returning the matched
     * substring, while the return tag below speaks of an index or false.
     * Confirm the actual contract against the implementations.
     *
     * @see strcspn
     *
     * @param string $bytes Bytes to match.
     * @param int    $max   Maximum number of bytes to scan.
     *
     * @return mixed Index or false if no match is found. You should use strong
     *               equality when checking the result, since index could be 0.
     */
    public function charsUntil($bytes, $max = null);

    /**
     * Returns the string so long as $bytes matches.
     *
     * Matches as far as possible with a certain set of bytes
     * and returns the matched substring.
     *
     * @see strspn
     *
     * @param string $bytes A mask of bytes to match. If ANY byte in this mask matches the
     *                      current char, the pointer advances and the char is part of the
     *                      substring.
     * @param int    $max   The max number of chars to read.
     */
    public function charsWhile($bytes, $max = null);

    /**
     * Unconsume characters, i.e. move the read pointer backwards.
     *
     * @param int $howMany The number of characters to move the pointer back (one by default).
     */
    public function unconsume($howMany = 1);

    /**
     * Retrieve the next character without advancing the pointer.
     */
    public function peek();
}
PK      ��Z�:�X�   �     Parser/ParseError.phpnu W+A��        <?php

namespace Masterminds\HTML5\Parser;

/**
 * Exception type representing an error encountered by the parser.
 *
 * The class adds no members of its own; it exists so that parser errors
 * can be told apart from other exceptions by type.
 */
class ParseError extends \Exception
{
}
PK      ��Z �w"Y  Y    Parser/DOMTreeBuilder.phpnu W+A��        <?php

namespace Masterminds\HTML5\Parser;

use Masterminds\HTML5\Elements;
use Masterminds\HTML5\InstructionProcessor;

/**
 * Create an HTML5 DOM tree from events.
 *
 * This attempts to create a DOM from events emitted by a parser. This
 * attempts (but does not guarantee) to up-convert older HTML documents
 * to HTML5. It does this by applying HTML5's rules, but it will not
 * change the architecture of the document itself.
 *
 * Many of the error correction and quirks features suggested in the specification
 * are implemented herein; however, not all of them are. Since we do not
 * assume a graphical user agent, no presentation-specific logic is conducted
 * during tree building.
 *
 * FIXME: The present tree builder does not exactly follow the state machine rules
 * for insert modes as outlined in the HTML5 spec. The processor needs to be
 * re-written to accommodate this. See, for example, the Go language HTML5
 * parser.
 */
class DOMTreeBuilder implements EventHandler
{
    /**
     * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0.
     */
    const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';

    const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML';

    const NAMESPACE_SVG = 'http://www.w3.org/2000/svg';

    const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink';

    const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace';

    const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/';

    const OPT_DISABLE_HTML_NS = 'disable_html_ns';

    const OPT_TARGET_DOC = 'target_document';

    const OPT_IMPLICIT_NS = 'implicit_namespaces';

    /**
     * Holds the HTML5 element names that causes a namespace switch.
     *
     * @var array
     */
    protected $nsRoots = array(
        'html' => self::NAMESPACE_HTML,
        'svg' => self::NAMESPACE_SVG,
        'math' => self::NAMESPACE_MATHML,
    );

    /**
     * Holds the always available namespaces (which does not require the XMLNS declaration).
     *
     * @var array
     */
    protected $implicitNamespaces = array(
        'xml' => self::NAMESPACE_XML,
        'xmlns' => self::NAMESPACE_XMLNS,
        'xlink' => self::NAMESPACE_XLINK,
    );

    /**
     * Holds a stack of currently active namespaces.
     *
     * @var array
     */
    protected $nsStack = array();

    /**
     * Holds the number of namespaces declared by a node.
     *
     * @var array
     */
    protected $pushes = array();

    /**
     * Defined in 8.2.5.
     */
    const IM_INITIAL = 0;

    const IM_BEFORE_HTML = 1;

    const IM_BEFORE_HEAD = 2;

    const IM_IN_HEAD = 3;

    const IM_IN_HEAD_NOSCRIPT = 4;

    const IM_AFTER_HEAD = 5;

    const IM_IN_BODY = 6;

    const IM_TEXT = 7;

    const IM_IN_TABLE = 8;

    const IM_IN_TABLE_TEXT = 9;

    const IM_IN_CAPTION = 10;

    const IM_IN_COLUMN_GROUP = 11;

    const IM_IN_TABLE_BODY = 12;

    const IM_IN_ROW = 13;

    const IM_IN_CELL = 14;

    const IM_IN_SELECT = 15;

    const IM_IN_SELECT_IN_TABLE = 16;

    const IM_AFTER_BODY = 17;

    const IM_IN_FRAMESET = 18;

    const IM_AFTER_FRAMESET = 19;

    const IM_AFTER_AFTER_BODY = 20;

    const IM_AFTER_AFTER_FRAMESET = 21;

    const IM_IN_SVG = 22;

    const IM_IN_MATHML = 23;

    protected $options = array();

    protected $stack = array();

    protected $current; // Pointer in the tag hierarchy.
    protected $rules;
    protected $doc;

    protected $frag;

    protected $processor;

    protected $insertMode = 0;

    /**
     * Track if we are in an element that allows only inline child nodes.
     *
     * @var string|null
     */
    protected $onlyInline;

    /**
     * Quirks mode is enabled by default.
     * Any document that is missing the DT will be considered to be in quirks mode.
     */
    protected $quirks = true;

    protected $errors = array();

    /**
     * Set up the tree builder.
     *
     * Recognised option keys read here:
     * - self::OPT_TARGET_DOC: an existing document to build into. When absent, a
     *   new document with an "html" doctype is created and its encoding is taken
     *   from the 'encoding' option (falling back to UTF-8).
     * - self::OPT_IMPLICIT_NS, or the legacy key 'implicitNamespaces': extra
     *   prefix => namespace URI pairs placed on the initial namespace stack entry.
     *
     * @param bool  $isFragment When true, nodes are collected in a document fragment
     *                          and the insert mode starts at IM_IN_BODY.
     * @param array $options    Builder options, stored as given.
     */
    public function __construct($isFragment = false, array $options = array())
    {
        $this->options = $options;

        if (!isset($options[self::OPT_TARGET_DOC])) {
            // XXX:
            // For now an HTML5 doctype is always created, and any older DTD is
            // up-converted to HTML5.
            $domImpl = new \DOMImplementation();
            $doctype = $domImpl->createDocumentType('html');
            $this->doc = $domImpl->createDocument(null, '', $doctype);
            $this->doc->encoding = empty($options['encoding']) ? 'UTF-8' : $options['encoding'];
        } else {
            $this->doc = $options[self::OPT_TARGET_DOC];
        }

        $this->errors = array();

        // Insertion starts at the document node itself.
        $this->current = $this->doc;

        // Rules engine consulted for tags.
        $this->rules = new TreeBuildingRules();

        // The constant-named option takes precedence over the legacy key.
        $extraNamespaces = array();
        foreach (array(self::OPT_IMPLICIT_NS, 'implicitNamespaces') as $optionKey) {
            if (isset($this->options[$optionKey])) {
                $extraNamespaces = $this->options[$optionKey];
                break;
            }
        }

        // Seed the namespace stack: caller-supplied namespaces win over the default
        // HTML namespace, which in turn wins over the built-in implicit ones.
        $initialNamespaces = $extraNamespaces + array('' => self::NAMESPACE_HTML) + $this->implicitNamespaces;
        array_unshift($this->nsStack, $initialNamespaces);

        if (!$isFragment) {
            return;
        }

        $this->insertMode = static::IM_IN_BODY;
        $this->frag = $this->doc->createDocumentFragment();
        $this->current = $this->frag;
    }

    /**
     * Get the document this builder writes into.
     *
     * This is either the document supplied through the OPT_TARGET_DOC option or
     * the document created by the constructor.
     */
    public function document()
    {
        return $this->doc;
    }

    /**
     * Get the DOM fragment for the body.
     *
     * The fragment is created by the constructor only when the builder is put in
     * fragment mode; otherwise the property is never assigned there and this
     * returns null. A fragment is used because it may hold zero or more DOMNodes
     * at its root.
     *
     * @see http://www.w3.org/TR/2012/CR-html5-20121217/syntax.html#concept-frag-parse-context
     *
     * @return \DOMDocumentFragment|null
     */
    public function fragment()
    {
        return $this->frag;
    }

    /**
     * Provide an instruction processor.
     *
     * This is used for handling processing instructions as they are
     * inserted. If omitted, PIs are inserted directly into the DOM tree.
     *
     * @param InstructionProcessor $proc The processor to store on this builder.
     */
    public function setInstructionProcessor(InstructionProcessor $proc)
    {
        $this->processor = $proc;
    }

    /**
     * Handle a DOCTYPE event.
     *
     * The inbound doctype itself is not preserved; it is only used to record the
     * quirks flag and to advance the insert mode to IM_BEFORE_HTML.
     *
     * A DOCTYPE that arrives after the initial insert mode is reported as a parse
     * error and ignored completely. In particular it must not touch the quirks
     * flag: previously the flag was overwritten before the placement check, so a
     * misplaced DOCTYPE could silently undo the quirks mode already established
     * (for example by startTag() when the document had no leading DOCTYPE).
     *
     * @param string      $name   The doctype name; only used in the error message.
     * @param int         $idType Not used by this implementation.
     * @param string|null $id     Not used by this implementation.
     * @param bool        $quirks Whether the doctype puts the document in quirks mode.
     */
    public function doctype($name, $idType = 0, $id = null, $quirks = false)
    {
        if ($this->insertMode > static::IM_INITIAL) {
            $this->parseError('Illegal placement of DOCTYPE tag. Ignoring: ' . $name);

            return;
        }

        // Only a correctly placed DOCTYPE may decide the quirks mode.
        $this->quirks = $quirks;
        $this->insertMode = static::IM_BEFORE_HTML;
    }

    /**
     * Process the start tag.
     *
     * @todo - XMLNS namespace handling (we need to parse, even if it's not valid)
     *       - XLink, MathML and SVG namespace handling
     *       - Omission rules: 8.1.2.4 Optional tags
     *
     * @param string $name
     * @param array  $attributes
     * @param bool   $selfClosing
     *
     * @return int
     */
    public function startTag($name, $attributes = array(), $selfClosing = false)
    {
        $lname = $this->normalizeTagName($name);

        // Make sure we have an html element.
        if (!$this->doc->documentElement && 'html' !== $name && !$this->frag) {
            $this->startTag('html');
        }

        // Set quirks mode if we're at IM_INITIAL with no doctype.
        if ($this->insertMode === static::IM_INITIAL) {
            $this->quirks = true;
            $this->parseError('No DOCTYPE specified.');
        }

        // SPECIAL TAG HANDLING:
        // Spec says do this, and "don't ask."
        // find the spec where this is defined... looks problematic
        if ('image' === $name && !($this->insertMode === static::IM_IN_SVG || $this->insertMode === static::IM_IN_MATHML)) {
            $name = 'img';
        }

        // Autoclose p tags where appropriate.
        if ($this->insertMode >= static::IM_IN_BODY && Elements::isA($name, Elements::AUTOCLOSE_P)) {
            $this->autoclose('p');
        }

        // Set insert mode:
        switch ($name) {
            case 'html':
                $this->insertMode = static::IM_BEFORE_HEAD;
                break;
            case 'head':
                if ($this->insertMode > static::IM_BEFORE_HEAD) {
                    $this->parseError('Unexpected head tag outside of head context.');
                } else {
                    $this->insertMode = static::IM_IN_HEAD;
                }
                break;
            case 'body':
                $this->insertMode = static::IM_IN_BODY;
                break;
            case 'svg':
                $this->insertMode = static::IM_IN_SVG;
                break;
            case 'math':
                $this->insertMode = static::IM_IN_MATHML;
                break;
            case 'noscript':
                if ($this->insertMode === static::IM_IN_HEAD) {
                    $this->insertMode = static::IM_IN_HEAD_NOSCRIPT;
                }
                break;
        }

        // Special case handling for SVG.
        if ($this->insertMode === static::IM_IN_SVG) {
            $lname = Elements::normalizeSvgElement($lname);
        }

        $pushes = 0;
        // when we found a tag thats appears inside $nsRoots, we have to switch the defalut namespace
        if (isset($this->nsRoots[$lname]) && $this->nsStack[0][''] !== $this->nsRoots[$lname]) {
            array_unshift($this->nsStack, array(
                '' => $this->nsRoots[$lname],
            ) + $this->nsStack[0]);
            ++$pushes;
        }
        $needsWorkaround = false;
        if (isset($this->options['xmlNamespaces']) && $this->options['xmlNamespaces']) {
            // when xmlNamespaces is true a and we found a 'xmlns' or 'xmlns:*' attribute, we should add a new item to the $nsStack
            foreach ($attributes as $aName => $aVal) {
                if ('xmlns' === $aName) {
                    $needsWorkaround = $aVal;
                    array_unshift($this->nsStack, array(
                        '' => $aVal,
                    ) + $this->nsStack[0]);
                    ++$pushes;
                } elseif ('xmlns' === (($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : '')) {
                    array_unshift($this->nsStack, array(
                        substr($aName, $pos + 1) => $aVal,
                    ) + $this->nsStack[0]);
                    ++$pushes;
                }
            }
        }

        if ($this->onlyInline && Elements::isA($lname, Elements::BLOCK_TAG)) {
            $this->autoclose($this->onlyInline);
            $this->onlyInline = null;
        }

        try {
            $prefix = ($pos = strpos($lname, ':')) ? substr($lname, 0, $pos) : '';

            if (false !== $needsWorkaround) {
                $xml = "<$lname xmlns=\"$needsWorkaround\" " . (strlen($prefix) && isset($this->nsStack[0][$prefix]) ? ("xmlns:$prefix=\"" . $this->nsStack[0][$prefix] . '"') : '') . '/>';

                $frag = new \DOMDocument('1.0', 'UTF-8');
                $frag->loadXML($xml);

                $ele = $this->doc->importNode($frag->documentElement, true);
            } else {
                if (!isset($this->nsStack[0][$prefix]) || ('' === $prefix && isset($this->options[self::OPT_DISABLE_HTML_NS]) && $this->options[self::OPT_DISABLE_HTML_NS])) {
                    $ele = $this->doc->createElement($lname);
                } else {
                    $ele = $this->doc->createElementNS($this->nsStack[0][$prefix], $lname);
                }
            }
        } catch (\DOMException $e) {
            $this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>.");
            $ele = $this->doc->createElement('invalid');
        }

        if (Elements::isA($lname, Elements::BLOCK_ONLY_INLINE)) {
            $this->onlyInline = $lname;
        }

        // When we add some namespacess, we have to track them. Later, when "endElement" is invoked, we have to remove them.
        // When we are on a void tag, we do not need to care about namesapce nesting.
        if ($pushes > 0 && !Elements::isA($name, Elements::VOID_TAG)) {
            // PHP tends to free the memory used by DOM,
            // to avoid spl_object_hash collisions whe have to avoid garbage collection of $ele storing it into $pushes
            // see https://bugs.php.net/bug.php?id=67459
            $this->pushes[spl_object_hash($ele)] = array($pushes, $ele);
        }

        foreach ($attributes as $aName => $aVal) {
            // xmlns attributes can't be set
            if ('xmlns' === $aName) {
                continue;
            }

            if ($this->insertMode === static::IM_IN_SVG) {
                $aName = Elements::normalizeSvgAttribute($aName);
            } elseif ($this->insertMode === static::IM_IN_MATHML) {
                $aName = Elements::normalizeMathMlAttribute($aName);
            }

            $aVal = (string) $aVal;

            try {
                $prefix = ($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : false;

                if ('xmlns' === $prefix) {
                    $ele->setAttributeNS(self::NAMESPACE_XMLNS, $aName, $aVal);
                } elseif (false !== $prefix && isset($this->nsStack[0][$prefix])) {
                    $ele->setAttributeNS($this->nsStack[0][$prefix], $aName, $aVal);
                } else {
                    $ele->setAttribute($aName, $aVal);
                }
            } catch (\DOMException $e) {
                $this->parseError("Illegal attribute name for tag $name. Ignoring: $aName");
                continue;
            }

            // This is necessary on a non-DTD schema, like HTML5.
            if ('id' === $aName) {
                $ele->setIdAttribute('id', true);
            }
        }

        if ($this->frag !== $this->current && $this->rules->hasRules($name)) {
            // Some elements have special processing rules. Handle those separately.
            $this->current = $this->rules->evaluate($ele, $this->current);
        } else {
            // Otherwise, it's a standard element.
            $this->current->appendChild($ele);

            if (!Elements::isA($name, Elements::VOID_TAG)) {
                $this->current = $ele;
            }

            // Self-closing tags should only be respected on foreign elements
            // (and are implied on void elements)
            // See: https://www.w3.org/TR/html5/syntax.html#start-tags
            if (Elements::isHtml5Element($name)) {
                $selfClosing = false;
            }
        }

        // This is sort of a last-ditch attempt to correct for cases where no head/body
        // elements are provided.
        if ($this->insertMode <= static::IM_BEFORE_HEAD && 'head' !== $name && 'html' !== $name) {
            $this->insertMode = static::IM_IN_BODY;
        }

        // When we are on a void tag, we do not need to care about namesapce nesting,
        // but we have to remove the namespaces pushed to $nsStack.
        if ($pushes > 0 && Elements::isA($name, Elements::VOID_TAG)) {
            // remove the namespaced definded by current node
            for ($i = 0; $i < $pushes; ++$i) {
                array_shift($this->nsStack);
            }
        }

        if ($selfClosing) {
            $this->endTag($name);
        }

        // Return the element mask, which the tokenizer can then use to set
        // various processing rules.
        return Elements::element($name);
    }

    /**
     * Handle a closing tag.
     *
     * Closes the nearest open element with a matching name, pops the namespace
     * scopes recorded for the current node, and updates the insertion mode.
     *
     * @param string $name The tag name of the closing tag.
     */
    public function endTag($name)
    {
        $lname = $this->normalizeTagName($name);

        // Special case within 12.2.6.4.7: An end tag whose tag name is "br" should be treated as an opening tag
        if ('br' === $name) {
            $this->parseError('Closing tag encountered for void element br.');

            $this->startTag('br');
        }
        // Ignore closing tags for other unary elements.
        elseif (Elements::isA($name, Elements::VOID_TAG)) {
            return;
        }

        if ($this->insertMode <= static::IM_BEFORE_HTML) {
            // 8.2.5.4.2
            if (in_array($name, array(
                'html',
                'br',
                'head',
                'title',
            ), true)) {
                $this->startTag('html');
                $this->endTag($name);
                $this->insertMode = static::IM_BEFORE_HEAD;

                return;
            }

            // Ignore the tag.
            $this->parseError('Illegal closing tag at global scope.');

            return;
        }

        // Special case handling for SVG.
        if ($this->insertMode === static::IM_IN_SVG) {
            $lname = Elements::normalizeSvgElement($lname);
        }

        $cid = spl_object_hash($this->current);

        // XXX: HTML has no parent. What do we do, though,
        // if this element appears in the wrong place?
        if ('html' === $lname) {
            return;
        }

        // Remove the namespaces defined by the current node.
        if (isset($this->pushes[$cid])) {
            for ($i = 0; $i < $this->pushes[$cid][0]; ++$i) {
                array_shift($this->nsStack);
            }
            unset($this->pushes[$cid]);
        }

        if (!$this->autoclose($lname)) {
            $this->parseError('Could not find closing tag for ' . $lname);
        }

        switch ($lname) {
            case 'head':
                $this->insertMode = static::IM_AFTER_HEAD;
                break;
            case 'body':
                $this->insertMode = static::IM_AFTER_BODY;
                break;
            case 'svg':
            // startTag() enters IM_IN_MATHML on the "math" element, so that is
            // the name that has to leave it again. "mathml" is kept only for
            // backward compatibility.
            case 'math':
            case 'mathml':
                $this->insertMode = static::IM_IN_BODY;
                break;
        }
    }

    /**
     * Append a comment node to the current node.
     *
     * @param string $cdata The comment text.
     */
    public function comment($cdata)
    {
        // TODO: Need to handle case where comment appears outside of the HTML tag.
        $this->current->appendChild($this->doc->createComment($cdata));
    }

    /**
     * Append a text node to the current node.
     *
     * Text that arrives before the head has been entered is dropped; if it is
     * anything other than whitespace a parse error is recorded.
     *
     * @param string $data The character data.
     */
    public function text($data)
    {
        // XXX: Hmmm.... should we really be this strict?
        if ($this->insertMode < static::IM_IN_HEAD) {
            // Per '8.2.5.4.3 The "before head" insertion mode' the characters
            // " \t\n\r\f" should be ignored but no mention of a parse error. This is
            // practical as most documents contain these characters. Other text is not
            // expected here so recording a parse error is necessary.
            $dataTmp = trim($data, " \t\n\r\f");
            // Compare against '' explicitly: empty('0') is true, which would
            // silently swallow the text "0" without reporting it.
            if ('' !== $dataTmp) {
                // fprintf(STDOUT, "Unexpected insert mode: %d", $this->insertMode);
                $this->parseError('Unexpected text. Ignoring: ' . $dataTmp);
            }

            return;
        }
        // fprintf(STDOUT, "Appending text %s.", $data);
        $node = $this->doc->createTextNode($data);
        $this->current->appendChild($node);
    }

    /**
     * Handle the end of the input.
     *
     * Currently a no-op: nothing is done for elements that are still open.
     */
    public function eof()
    {
        // If the $current isn't the $root, do we need to do anything?
    }

    /**
     * Record a parse error.
     *
     * @param string $msg  Description of the problem.
     * @param int    $line Line the error was found on.
     * @param int    $col  Column the error was found on.
     */
    public function parseError($msg, $line = 0, $col = 0)
    {
        $formatted = sprintf('Line %d, Col %d: %s', $line, $col, $msg);

        $this->errors[] = $formatted;
    }

    /**
     * Get the parse errors recorded so far.
     *
     * @return array The contents of $this->errors; parseError() appends
     *               formatted message strings to it.
     */
    public function getErrors()
    {
        return $this->errors;
    }

    /**
     * Append a CDATA section to the current node.
     *
     * @param string $data The contents of the CDATA section.
     */
    public function cdata($data)
    {
        $this->current->appendChild($this->doc->createCDATASection($data));
    }

    /**
     * Handle a processing instruction.
     *
     * An "xml" instruction seen in the initial insertion mode is dropped. If an
     * InstructionProcessor is configured it is given the instruction and may
     * return a new current node; otherwise a plain PI node is appended.
     *
     * @param string      $name The instruction target.
     * @param string|null $data The instruction data.
     */
    public function processingInstruction($name, $data = null)
    {
        // XXX: Ignore initial XML declaration, per the spec.
        $isXmlDeclaration = $this->insertMode === static::IM_INITIAL && 'xml' === strtolower($name);
        if ($isXmlDeclaration) {
            return;
        }

        if (!($this->processor instanceof InstructionProcessor)) {
            // No processor: this is just a dumb PI element.
            $this->current->appendChild($this->doc->createProcessingInstruction($name, $data));

            return;
        }

        // Important: The processor may modify the current DOM tree however it sees fit.
        $replacement = $this->processor->process($this->current, $name, $data);
        if (!empty($replacement)) {
            $this->current = $replacement;
        }
    }

    // ==========================================================================
    // UTILITIES
    // ==========================================================================

    /**
     * Apply normalization rules to a tag name.
     * See sections 2.9 and 8.1.2.
     *
     * Currently returns the name unchanged; see the note in the body.
     *
     * @param string $tagName
     *
     * @return string The normalized tag name.
     */
    protected function normalizeTagName($tagName)
    {
        /*
         * Section 2.9 suggests that we should not strip a namespace prefix
         * here. The disabled implementation was:
         *
         *   if (strpos($name, ':') !== false) {
         *       // We know from the grammar that there must be at least one
         *       // other char besides :, since : is not a legal tag start.
         *       $parts = explode(':', $name);
         *       return array_pop($parts);
         *   }
         */
        return $tagName;
    }

    /**
     * Placeholder for quirks-mode tree resolution; not implemented.
     *
     * @param string $name Unused.
     *
     * @throws \Exception Always.
     */
    protected function quirksTreeResolver($name)
    {
        throw new \Exception('Not implemented.');
    }

    /**
     * Close the nearest open element named $tagName.
     *
     * Walks from the current node up through its ancestors. On the first
     * element whose tag name matches, the current node becomes that element's
     * parent. The walk gives up as soon as a non-element node is reached.
     *
     * @param string $tagName
     *
     * @return bool True if a matching element was found and closed.
     */
    protected function autoclose($tagName)
    {
        $node = $this->current;

        while (true) {
            if (XML_ELEMENT_NODE !== $node->nodeType) {
                return false;
            }

            if ($tagName === $node->tagName) {
                $this->current = $node->parentNode;

                return true;
            }

            $node = $node->parentNode;
            if (!$node) {
                break;
            }
        }

        return false;
    }

    /**
     * Checks if the given tagname is an ancestor of the present candidate.
     *
     * If $this->current or anything above $this->current matches the given tag
     * name, this returns true. The walk stops at the first non-element node, or
     * when there is no parent left.
     *
     * @param string $tagName
     *
     * @return bool
     */
    protected function isAncestor($tagName)
    {
        $candidate = $this->current;
        // The null check covers a chain that ends without a document or
        // fragment on top; autoclose() stops on a missing parent the same way.
        while (null !== $candidate && XML_ELEMENT_NODE === $candidate->nodeType) {
            if ($candidate->tagName === $tagName) {
                return true;
            }
            $candidate = $candidate->parentNode;
        }

        return false;
    }

    /**
     * Returns true if the current node has the given tag name.
     *
     * The current node is the one new children get appended to, i.e. the
     * would-be parent of the next inserted node.
     *
     * @param string $tagName
     *
     * @return bool
     */
    protected function isParent($tagName)
    {
        $currentName = $this->current->tagName;

        return $tagName === $currentName;
    }
}
PK      ��Z���G�  �    Parser/CharacterReference.phpnu W+A��        <?php

namespace Masterminds\HTML5\Parser;

use Masterminds\HTML5\Entities;

/**
 * Manage entity references.
 *
 * This is a simple resolver for HTML5 character reference entitites. See Entities for the list of supported entities.
 */
class CharacterReference
{
    /**
     * Conversion map passed to mb_decode_numericentity():
     * start code point, end code point, offset, mask.
     */
    protected static $numeric_mask = array(
        0x0,
        0x2FFFF,
        0,
        0xFFFF,
    );

    /**
     * Resolve a named entity (e.g. 'amp') to its UTF-8 character ('&').
     *
     * @param string $name The entity name, without '&' and ';'.
     *
     * @return string|null The character sequence (possibly several bytes), or
     *                     null when the name is not in Entities::$byName.
     */
    public static function lookupName($name)
    {
        // Do we really want to return NULL here? or FFFD
        if (isset(Entities::$byName[$name])) {
            return Entities::$byName[$name];
        }

        return null;
    }

    /**
     * Resolve a decimal code point to its UTF-8 character.
     *
     * @param int|float|string $int The code point in decimal.
     *
     * @return false|string|string[]|null
     */
    public static function lookupDecimal($int)
    {
        // UNTESTED: This may fail on some planes. Couldn't find full documentation
        // on the value of the mask array.
        return mb_decode_numericentity('&#' . $int . ';', static::$numeric_mask, 'utf-8');
    }

    /**
     * Resolve a hexadecimal code point to its UTF-8 character.
     *
     * @param string $hexdec The code point as hexadecimal digits.
     *
     * @return false|string|string[]|null
     */
    public static function lookupHex($hexdec)
    {
        $decimal = hexdec($hexdec);

        return static::lookupDecimal($decimal);
    }
}
PK      ��Z�����,  �,    Parser/Scanner.phpnu W+A��        <?php

namespace Masterminds\HTML5\Parser;

use Masterminds\HTML5\Exception;

/**
 * The scanner scans over a given data input to react appropriately to characters.
 */
class Scanner
{
    const CHARS_HEX = 'abcdefABCDEF01234567890';
    const CHARS_ALNUM = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890';
    const CHARS_ALPHA = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';

    /**
     * The string data we're parsing.
     */
    private $data;

    /**
     * The current integer byte position we are in $data.
     */
    private $char;

    /**
     * Length of $data in bytes; when $char >= $EOF, we are at the end-of-file.
     */
    private $EOF;

    /**
     * Parse errors.
     */
    public $errors = array();

    /**
     * Create a new Scanner.
     *
     * @param string $data     Data to parse.
     * @param string $encoding The encoding to use for the data.
     *
     * @throws Exception If the given data cannot be encoded to UTF-8.
     */
    public function __construct($data, $encoding = 'UTF-8')
    {
        if ($data instanceof InputStream) {
            @trigger_error('InputStream objects are deprecated since version 2.4 and will be removed in 3.0. Use strings instead.', E_USER_DEPRECATED);
            $data = (string) $data;
        }

        $data = UTF8Utils::convertToUTF8($data, $encoding);

        // There is good reason to question whether it makes sense to
        // do this here, since most of these checks are done during
        // parsing, and since this check doesn't actually *do* anything.
        $this->errors = UTF8Utils::checkForIllegalCodepoints($data);

        $data = $this->replaceLinefeeds($data);

        $this->data = $data;
        $this->char = 0;
        $this->EOF = strlen($data);
    }

    /**
     * Check if upcoming chars match the given sequence.
     *
     * This compares the data at the current position with $sequence
     * without moving the pointer, so the caller will still need to
     * read the sequence even if this returns true.
     *
     * Example: $this->scanner->sequenceMatches('</script>') will
     * see if the input stream is at the start of a
     * '</script>' string.
     *
     * @param string $sequence
     * @param bool   $caseSensitive
     *
     * @return bool
     */
    public function sequenceMatches($sequence, $caseSensitive = true)
    {
        $portion = substr($this->data, $this->char, strlen($sequence));

        return $caseSensitive ? $portion === $sequence : 0 === strcasecmp($portion, $sequence);
    }

    /**
     * Get the current position.
     *
     * @return int The current integer byte position.
     */
    public function position()
    {
        return $this->char;
    }

    /**
     * Take a peek at the next character in the data.
     *
     * @return string|false The next byte, or false if there is none.
     */
    public function peek()
    {
        if (($this->char + 1) < $this->EOF) {
            return $this->data[$this->char + 1];
        }

        return false;
    }

    /**
     * Get the next character.
     * Note: This advances the pointer, even when the end has been reached.
     *
     * @return string|false The next byte, or false if the pointer is now at or past EOF.
     */
    public function next()
    {
        ++$this->char;

        if ($this->char < $this->EOF) {
            return $this->data[$this->char];
        }

        return false;
    }

    /**
     * Get the current character.
     * Note, this does not advance the pointer.
     *
     * @return string|false The current byte, or false at EOF.
     */
    public function current()
    {
        if ($this->char < $this->EOF) {
            return $this->data[$this->char];
        }

        return false;
    }

    /**
     * Silently consume N chars.
     *
     * The pointer is not bounds-checked and may end up past EOF.
     *
     * @param int $count
     */
    public function consume($count = 1)
    {
        $this->char += $count;
    }

    /**
     * Unconsume some of the data.
     * This moves the data pointer backwards. Nothing happens if that would
     * move the pointer before the start of the data.
     *
     * @param int $howMany The number of characters to move the pointer back.
     */
    public function unconsume($howMany = 1)
    {
        if (($this->char - $howMany) >= 0) {
            $this->char -= $howMany;
        }
    }

    /**
     * Get the next group that contains hex characters.
     * Note, along with getting the characters the pointer in the data will be
     * moved as well.
     *
     * @return string|false The next group of hex characters, or false at EOF.
     */
    public function getHex()
    {
        return $this->doCharsWhile(static::CHARS_HEX);
    }

    /**
     * Get the next group of characters that are ASCII Alpha characters.
     * Note, along with getting the characters the pointer in the data will be
     * moved as well.
     *
     * @return string|false The next group of ASCII alpha characters, or false at EOF.
     */
    public function getAsciiAlpha()
    {
        return $this->doCharsWhile(static::CHARS_ALPHA);
    }

    /**
     * Get the next group of characters that are ASCII Alpha characters and numbers.
     * Note, along with getting the characters the pointer in the data will be
     * moved as well.
     *
     * @return string|false The next group of ASCII alpha characters and numbers, or false at EOF.
     */
    public function getAsciiAlphaNum()
    {
        return $this->doCharsWhile(static::CHARS_ALNUM);
    }

    /**
     * Get the next group of numbers.
     * Note, along with getting the characters the pointer in the data will be
     * moved as well.
     *
     * @return string|false The next group of numbers, or false at EOF.
     */
    public function getNumeric()
    {
        return $this->doCharsWhile('0123456789');
    }

    /**
     * Consume whitespace.
     * Whitespace in HTML5 is: formfeed, tab, newline, space.
     *
     * @return int|false The length of the matched whitespace, or false at EOF.
     */
    public function whitespace()
    {
        if ($this->char >= $this->EOF) {
            return false;
        }

        $len = strspn($this->data, "\n\t\f ", $this->char);

        $this->char += $len;

        return $len;
    }

    /**
     * Returns the current line that is being consumed.
     *
     * @return int The current line number.
     */
    public function currentLine()
    {
        if (empty($this->EOF) || 0 === $this->char) {
            return 1;
        }

        // Count the newlines before the current position (clamped to the
        // end of the data); lines are numbered from 1.
        return substr_count($this->data, "\n", 0, min($this->char, $this->EOF)) + 1;
    }

    /**
     * Read chars until something in the mask is encountered.
     *
     * @param string $mask
     *
     * @return string|false The characters read, or false at EOF.
     */
    public function charsUntil($mask)
    {
        return $this->doCharsUntil($mask);
    }

    /**
     * Read chars as long as the mask matches.
     *
     * @param string $mask
     *
     * @return string|false The characters read, or false at EOF.
     */
    public function charsWhile($mask)
    {
        return $this->doCharsWhile($mask);
    }

    /**
     * Returns the current column of the current line that the tokenizer is at.
     *
     * Newlines are column 0. The first char after a newline is column 1.
     *
     * @return int The column number.
     */
    public function columnOffset()
    {
        // next() and consume() can move the pointer past the end of the data.
        // Clamp it, as currentLine() does; otherwise the strrpos() offset below
        // turns positive, which skips earlier newlines or (PHP 8) throws a
        // ValueError once it exceeds the string length.
        $pos = min($this->char, $this->EOF);

        // Short circuit for the first char (and for empty data).
        if (0 === $pos) {
            return 0;
        }

        // strrpos is weird, and the offset needs to be negative for what we
        // want (i.e., the last \n before $pos). This needs to not have
        // one (to make it point to the next character, the one we want the
        // position of) added to it because strrpos's behaviour includes the
        // final offset byte.
        $backwardFrom = $pos - 1 - strlen($this->data);
        $lastLine = strrpos($this->data, "\n", $backwardFrom);

        if (false !== $lastLine) {
            // Everything after the last newline, up to the current position.
            $findLengthOf = substr($this->data, $lastLine + 1, $pos - 1 - $lastLine);
        } else {
            // No newline yet: everything from the start of the data.
            $findLengthOf = substr($this->data, 0, $pos);
        }

        return UTF8Utils::countChars($findLengthOf);
    }

    /**
     * Get all characters until EOF.
     *
     * This consumes characters until the EOF.
     *
     * @return string The remaining data, or an empty string if already at EOF.
     */
    public function remainingChars()
    {
        if ($this->char < $this->EOF) {
            $data = substr($this->data, $this->char);
            $this->char = $this->EOF;

            return $data;
        }

        return ''; // false;
    }

    /**
     * Replace linefeed characters according to the spec.
     *
     * NUL bytes are replaced with U+FFFD in the same pass.
     *
     * @param string $data
     *
     * @return string
     */
    private function replaceLinefeeds($data)
    {
        /*
         * U+000D CARRIAGE RETURN (CR) characters and U+000A LINE FEED (LF) characters are treated specially.
         * Any CR characters that are followed by LF characters must be removed, and any CR characters not
         * followed by LF characters must be converted to LF characters. Thus, newlines in HTML DOMs are
         * represented by LF characters, and there are never any CR characters in the input to the tokenization
         * stage.
         */
        $crlfTable = array(
            "\0" => "\xEF\xBF\xBD",
            "\r\n" => "\n",
            "\r" => "\n",
        );

        return strtr($data, $crlfTable);
    }

    /**
     * Read to a particular match (or until $max bytes are consumed).
     *
     * This operates on byte sequences, not characters.
     *
     * Matches as far as possible until we reach a certain set of bytes
     * and returns the matched substring.
     *
     * @param string $bytes Bytes to match.
     * @param int    $max   Maximum number of bytes to scan.
     *
     * @return string|false The substring read (possibly empty), or false if
     *                      the pointer is already at EOF.
     */
    private function doCharsUntil($bytes, $max = null)
    {
        if ($this->char >= $this->EOF) {
            return false;
        }

        if (0 === $max || $max) {
            $len = strcspn($this->data, $bytes, $this->char, $max);
        } else {
            $len = strcspn($this->data, $bytes, $this->char);
        }

        $string = (string) substr($this->data, $this->char, $len);
        $this->char += $len;

        return $string;
    }

    /**
     * Returns the string so long as $bytes matches.
     *
     * Matches as far as possible with a certain set of bytes
     * and returns the matched substring.
     *
     * @param string $bytes A mask of bytes to match. If ANY byte in this mask matches the
     *                      current char, the pointer advances and the char is part of the
     *                      substring.
     * @param int    $max   The max number of chars to read.
     *
     * @return string|false The substring read (possibly empty), or false if
     *                      the pointer is already at EOF.
     */
    private function doCharsWhile($bytes, $max = null)
    {
        if ($this->char >= $this->EOF) {
            return false;
        }

        if (0 === $max || $max) {
            $len = strspn($this->data, $bytes, $this->char, $max);
        } else {
            $len = strspn($this->data, $bytes, $this->char);
        }

        $string = (string) substr($this->data, $this->char, $len);
        $this->char += $len;

        return $string;
    }
}
PK      ��Z0*rl  l    Parser/UTF8Utils.phpnu W+A��        <?php

namespace Masterminds\HTML5\Parser;

/*
Portions based on code from html5lib files with the following copyright:

Copyright 2009 Geoffrey Sneddon <http://gsnedders.com/>

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

use Masterminds\HTML5\Exception;

class UTF8Utils
{
    /**
     * The Unicode replacement character.
     */
    const FFFD = "\xEF\xBF\xBD";

    /**
     * Count the number of characters in a string.
     * UTF-8 aware. This will try (in order) mbstring, iconv, and finally a custom counter.
     *
     * @param string $string
     *
     * @return int
     */
    public static function countChars($string)
    {
        // Get the length for the string we need.
        if (function_exists('mb_strlen')) {
            return mb_strlen($string, 'utf-8');
        }

        if (function_exists('iconv_strlen')) {
            return iconv_strlen($string, 'utf-8');
        }

        // Fallback: count ASCII bytes plus UTF-8 lead bytes, so that
        // continuation bytes are not counted.
        $count = count_chars($string);

        // 0x80 = 0x7F - 0 + 1 (one added to get inclusive range)
        // 0x33 = 0xF4 - 0xC2 + 1 (one added to get inclusive range)
        return array_sum(array_slice($count, 0, 0x80)) + array_sum(array_slice($count, 0xC2, 0x33));
    }

    /**
     * Convert data from the given encoding to UTF-8.
     *
     * This has not yet been tested with charactersets other than UTF-8.
     * It should work with ISO-8859-1/-13 and standard Latin Win charsets.
     *
     * A single leading byte order mark is removed from the result.
     *
     * @param string $data     The data to convert
     * @param string $encoding A valid encoding. Examples: http://www.php.net/manual/en/mbstring.supported-encodings.php
     *
     * @return string
     *
     * @throws Exception If neither mbstring nor a usable iconv is available.
     */
    public static function convertToUTF8($data, $encoding = 'UTF-8')
    {
        /*
         * From the HTML5 spec: Given an encoding, the bytes in the input stream must be converted
         * to Unicode characters for the tokeniser, as described by the rules for that encoding,
         * except that the leading U+FEFF BYTE ORDER MARK character, if any, must not be stripped
         * by the encoding layer (it is stripped by the rule below). Bytes or sequences of bytes
         * in the original byte stream that could not be converted to Unicode characters must be
         * converted to U+FFFD REPLACEMENT CHARACTER code points.
         */

        // mb_convert_encoding is chosen over iconv because of a bug. The best
        // details for the bug are on http://us1.php.net/manual/en/function.iconv.php#108643
        // which contains links to the actual bug reports as well as work around
        // details.
        if (function_exists('mb_convert_encoding')) {
            // mb library has the following behaviors:
            // - UTF-16 surrogates result in false.
            // - Overlongs and outside Plane 16 result in empty strings.

            // Before we run mb_convert_encoding we need to tell it what to do with
            // characters it does not know. This could be different than the parent
            // application executing this library so we store the value, change it
            // to our needs, and then change it back when we are done. This feels
            // a little excessive and it would be great if there was a better way.
            $save = mb_substitute_character();
            mb_substitute_character('none');
            $data = mb_convert_encoding($data, 'UTF-8', $encoding);
            mb_substitute_character($save);
        }
        // @todo Get iconv running in at least some environments if that is possible.
        elseif (function_exists('iconv') && 'auto' !== $encoding) {
            // fprintf(STDOUT, "iconv found\n");
            // iconv has the following behaviors:
            // - Overlong representations are ignored.
            // - Beyond Plane 16 is replaced with a lower char.
            // - Incomplete sequences generate a warning.
            $data = @iconv($encoding, 'UTF-8//IGNORE', $data);
        } else {
            throw new Exception('Not implemented, please install mbstring or iconv');
        }

        /*
         * One leading U+FEFF BYTE ORDER MARK character must be ignored if any are present.
         */
        if ("\xEF\xBB\xBF" === substr($data, 0, 3)) {
            $data = substr($data, 3);
        }

        return $data;
    }

    /**
     * Checks for Unicode code points that are not valid in a document.
     *
     * @param string $data A string to analyze
     *
     * @return array An array of (string) error messages produced by the scanning:
     *               one 'null-character' per NUL byte, followed by one
     *               'invalid-codepoint' per disallowed code point
     */
    public static function checkForIllegalCodepoints($data)
    {
        // Vestigal error handling.
        $errors = array();

        /*
         * All U+0000 null characters in the input must be replaced by U+FFFD REPLACEMENT CHARACTERs.
         * Any occurrences of such characters is a parse error.
         */
        for ($i = 0, $count = substr_count($data, "\0"); $i < $count; ++$i) {
            $errors[] = 'null-character';
        }

        /*
         * Any occurrences of any characters in the ranges U+0001 to U+0008, U+000B, U+000E to U+001F, U+007F
         * to U+009F, U+D800 to U+DFFF , U+FDD0 to U+FDEF, and characters U+FFFE, U+FFFF, U+1FFFE, U+1FFFF,
         * U+2FFFE, U+2FFFF, U+3FFFE, U+3FFFF, U+4FFFE, U+4FFFF, U+5FFFE, U+5FFFF, U+6FFFE, U+6FFFF, U+7FFFE,
         * U+7FFFF, U+8FFFE, U+8FFFF, U+9FFFE, U+9FFFF, U+AFFFE, U+AFFFF, U+BFFFE, U+BFFFF, U+CFFFE, U+CFFFF,
         * U+DFFFE, U+DFFFF, U+EFFFE, U+EFFFF, U+FFFFE, U+FFFFF, U+10FFFE, and U+10FFFF are parse errors.
         * (These are all control characters or permanently undefined Unicode characters.)
         */
        // The pattern works on raw bytes (no /u modifier). In the last
        // alternative the second byte of U+nFFFE / U+nFFFF is always one of
        // 8F, 9F, AF or BF; matching the whole 8F-BF range would also flag
        // valid code points such as U+10FFE (F0 90 BF BE).
        $count = preg_match_all(
            '/(?:
        [\x01-\x08\x0B\x0E-\x1F\x7F] # U+0001 to U+0008, U+000B,  U+000E to U+001F and U+007F
      |
        \xC2[\x80-\x9F] # U+0080 to U+009F
      |
        \xED(?:\xA0[\x80-\xFF]|[\xA1-\xBE][\x00-\xFF]|\xBF[\x00-\xBF]) # U+D800 to U+DFFFF
      |
        \xEF\xB7[\x90-\xAF] # U+FDD0 to U+FDEF
      |
        \xEF\xBF[\xBE\xBF] # U+FFFE and U+FFFF
      |
        [\xF0-\xF4][\x8F\x9F\xAF\xBF]\xBF[\xBE\xBF] # U+nFFFE and U+nFFFF (1 <= n <= 10_{16})
      )/x', $data, $matches);
        for ($i = 0; $i < $count; ++$i) {
            $errors[] = 'invalid-codepoint';
        }

        return $errors;
    }
}
PK      ��Z�%Ʉ�  �    Parser/README.mdnu W+A��        # The Parser Model

The parser model here follows the model in section
[8.2.1](http://www.w3.org/TR/2012/CR-html5-20121217/syntax.html#parsing)
of the HTML5 specification, though we do not assume a networking layer.

     [ InputStream ]    // Generic support for reading input.
           ||
      [ Scanner ]       // Breaks down the stream into characters.
           ||
     [ Tokenizer ]      // Groups characters into syntactic units.
           ||
    [ Tree Builder ]    // Organizes units into a tree of objects
           ||
     [ DOM Document ]     // The final state of the parsed document.


## InputStream

This is an interface with at least two concrete implementations:

- StringInputStream: Reads an HTML5 string.
- FileInputStream: Reads an HTML5 file.

## Scanner

This is a mechanical piece of the parser.

## Tokenizer

This follows section 8.4 of the HTML5 spec. It is (roughly) a recursive
descent parser. (Though there are plenty of optimizations that are less
than purely functional.)

## EventHandler and DOMTree

EventHandler is the interface for tree builders. Since not all
implementations will necessarily build trees, we've chosen a more
generic name.

The event handler emits tokens during tokenization.

The DOMTree is an event handler that builds a DOM tree. The output of
the DOMTree builder is a DOMDocument.

## DOMDocument

PHP has a DOMDocument class built-in (technically, it's part of libxml.)
We use that, thus rendering the output of this process compatible with
SimpleXML, QueryPath, and many other XML/HTML processing tools.

For cases where the HTML5 is a fragment of a HTML5 document a
DOMDocumentFragment is returned instead. This is another built-in class.
PK      ��Z��ز:%  :%    Parser/StringInputStream.phpnu W+A��        <?php
/**
 * Loads a string to be parsed.
 */

namespace Masterminds\HTML5\Parser;

/*
 *
* Based on code from html5lib:

Copyright 2009 Geoffrey Sneddon <http://gsnedders.com/>

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

*/

// Some conventions:
// - /* */ indicates verbatim text from the HTML 5 specification
//   MPB: Not sure which version of the spec. Moving from HTML5lib to
//   HTML5-PHP, I have been using this version:
//   http://www.w3.org/TR/2012/CR-html5-20121217/Overview.html#contents
//
// - // indicates regular comments

/**
 * @deprecated since 2.4, to remove in 3.0. Use a string in the scanner instead.
 */
class StringInputStream implements InputStream
{
    /**
     * The string data we're parsing.
     */
    private $data;

    /**
     * The current integer byte position we are in $data.
     */
    private $char;

    /**
     * Length of $data in bytes; when $char === $EOF, we are at the end-of-file.
     */
    private $EOF;

    /**
     * Parse errors.
     */
    public $errors = array();

    /**
     * Create a new InputStream wrapper.
     *
     * The data is converted to UTF-8, checked for illegal codepoints (the
     * result is stored in $errors) and has its line endings normalized
     * before it is stored.
     *
     * @param string $data     Data to parse.
     * @param string $encoding The encoding to use for the data.
     * @param string $debug    A fprintf format to use to echo the data on stdout.
     */
    public function __construct($data, $encoding = 'UTF-8', $debug = '')
    {
        $data = UTF8Utils::convertToUTF8($data, $encoding);
        if ($debug) {
            fprintf(STDOUT, $debug, $data, strlen($data));
        }

        // There is good reason to question whether it makes sense to
        // do this here, since most of these checks are done during
        // parsing, and since this check doesn't actually *do* anything.
        $this->errors = UTF8Utils::checkForIllegalCodepoints($data);

        $data = $this->replaceLinefeeds($data);

        $this->data = $data;
        $this->char = 0;
        $this->EOF = strlen($data);
    }

    /**
     * Returns the complete (converted and normalized) data string.
     *
     * @return string
     */
    public function __toString()
    {
        return $this->data;
    }

    /**
     * Replace linefeed characters according to the spec.
     *
     * NULL bytes are additionally replaced with the UTF-8 encoding of
     * U+FFFD (the bytes EF BF BD).
     *
     * @param string $data The data to normalize.
     *
     * @return string The normalized data.
     */
    protected function replaceLinefeeds($data)
    {
        /*
         * U+000D CARRIAGE RETURN (CR) characters and U+000A LINE FEED (LF) characters are treated specially.
         * Any CR characters that are followed by LF characters must be removed, and any CR characters not
         * followed by LF characters must be converted to LF characters. Thus, newlines in HTML DOMs are
         * represented by LF characters, and there are never any CR characters in the input to the tokenization
         * stage.
         */
        $crlfTable = array(
            "\0" => "\xEF\xBF\xBD",
            "\r\n" => "\n",
            "\r" => "\n",
        );

        // strtr() with an array tries the longest keys first, so "\r\n"
        // becomes a single "\n" rather than two.
        return strtr($data, $crlfTable);
    }

    /**
     * Returns the current line that the tokenizer is at.
     *
     * @return int The 1-based line number.
     */
    public function currentLine()
    {
        if (empty($this->EOF) || 0 === $this->char) {
            return 1;
        }

        // Count the newlines that precede the current position. The
        // position is clamped to the data length because next() can move
        // the pointer past the end of the data.
        return substr_count($this->data, "\n", 0, min($this->char, $this->EOF)) + 1;
    }

    /**
     * @deprecated
     */
    public function getCurrentLine()
    {
        return $this->currentLine();
    }

    /**
     * Returns the current column of the current line that the tokenizer is at.
     * Newlines are column 0. The first char after a newline is column 1.
     *
     * @return int The column number.
     */
    public function columnOffset()
    {
        // Short circuit for the first char.
        if (0 === $this->char) {
            return 0;
        }
        // strrpos is weird, and the offset needs to be negative for what we
        // want (i.e., the last \n before $this->char). strrpos's behaviour
        // includes the final offset byte, hence the extra "- 1".
        $backwardFrom = $this->char - 1 - strlen($this->data);
        $lastLine = strrpos($this->data, "\n", $backwardFrom);

        if (false !== $lastLine) {
            // Everything between the last newline and the current position.
            $findLengthOf = substr($this->data, $lastLine + 1, $this->char - 1 - $lastLine);
        } else {
            // No newline before the current position: we are still on the
            // first line, so measure from the start of the data.
            $findLengthOf = substr($this->data, 0, $this->char);
        }

        // Columns are counted via UTF8Utils::countChars() rather than strlen().
        return UTF8Utils::countChars($findLengthOf);
    }

    /**
     * @deprecated
     */
    public function getColumnOffset()
    {
        return $this->columnOffset();
    }

    /**
     * Get the current character.
     *
     * No bounds checking is done here; check valid() first.
     *
     * @return string The current character.
     */
    public function current()
    {
        return $this->data[$this->char];
    }

    /**
     * Advance the pointer.
     * This is part of the Iterator interface.
     */
    public function next()
    {
        ++$this->char;
    }

    /**
     * Rewind to the start of the string.
     */
    public function rewind()
    {
        $this->char = 0;
    }

    /**
     * Is the current pointer location valid.
     *
     * @return bool Whether the current pointer location is valid.
     */
    public function valid()
    {
        return $this->char < $this->EOF;
    }

    /**
     * Get all characters until EOF.
     *
     * This reads to the end of the file, and sets the read marker at the
     * end of the file.
     *
     * Note this performs bounds checking.
     *
     * @return string Returns the remaining text. If called when the InputStream is
     *                already exhausted, it returns an empty string.
     */
    public function remainingChars()
    {
        if ($this->char < $this->EOF) {
            $data = substr($this->data, $this->char);
            $this->char = $this->EOF;

            return $data;
        }

        return '';
    }

    /**
     * Read to a particular match (or until $max bytes are consumed).
     *
     * This operates on byte sequences, not characters.
     *
     * Matches as far as possible until we reach a certain set of bytes
     * and returns the matched substring. The pointer is advanced past the
     * returned bytes.
     *
     * @param string $bytes Bytes to match.
     * @param int    $max   Maximum number of bytes to scan.
     *
     * @return string|false The bytes read (possibly an empty string), or false
     *                      if the stream is already at EOF. Use strict
     *                      comparison when checking for false.
     */
    public function charsUntil($bytes, $max = null)
    {
        if ($this->char >= $this->EOF) {
            return false;
        }

        // An explicit 0 is honoured as a limit; null (or any other falsy
        // value) means "no limit".
        if (0 === $max || $max) {
            $len = strcspn($this->data, $bytes, $this->char, $max);
        } else {
            $len = strcspn($this->data, $bytes, $this->char);
        }

        $string = (string) substr($this->data, $this->char, $len);
        $this->char += $len;

        return $string;
    }

    /**
     * Returns the string so long as $bytes matches.
     *
     * Matches as far as possible with a certain set of bytes
     * and returns the matched substring. The pointer is advanced past the
     * returned bytes.
     *
     * @param string $bytes A mask of bytes to match. If ANY byte in this mask matches the
     *                      current char, the pointer advances and the char is part of the
     *                      substring.
     * @param int    $max   The max number of chars to read.
     *
     * @return string|false The bytes read (possibly an empty string), or false
     *                      if the stream is already at EOF.
     */
    public function charsWhile($bytes, $max = null)
    {
        if ($this->char >= $this->EOF) {
            return false;
        }

        // An explicit 0 is honoured as a limit; null (or any other falsy
        // value) means "no limit".
        if (0 === $max || $max) {
            $len = strspn($this->data, $bytes, $this->char, $max);
        } else {
            $len = strspn($this->data, $bytes, $this->char);
        }
        $string = (string) substr($this->data, $this->char, $len);
        $this->char += $len;

        return $string;
    }

    /**
     * Unconsume characters.
     *
     * The request is silently ignored when it would move the pointer before
     * the start of the data.
     *
     * @param int $howMany The number of characters to unconsume.
     */
    public function unconsume($howMany = 1)
    {
        if (($this->char - $howMany) >= 0) {
            $this->char -= $howMany;
        }
    }

    /**
     * Look ahead without moving cursor.
     *
     * @return string|false The byte after the current one, or false when
     *                      there is no such byte.
     */
    public function peek()
    {
        // Strictly less than: offset $this->EOF is one past the last byte,
        // so reading it would be an out-of-range string offset.
        if (($this->char + 1) < $this->EOF) {
            return $this->data[$this->char + 1];
        }

        return false;
    }

    /**
     * Returns the current byte position.
     * This is part of the Iterator interface.
     *
     * @return int
     */
    public function key()
    {
        return $this->char;
    }
}
PK      ��ZP�>��  ��    Parser/Tokenizer.phpnu W+A��        <?php

namespace Masterminds\HTML5\Parser;

use Masterminds\HTML5\Elements;

/**
 * The HTML5 tokenizer.
 *
 * The tokenizer's role is reading data from the scanner and gathering it into
 * semantic units. From the tokenizer, data is emitted to an event handler,
 * which may (for example) create a DOM tree.
 *
 * The HTML5 specification has a detailed explanation of tokenizing HTML5. We
 * follow that specification to the maximum extent that we can. If you find
 * a discrepancy that is not documented, please file a bug and/or submit a
 * patch.
 *
 * This tokenizer is implemented as a recursive descent parser.
 *
 * Within the API documentation, you may see references to the specific section
 * of the HTML5 spec that the code attempts to reproduce. Example: 8.2.4.1.
 * This refers to section 8.2.4.1 of the HTML5 CR specification.
 *
 * @see http://www.w3.org/TR/2012/CR-html5-20121217/
 */
class Tokenizer
{
    /**
     * The input scanner; set from the constructor's $scanner argument.
     */
    protected $scanner;

    /**
     * The event handler that receives tokenizer output; set from the
     * constructor's $eventHandler argument.
     */
    protected $events;

    // NOTE(review): not referenced by the methods visible in this part of the
    // class -- confirm whether it is still used before relying on it.
    protected $tok;

    /**
     * Buffer for text.
     */
    protected $text = '';

    // When this goes to false, the parser stops.
    protected $carryOn = true;

    // Current text mode; only ever set through setTextMode(), which masks it
    // down to the Elements::TEXT_RAW / Elements::TEXT_RCDATA bits.
    protected $textMode = 0; // TEXTMODE_NORMAL;
    // Tag name whose closing tag ends RAW/RCDATA mode (null when unset).
    protected $untilTag = null;

    // Conformance modes. In CONFORMANT_XML mode tag names keep their case;
    // otherwise they are lowercased (see tagName() and endTag()).
    const CONFORMANT_XML = 'xml';
    const CONFORMANT_HTML = 'html';
    protected $mode = self::CONFORMANT_HTML;

    /**
     * Create a new tokenizer.
     *
     * Typically, parsing a document involves creating a new tokenizer, giving
     * it a scanner (input) and an event handler (output), and then calling
     * the Tokenizer::parse() method.
     *
     * @param Scanner      $scanner      A scanner initialized with an input stream.
     * @param EventHandler $eventHandler An event handler, initialized and ready to receive events.
     * @param string       $mode
     */
    public function __construct($scanner, $eventHandler, $mode = self::CONFORMANT_HTML)
    {
        // The three arguments are stored as-is; no validation happens here.
        $this->mode = $mode;
        $this->events = $eventHandler;
        $this->scanner = $scanner;
    }

    /**
     * Begin parsing.
     *
     * This will begin scanning the document, tokenizing as it goes.
     * Tokens are emitted into the event handler.
     *
     * Tokenizing will continue until the document is completely
     * read. Errors are emitted into the event handler, but
     * the parser will attempt to continue parsing until the
     * entire input stream is read.
     */
    public function parse()
    {
        // FIXME: Add infinite loop protection.
        while (true) {
            // Always consume at least once, then stop as soon as the
            // carry-on flag has been cleared (eof() does that).
            $this->consumeData();

            if (!$this->carryOn) {
                break;
            }
        }
    }

    /**
     * Set the text mode for the character data reader.
     *
     * HTML5 defines three different modes for reading text:
     * - Normal: Read until a tag is encountered.
     * - RCDATA: Read until a tag is encountered, but skip a few otherwise-
     * special characters.
     * - Raw: Read until a special closing tag is encountered (viz. pre, script)
     *
     * This allows those modes to be set.
     *
     * Normally, setting is done by the event handler via a special return code on
     * startTag(), but it can also be set manually using this function.
     *
     * @param int    $textmode One of Elements::TEXT_*.
     * @param string $untilTag The tag that should stop RAW or RCDATA mode. Normal mode does not
     *                         use this indicator.
     */
    public function setTextMode($textmode, $untilTag = null)
    {
        // Only the RAW and RCDATA bits survive the mask; any other value
        // therefore ends up as 0, which is the normal text mode.
        $supportedModes = Elements::TEXT_RAW | Elements::TEXT_RCDATA;

        $this->untilTag = $untilTag;
        $this->textMode = $textmode & $supportedModes;
    }

    /**
     * Consume a character and make a move.
     * HTML5 8.2.4.1.
     */
    protected function consumeData()
    {
        // One pass handles, in this order: an optional character reference,
        // an optional tag-like construct, and then either EOF or one run of
        // character data for the active text mode.
        $tok = $this->scanner->current();

        if ('&' === $tok) {
            // Character reference
            $ref = $this->decodeCharacterReference();
            $this->buffer($ref);

            // Re-read the current token after the reference was decoded.
            $tok = $this->scanner->current();
        }

        // Parse tag
        if ('<' === $tok) {
            // Any buffered text data can go out now.
            $this->flushBuffer();

            // Dispatch on the character that follows the '<'.
            $tok = $this->scanner->next();

            if (false === $tok) {
                // end of string
                $this->parseError('Illegal tag opening');
            } elseif ('!' === $tok) {
                $this->markupDeclaration();
            } elseif ('/' === $tok) {
                $this->endTag();
            } elseif ('?' === $tok) {
                $this->processingInstruction();
            } elseif ($this->is_alpha($tok)) {
                $this->tagName();
            } else {
                $this->parseError('Illegal tag opening');
                // TODO is this necessary ?
                $this->characterData();
            }

            // Re-read the current token after the tag handler ran.
            $tok = $this->scanner->current();
        }

        if (false === $tok) {
            // Handle end of document
            $this->eof();
        } else {
            // Parse character
            switch ($this->textMode) {
                case Elements::TEXT_RAW:
                    $this->rawText($tok);
                    break;

                case Elements::TEXT_RCDATA:
                    $this->rcdata($tok);
                    break;

                default:
                    // '<' and '&' are left for the next pass, which handles
                    // them at the top of this method.
                    if ('<' === $tok || '&' === $tok) {
                        break;
                    }

                    // NULL character
                    if ("\00" === $tok) {
                        $this->parseError('Received null character.');

                        // The NULL is reported but still appended to the
                        // text buffer unchanged.
                        $this->text .= $tok;
                        $this->scanner->consume();

                        break;
                    }

                    // Append everything up to the next '<', '&' or NULL
                    // directly to the text buffer.
                    $this->text .= $this->scanner->charsUntil("<&\0");
            }
        }

        // False once eof() has run; parse() stops looping on that.
        return $this->carryOn;
    }

    /**
     * Parse anything that looks like character data.
     *
     * Different rules apply based on the current text mode.
     *
     * @see Elements::TEXT_RAW Elements::TEXT_RCDATA.
     */
    protected function characterData()
    {
        $current = $this->scanner->current();

        // Nothing left to read.
        if (false === $current) {
            return false;
        }

        // RAW and RCDATA have dedicated readers.
        if (Elements::TEXT_RAW == $this->textMode) {
            return $this->rawText($current);
        }
        if (Elements::TEXT_RCDATA == $this->textMode) {
            return $this->rcdata($current);
        }

        // Normal mode: '<' and '&' are not character data.
        if ('<' === $current || '&' === $current) {
            return false;
        }

        return $this->text($current);
    }

    /**
     * This buffers the current token as character data.
     *
     * @param string $tok The current token.
     *
     * @return bool
     */
    protected function text($tok)
    {
        if (false !== $tok) {
            // A NULL character is reported as a parse error, but it is
            // still buffered and consumed like any other token.
            if ("\00" === $tok) {
                $this->parseError('Received null character.');
            }

            $this->buffer($tok);
            $this->scanner->consume();

            return true;
        }

        // This should never happen...
        return false;
    }

    /**
     * Read text in RAW mode.
     *
     * @param string $tok The current token.
     *
     * @return bool
     */
    protected function rawText($tok)
    {
        // Without a terminating tag there is nothing to scan for, so the
        // token is handled as ordinary text.
        if (null === $this->untilTag) {
            return $this->text($tok);
        }

        // Read everything in front of the closing tag and emit it as one
        // text event, then drop back to normal text mode.
        $rawContent = $this->readUntilSequence('</' . $this->untilTag . '>');
        $this->events->text($rawContent);
        $this->setTextMode(0);

        return $this->endTag();
    }

    /**
     * Read text in RCDATA mode.
     *
     * @param string $tok The current token.
     *
     * @return bool
     */
    protected function rcdata($tok)
    {
        // Without a terminating tag the token is handled as ordinary text.
        if (is_null($this->untilTag)) {
            return $this->text($tok);
        }

        // Note: no trailing '>' here; whitespace may precede it (see below).
        $sequence = '</' . $this->untilTag;
        $txt = '';

        // The flag is passed to sequenceMatches(); it is true only when the
        // tag is NOT a known HTML5 element.
        $caseSensitive = !Elements::isHtml5Element($this->untilTag);
        // Collect text until EOF or until '<' starts the closing sequence.
        while (false !== $tok && !('<' == $tok && ($this->scanner->sequenceMatches($sequence, $caseSensitive)))) {
            if ('&' == $tok) {
                // Unlike RAW mode, character references are decoded here.
                $txt .= $this->decodeCharacterReference();
                $tok = $this->scanner->current();
            } else {
                $txt .= $tok;
                $tok = $this->scanner->next();
            }
        }
        // Look past the closing sequence and any whitespace only to check
        // for the final '>'; $len tracks how far we moved so it can be undone.
        $len = strlen($sequence);
        $this->scanner->consume($len);
        $len += $this->scanner->whitespace();
        if ('>' !== $this->scanner->current()) {
            $this->parseError('Unclosed RCDATA end tag');
        }

        // Undo the look-ahead, emit the collected text and return to normal
        // text mode.
        $this->scanner->unconsume($len);
        $this->events->text($txt);
        $this->setTextMode(0);

        // NOTE(review): after unconsume() the scanner presumably sits on the
        // '<' again, and endTag() returns false unless the current token is
        // '/'. If so, the closing tag is actually processed by the next
        // consumeData() pass -- confirm against the Scanner implementation.
        return $this->endTag();
    }

    /**
     * If the document is read, emit an EOF event.
     */
    protected function eof()
    {
        // Pending text has to go out before the EOF event is emitted.
        $this->flushBuffer();
        $this->events->eof();

        // parse() stops looping once this flag is false.
        $this->carryOn = false;
    }

    /**
     * Look for markup.
     */
    protected function markupDeclaration()
    {
        $tok = $this->scanner->next();

        // Comment: two dashes in a row.
        if ('-' == $tok && '-' == $this->scanner->peek()) {
            $this->scanner->consume(2);

            return $this->comment();
        }

        // Doctype: only the first letter is checked here.
        if ('D' == $tok || 'd' == $tok) {
            return $this->doctype();
        }

        // CDATA section
        if ('[' == $tok) {
            return $this->cdataSection();
        }

        // Anything else is reported and swallowed as a bogus comment.
        $this->parseError('Expected <!--, <![CDATA[, or <!DOCTYPE. Got <!%s', $tok);
        $this->bogusComment('<!');

        return true;
    }

    /**
     * Consume an end tag. See section 8.2.4.9.
     */
    protected function endTag()
    {
        // Only applicable when the scanner is on the '/' of a closing tag.
        if ('/' != $this->scanner->current()) {
            return false;
        }
        $first = $this->scanner->next();

        // a-zA-Z -> tagname
        // anything else (including '>' and EOF) -> parse error
        if (!$this->is_alpha($first)) {
            $this->parseError("Expected tag name, got '%s'", $first);

            // NULL and EOF end the attempt; everything else is turned into
            // a bogus comment.
            return ("\0" == $first || false === $first) ? false : $this->bogusComment('</');
        }

        $name = $this->scanner->charsUntil("\n\f \t>");
        if (self::CONFORMANT_XML !== $this->mode) {
            // Outside of XML mode tag names are lowercased.
            $name = strtolower($name);
        }

        // Trash whitespace.
        $this->scanner->whitespace();

        $closer = $this->scanner->current();
        if ('>' != $closer) {
            $this->parseError("Expected >, got '%s'", $closer);
            // We just trash stuff until we get to the next tag close.
            $this->scanner->charsUntil('>');
        }

        $this->events->endTag($name);
        $this->scanner->consume();

        return true;
    }

    /**
     * Consume a tag name and body. See section 8.2.4.10.
     */
    protected function tagName()
    {
        // We know this is at least one char.
        $name = $this->scanner->charsWhile(':_-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz');
        if (self::CONFORMANT_XML !== $this->mode) {
            // Outside of XML mode tag names are lowercased.
            $name = strtolower($name);
        }

        $selfClose = false;
        $attributes = array();

        // Attribute parse exceptions are handled here so that we can still
        // build a sensible parse tree from whatever was read so far.
        try {
            do {
                $this->scanner->whitespace();
                $this->attribute($attributes);
            } while (!$this->isTagEnd($selfClose));
        } catch (ParseError $e) {
            $selfClose = false;
        }

        // The handler may answer with an integer text mode, which then
        // applies until the matching closing tag.
        $requestedMode = $this->events->startTag($name, $attributes, $selfClose);
        if (is_int($requestedMode)) {
            $this->setTextMode($requestedMode, $name);
        }

        $this->scanner->consume();

        return true;
    }

    /**
     * Check if the scanner has reached the end of a tag.
     */
    protected function isTagEnd(&$selfClose)
    {
        $tok = $this->scanner->current();

        // A slash may introduce a self-closing tag: step over it and any
        // whitespace, then look at what follows.
        $afterSlash = '/' == $tok;
        if ($afterSlash) {
            $this->scanner->consume();
            $this->scanner->whitespace();
            $tok = $this->scanner->current();
        }

        if ('>' == $tok) {
            if ($afterSlash) {
                $selfClose = true;
            }

            return true;
        }

        if (false === $tok) {
            $this->parseError('Unexpected EOF inside of tag.');

            return true;
        }

        if ($afterSlash) {
            // Basically, we skip the / token and go on.
            // See 8.2.4.43.
            $this->parseError("Unexpected '%s' inside of a tag.", $tok);
        }

        return false;
    }

    /**
     * Parse attributes from inside of a tag.
     *
     * @param string[] $attributes
     *
     * @return bool
     *
     * @throws ParseError
     */
    protected function attribute(&$attributes)
    {
        // Nothing to parse when the tag is ending or the input is exhausted.
        $tok = $this->scanner->current();
        if ('/' == $tok || '>' == $tok || false === $tok) {
            return false;
        }

        if ('<' == $tok) {
            $this->parseError("Unexpected '<' inside of attributes list.");
            // Push the < back onto the stack.
            $this->scanner->unconsume();
            // Let the caller figure out how to handle this.
            throw new ParseError('Start tag inside of attribute.');
        }

        // Attribute names are always lowercased, regardless of $this->mode.
        $name = strtolower($this->scanner->charsUntil("/>=\n\f\t "));

        if (0 == strlen($name)) {
            $tok = $this->scanner->current();
            $this->parseError('Expected an attribute name, got %s.', $tok);
            // Really, only '=' can be the char here. Everything else gets absorbed
            // under one rule or another.
            $name = $tok;
            $this->scanner->consume();
        }

        $isValidAttribute = true;
        // Attribute names can contain most Unicode characters for HTML5.
        // But method "DOMElement::setAttribute" is throwing exception
        // because of it's own internal restriction so these have to be filtered.
        // see issue #23: https://github.com/Masterminds/html5-php/issues/23
        // and http://www.w3.org/TR/2011/WD-html5-20110525/syntax.html#syntax-attribute-name
        if (preg_match("/[\x1-\x2C\\/\x3B-\x40\x5B-\x5E\x60\x7B-\x7F]/u", $name)) {
            $this->parseError('Unexpected characters in attribute name: %s', $name);
            $isValidAttribute = false;
        }         // There is no limitation for 1st character in HTML5.
        // But method "DOMElement::setAttribute" is throwing exception for the
        // characters below so they have to be filtered.
        // see issue #23: https://github.com/Masterminds/html5-php/issues/23
        // and http://www.w3.org/TR/2011/WD-html5-20110525/syntax.html#syntax-attribute-name
        elseif (preg_match('/^[0-9.-]/u', $name)) {
            $this->parseError('Unexpected character at the begining of attribute name: %s', $name);
            $isValidAttribute = false;
        }
        // 8.1.2.3
        $this->scanner->whitespace();

        // The value is always read, even for a rejected name, so the scanner
        // moves past it; only valid names are stored in $attributes.
        $val = $this->attributeValue();
        if ($isValidAttribute) {
            $attributes[$name] = $val;
        }

        return true;
    }

    /**
     * Consume an attribute value. See section 8.2.4.37 and after.
     *
     * @return string|null
     */
    protected function attributeValue()
    {
        // No '=' means the attribute has no value at all.
        if ('=' != $this->scanner->current()) {
            return null;
        }
        $this->scanner->consume();
        // 8.1.2.3
        $this->scanner->whitespace();

        $first = $this->scanner->current();
        switch ($first) {
            case "\n":
            case "\f":
            case ' ':
            case "\t":
                // Whitespace here indicates an empty value.
                return null;

            case '"':
            case "'":
                // Step over the opening quote; the same character closes it.
                $this->scanner->consume();

                return $this->quotedAttributeValue($first);

            case '>':
                // case '/': // 8.2.4.37 seems to allow foo=/ as a valid attr.
                $this->parseError('Expected attribute value, got tag end.');

                return null;

            case '=':
            case '`':
                // Reported, but then read like any other unquoted value.
                $this->parseError('Expecting quotes, got %s.', $first);
                // no break
            default:
                return $this->unquotedAttributeValue();
        }
    }

    /**
     * Get an attribute value string.
     *
     * @param string $quote IMPORTANT: This is a series of chars! Any one of which will be considered
     *                      termination of an attribute's value. E.g. "\"'" will stop at either
     *                      ' or ".
     *
     * @return string The attribute value.
     */
    protected function quotedAttributeValue($quote)
    {
        // Reading pauses at a form feed, at any of the quote characters and
        // at '&' (so that character references can be decoded).
        $terminators = "\f" . $quote . '&';
        $value = '';

        do {
            $chunk = $this->scanner->charsUntil($terminators);
            if (false === $chunk) {
                break;
            }
            $value .= $chunk;

            // Decode a character reference and keep reading; any other
            // terminator ends the value.
            $atReference = '&' == $this->scanner->current();
            if ($atReference) {
                $value .= $this->decodeCharacterReference(true);
            }
        } while ($atReference);

        // Step over the terminating character.
        $this->scanner->consume();

        return $value;
    }

    /**
     * Read an attribute value that is not wrapped in quotes.
     *
     * Reading stops at whitespace, '>' or the end of the input; the
     * terminating character itself is not consumed.
     *
     * @return string The attribute value.
     */
    protected function unquotedAttributeValue()
    {
        $value = '';
        $tok = $this->scanner->current();

        while (false !== $tok) {
            // Whitespace or '>' ends the value.
            if ("\n" == $tok || "\f" == $tok || ' ' == $tok || "\t" == $tok || '>' == $tok) {
                break;
            }

            if ('&' == $tok) {
                $value .= $this->decodeCharacterReference(true);
                $tok = $this->scanner->current();

                continue;
            }

            // These characters are parse errors here, but they are kept as
            // part of the value.
            if ("'" == $tok || '"' == $tok || '<' == $tok || '=' == $tok || '`' == $tok) {
                $this->parseError('Unexpected chars in unquoted attribute value %s', $tok);
                $value .= $tok;
                $tok = $this->scanner->next();

                continue;
            }

            // Ordinary characters: take the whole run up to the next
            // character that needs special handling.
            $value .= $this->scanner->charsUntil("\t\n\f >&\"'<=`");
            $tok = $this->scanner->current();
        }

        return $value;
    }

    /**
     * Consume malformed markup as if it were a comment.
     * 8.2.4.44.
     *
     * The spec requires that the ENTIRE tag-like thing be enclosed inside of
     * the comment. So this will generate comments like:
     *
     * &lt;!--&lt/+foo&gt;--&gt;
     *
     * @param string $leading Prepend any leading characters. This essentially
     *                        negates the need to backtrack, but it's sort of a hack.
     *
     * @return bool
     */
    protected function bogusComment($leading = '')
    {
        // Everything up to the next '>' belongs to the comment ...
        $body = $this->scanner->charsUntil('>');
        $comment = $leading . (false !== $body ? $body : '');

        // ... and so does the character we stopped on, if there is one.
        $closer = $this->scanner->current();
        if (false !== $closer) {
            $comment .= $closer;
        }

        // Pending text goes out first, then the comment event.
        $this->flushBuffer();
        $this->events->comment($comment);
        $this->scanner->consume();

        return true;
    }

    /**
     * Read a comment.
     * Expects the first tok to be inside of the comment.
     *
     * @return bool
     */
    protected function comment()
    {
        $tok = $this->scanner->current();

        // <!-->. Emit an empty comment because 8.2.4.46 says to.
        if ('>' == $tok) {
            $this->parseError("Expected comment data, got '>'");
            $this->events->comment('');
            $this->scanner->consume();

            return true;
        }

        // Only the first token is checked for NULL here; it is swapped for
        // the replacement char.
        if ("\0" == $tok) {
            $tok = UTF8Utils::FFFD;
        }

        // Collect one token at a time until the end-of-comment check fires.
        $body = '';
        for (; !$this->isCommentEnd(); $tok = $this->scanner->next()) {
            $body .= $tok;
        }

        $this->events->comment($body);
        $this->scanner->consume();

        return true;
    }

    /**
     * Check if the scanner has reached the end of a comment.
     *
     * @return bool
     */
    protected function isCommentEnd()
    {
        $tok = $this->scanner->current();

        // EOF
        if (false === $tok) {
            // Hit the end.
            $this->parseError('Unexpected EOF in a comment.');

            return true;
        }

        // If the current and the next token are not '--', not the end.
        if ('-' != $tok || '-' != $this->scanner->peek()) {
            return false;
        }

        // Step over the '--' so the character after it can be inspected.
        $this->scanner->consume(2);

        // Test for '>' (i.e. "-->"). The '>' itself is left for the caller.
        if ('>' == $this->scanner->current()) {
            return true;
        }
        // Test for '!>' (i.e. "--!>")
        if ('!' == $this->scanner->current() && '>' == $this->scanner->peek()) {
            // Step over the '!'; the '>' itself is left for the caller.
            $this->scanner->consume();
            return true;
        }
        // Not an end sequence after all: put the '--' back.
        $this->scanner->unconsume(2);

        return false;
    }

    /**
     * Parse a DOCTYPE.
     *
     * Parse a DOCTYPE declaration. This method has strong bearing on whether or
     * not Quirksmode is enabled on the event handler: the fourth argument passed
     * to EventHandler::doctype() is the quirks flag, and it is set to true on
     * the malformed paths below.
     *
     * Once the DOCTYPE keyword has been matched (case-insensitively), every
     * path emits exactly one doctype event. If the keyword does not match, the
     * input is handed to bogusComment() instead.
     *
     * @todo This method is a little long. Should probably refactor.
     *
     * @return bool True once a doctype event has been emitted; otherwise
     *              whatever bogusComment() returns.
     */
    protected function doctype()
    {
        // Check that string is DOCTYPE.
        if ($this->scanner->sequenceMatches('DOCTYPE', false)) {
            $this->scanner->consume(7);
        } else {
            $chars = $this->scanner->charsWhile('DOCTYPEdoctype');
            $this->parseError('Expected DOCTYPE, got %s', $chars);

            return $this->bogusComment('<!' . $chars);
        }

        $this->scanner->whitespace();
        $tok = $this->scanner->current();

        // EOF: die.
        if (false === $tok) {
            $this->events->doctype('html5', EventHandler::DOCTYPE_NONE, '', true);
            $this->eof();

            return true;
        }

        // NULL char: report it here; it is replaced in the name below.
        if ("\0" === $tok) {
            $this->parseError('Unexpected null character in DOCTYPE.');
        }

        $stop = " \n\f>";
        $doctypeName = $this->scanner->charsUntil($stop);
        // Lowercase ASCII, then replace every \0 with FFFD.
        // str_replace() is required here: the three-argument form of strtr()
        // maps byte-for-byte and truncates the replacement to the length of
        // the search string, so it would insert only the first byte of a
        // multi-byte FFFD. Lowercasing first keeps strtolower() away from the
        // replacement bytes.
        $doctypeName = str_replace("\0", UTF8Utils::FFFD, strtolower($doctypeName));

        $tok = $this->scanner->current();

        // If false, emit a parse error, DOCTYPE, and return.
        if (false === $tok) {
            $this->parseError('Unexpected EOF in DOCTYPE declaration.');
            $this->events->doctype($doctypeName, EventHandler::DOCTYPE_NONE, null, true);

            return true;
        }

        // Short DOCTYPE, like <!DOCTYPE html>
        if ('>' == $tok) {
            // DOCTYPE without a name.
            if (0 == strlen($doctypeName)) {
                $this->parseError('Expected a DOCTYPE name. Got nothing.');
                $this->events->doctype($doctypeName, 0, null, true);
                $this->scanner->consume();

                return true;
            }
            $this->events->doctype($doctypeName);
            $this->scanner->consume();

            return true;
        }
        $this->scanner->whitespace();

        $pub = strtoupper($this->scanner->getAsciiAlpha());
        $white = $this->scanner->whitespace();

        // Get ID, and flag it as pub or system.
        if (('PUBLIC' == $pub || 'SYSTEM' == $pub) && $white > 0) {
            // Get the sys ID.
            $type = 'PUBLIC' == $pub ? EventHandler::DOCTYPE_PUBLIC : EventHandler::DOCTYPE_SYSTEM;
            $id = $this->quotedString("\0>");
            if (false === $id) {
                // No quoted identifier follows the keyword. The keyword itself
                // is passed in the id slot and quirks mode is not requested.
                $this->events->doctype($doctypeName, $type, $pub, false);

                return true;
            }

            // Premature EOF.
            if (false === $this->scanner->current()) {
                $this->parseError('Unexpected EOF in DOCTYPE');
                $this->events->doctype($doctypeName, $type, $id, true);

                return true;
            }

            // Well-formed complete DOCTYPE.
            $this->scanner->whitespace();
            if ('>' == $this->scanner->current()) {
                $this->events->doctype($doctypeName, $type, $id, false);
                $this->scanner->consume();

                return true;
            }

            // If we get here, we have <!DOCTYPE foo PUBLIC "bar" SOME_JUNK
            // Throw away the junk, parse error, quirks mode, return true.
            $this->scanner->charsUntil('>');
            $this->parseError('Malformed DOCTYPE.');
            $this->events->doctype($doctypeName, $type, $id, true);
            $this->scanner->consume();

            return true;
        }

        // Else it's a bogus DOCTYPE.
        // Consume to > and trash.
        $this->scanner->charsUntil('>');

        $this->parseError('Expected PUBLIC or SYSTEM. Got %s.', $pub);
        $this->events->doctype($doctypeName, 0, null, true);
        $this->scanner->consume();

        return true;
    }

    /**
     * Read a single- or double-quoted string starting at the current character.
     *
     * The opening quote is consumed, then characters are collected until the
     * matching quote or any of $stopchars is reached. A matching close quote
     * is consumed; if the read stopped on anything else, a parse error is
     * emitted and the text collected so far is still returned.
     *
     * @param string $stopchars Characters (in addition to a close-quote) that should stop the string.
     *                          E.g. sometimes '>' is higher precedence than '"' or "'".
     *
     * @return mixed The string without its quotation marks, or false when the
     *               current character is not a quote.
     */
    protected function quotedString($stopchars)
    {
        $quote = $this->scanner->current();

        // Nothing to read unless we are sitting on an opening quote.
        if ('"' != $quote && "'" != $quote) {
            return false;
        }

        $this->scanner->consume();
        $value = $this->scanner->charsUntil($quote . $stopchars);

        if ($this->scanner->current() != $quote) {
            // Parse error because no close quote.
            $this->parseError('Expected %s, got %s', $quote, $this->scanner->current());

            return $value;
        }

        // Step over the closing quote.
        $this->scanner->consume();

        return $value;
    }

    /**
     * Handle a CDATA section.
     *
     * Consumes one character, then expects "CDATA" followed by "[". If that
     * does not match, the input is handed to bogusComment(). Otherwise
     * everything up to the closing "]]>" is collected and emitted through
     * EventHandler::cdata(). Hitting EOF before "]]>" is a parse error and the
     * collected text is passed to bogusComment() instead.
     *
     * @return bool True once the section (or its EOF fallback) has been
     *              handled; otherwise whatever bogusComment() returns.
     */
    protected function cdataSection()
    {
        $cdata = '';
        $this->scanner->consume();

        $chars = $this->scanner->charsWhile('CDAT');
        if ('CDATA' != $chars || '[' != $this->scanner->current()) {
            $this->parseError('Expected [CDATA[, got %s', $chars);

            return $this->bogusComment('<![' . $chars);
        }

        $tok = $this->scanner->next();
        // Test for the terminator BEFORE appending anything. A post-tested
        // loop would always swallow the first character, so an empty section
        // (<![CDATA[]]>) would consume the first ']' of its own terminator
        // and never see it.
        while (!$this->scanner->sequenceMatches(']]>')) {
            if (false === $tok) {
                $this->parseError('Unexpected EOF inside CDATA.');
                $this->bogusComment('<![CDATA[' . $cdata);

                return true;
            }
            $cdata .= $tok;
            $tok = $this->scanner->next();
        }

        // Consume ]]>
        $this->scanner->consume(3);

        $this->events->cdata($cdata);

        return true;
    }

    // ================================================================
    // Non-HTML5
    // ================================================================

    /**
     * Handle a processing instruction.
     *
     * XML processing instructions are supposed to be ignored in HTML5,
     * treated as "bogus comments". However, since we're not a user
     * agent, we allow them. We consume until ?> and then issue an
     * EventHandler::processingInstruction() event.
     *
     * Input that does not look like "<?name " followed by more data is passed
     * to bogusComment() instead. If EOF is reached before "?>", a parse error
     * is emitted and the event is still fired with the data read so far.
     *
     * @return bool False if the current character is not '?'; true otherwise.
     */
    protected function processingInstruction()
    {
        if ('?' != $this->scanner->current()) {
            return false;
        }

        $tok = $this->scanner->next();
        $procName = $this->scanner->getAsciiAlpha();
        $white = $this->scanner->whitespace();

        // If not a PI, send to bogusComment.
        // The EOF test must be strict: a loose comparison with false is also
        // true for the character '0', which would misclassify a PI whose data
        // starts with a zero (e.g. "<?foo 0 ?>").
        if (0 == strlen($procName) || 0 == $white || false === $this->scanner->current()) {
            $this->parseError("Expected processing instruction name, got $tok");
            // NOTE(review): $tok is the character $procName was read from, so
            // this prefix may repeat it; confirm against bogusComment().
            $this->bogusComment('<?' . $tok . $procName);

            return true;
        }

        $data = '';
        // As long as it's not the case that the next two chars are ? and >.
        while (!('?' == $this->scanner->current() && '>' == $this->scanner->peek())) {
            $data .= $this->scanner->current();

            $tok = $this->scanner->next();
            if (false === $tok) {
                $this->parseError('Unexpected EOF in processing instruction.');
                $this->events->processingInstruction($procName, $data);

                return true;
            }
        }

        $this->scanner->consume(2); // Consume the closing tag
        $this->events->processingInstruction($procName, $data);

        return true;
    }

    // ================================================================
    // UTILITY FUNCTIONS
    // ================================================================

    /**
     * Read from the input stream until we get to the desired sequence
     * or hit the end of the input stream.
     *
     * The sequence itself is not consumed and is matched case-insensitively.
     * Reaching the end of the input first is reported as a parse error.
     *
     * @param string $sequence
     *
     * @return string The text read before the sequence (or before EOF).
     */
    protected function readUntilSequence($sequence)
    {
        $collected = '';

        // Reading in bulk up to the sequence's first character is faster than
        // testing for the full sequence at every position.
        $lead = substr($sequence, 0, 1);

        while (true) {
            if (false === $this->scanner->current()) {
                // If we get here, we hit the EOF.
                $this->parseError('Unexpected EOF during text read.');

                return $collected;
            }

            $collected .= $this->scanner->charsUntil($lead);

            if (!$this->scanner->sequenceMatches($sequence, false)) {
                // False alarm: keep this character and carry on.
                $collected .= $this->scanner->current();
                $this->scanner->consume();

                continue;
            }

            // Stop as soon as we hit the stopping condition.
            return $collected;
        }
    }

    /**
     * Check if upcoming chars match the given sequence.
     *
     * Thin wrapper that raises an E_USER_DEPRECATED notice and then delegates
     * to the scanner's own sequenceMatches(), returning its result.
     *
     * Example: $this->scanner->sequenceMatches('</script>') will
     * see if the input stream is at the start of a
     * '</script>' string.
     *
     * @deprecated since version 2.4, to be removed in 3.0. Use Scanner::sequenceMatches() instead.
     *
     * @param string $sequence
     * @param bool   $caseSensitive
     *
     * @return bool
     */
    protected function sequenceMatches($sequence, $caseSensitive = true)
    {
        $notice = __METHOD__ . ' method is deprecated since version 2.4 and will be removed in 3.0. Use Scanner::sequenceMatches() instead.';
        @trigger_error($notice, E_USER_DEPRECATED);

        $matched = $this->scanner->sequenceMatches($sequence, $caseSensitive);

        return $matched;
    }

    /**
     * Emit the buffered text as a single TEXT event and clear the buffer.
     *
     * Text is accumulated in a temporary buffer so that a run of character
     * data produces one EventHandler::text() event instead of many small ones.
     * Nothing is emitted when the buffer is empty.
     */
    protected function flushBuffer()
    {
        if ('' !== $this->text) {
            $this->events->text($this->text);
            $this->text = '';
        }
    }

    /**
     * Append text to the temporary buffer.
     *
     * @see flushBuffer()
     *
     * @param string $str Text to append.
     */
    protected function buffer($str)
    {
        $this->text = $this->text . $str;
    }

    /**
     * Emit a parse error.
     *
     * Any arguments after $msg are substituted into it with vsprintf(). When
     * there are none, $msg is passed through untouched. The scanner's current
     * line and column are reported along with the message.
     *
     * A parse error always returns false because it never consumes any
     * characters.
     *
     * @param string $msg Message, or a printf-style format when extra arguments are given.
     *
     * @return bool Always false.
     */
    protected function parseError($msg)
    {
        // Everything after the first argument is formatting data.
        $extra = array_slice(func_get_args(), 1);

        if (count($extra) > 0) {
            $msg = vsprintf($msg, $extra);
        }

        $this->events->parseError(
            $msg,
            $this->scanner->currentLine(),
            $this->scanner->columnOffset()
        );

        return false;
    }

    /**
     * Decode a character reference and return the string.
     *
     * The scanner is advanced past the '&' first. When the text that follows
     * is a complete reference (numeric or named, terminated by ';') the
     * decoded string is returned and the reference is consumed. In every
     * other case a bare '&' is returned and the scanner is rewound so the
     * following characters are read again as ordinary input.
     *
     * If $inAttribute is set to true, a bare & will be returned as-is: "&="
     * is accepted silently, and an unknown non-empty name does not raise a
     * parse error.
     *
     * @param bool $inAttribute Set to true if the text is inside of an attribute value.
     *                          false otherwise.
     *
     * @return string The decoded reference, or '&' when nothing was decoded.
     */
    protected function decodeCharacterReference($inAttribute = false)
    {
        // Next char after &.
        $tok = $this->scanner->next();
        // Remembered so that a failed match can be rewound to this point.
        $start = $this->scanner->position();

        if (false === $tok) {
            return '&';
        }

        // These indicate not an entity. We return just
        // the &.
        if ("\t" === $tok || "\n" === $tok || "\f" === $tok || ' ' === $tok || '&' === $tok || '<' === $tok) {
            return '&';
        }

        // Numeric entity
        if ('#' === $tok) {
            $tok = $this->scanner->next();

            if (false === $tok) {
                $this->parseError('Expected &#DEC; &#HEX;, got EOF');
                $this->scanner->unconsume(1);

                return '&';
            }

            // Hexidecimal encoding.
            // X[0-9a-fA-F]+;
            // x[0-9a-fA-F]+;
            if ('x' === $tok || 'X' === $tok) {
                $tok = $this->scanner->next(); // Consume x

                // Convert from hex code to char.
                $hex = $this->scanner->getHex();
                // "No digits" has to be tested explicitly: empty() is also
                // true for the string "0", which would reject &#x0; and then
                // rewind by the wrong amount because a digit was consumed.
                if (null === $hex || false === $hex || '' === $hex) {
                    $this->parseError('Expected &#xHEX;, got &#x%s', $tok);
                    // We unconsume because we don't know what parser rules might
                    // be in effect for the remaining chars. For example. '&#>'
                    // might result in a specific parsing rule inside of tag
                    // contexts, while not inside of pcdata context.
                    $this->scanner->unconsume(2);

                    return '&';
                }
                $entity = CharacterReference::lookupHex($hex);
            } else {
                // Decimal encoding.
                // [0-9]+;
                // Convert from decimal to char.
                $numeric = $this->scanner->getNumeric();
                if (false === $numeric) {
                    $this->parseError('Expected &#DIGITS;, got &#%s', $tok);
                    $this->scanner->unconsume(2);

                    return '&';
                }
                $entity = CharacterReference::lookupDecimal($numeric);
            }
        } elseif ('=' === $tok && $inAttribute) {
            return '&';
        } else { // String entity.
            // Attempt to consume a string up to a ';'.
            // [a-zA-Z0-9]+;
            $cname = $this->scanner->getAsciiAlphaNum();
            $entity = CharacterReference::lookupName($cname);

            // When no entity is found provide the name of the unmatched string
            // and continue on as the & is not part of an entity. The & will
            // be converted to &amp; elsewhere.
            if (null === $entity) {
                if (!$inAttribute || '' === $cname) {
                    $this->parseError("No match in entity table for '%s'", $cname);
                }
                $this->scanner->unconsume($this->scanner->position() - $start);

                return '&';
            }
        }

        // The scanner has advanced the cursor for us.
        $tok = $this->scanner->current();

        // We have an entity. We're done here.
        if (';' === $tok) {
            $this->scanner->consume();

            return $entity;
        }

        // Failing to match ; means unconsume the entire string.
        $this->scanner->unconsume($this->scanner->position() - $start);

        $this->parseError('Expected &ENTITY;, got &ENTITY%s (no trailing ;) ', $tok);

        return '&';
    }

    /**
     * Tell whether a (single-byte) character is an ASCII letter.
     *
     * Only the first byte of $input is examined.
     *
     * @param string $input A single-byte string
     *
     * @return bool True for A-Z and a-z, false otherwise
     */
    protected function is_alpha($input)
    {
        // Setting bit 0x20 maps 'A'..'Z' (65-90) onto 'a'..'z' (97-122) and
        // leaves 'a'..'z' unchanged; no other byte value lands in that range.
        $folded = ord($input) | 0x20;

        return $folded >= 97 && $folded <= 122;
    }
}
PK      ��Z	3sg\  \    Parser/EventHandler.phpnu W+A��        <?php

namespace Masterminds\HTML5\Parser;

/**
 * Standard events for HTML5.
 *
 * This is roughly analogous to a SAX2 or expat-style interface.
 * However, it is tuned specifically for HTML5, according to section 8
 * of the HTML5 specification.
 *
 * An event handler receives parser events. For a concrete
 * implementation, see DOMTreeBuilder.
 *
 * Quirks support in the parser is limited to close-in syntax (malformed
 * tags or attributes). Higher order syntax and semantic issues with a
 * document (e.g. mismatched tags, illegal nesting, etc.) are the
 * responsibility of the event handler implementation.
 *
 * See HTML5 spec section 8.2.4
 */
interface EventHandler
{
    // Values for the $idType argument of doctype().
    const DOCTYPE_NONE = 0;

    const DOCTYPE_PUBLIC = 1;

    const DOCTYPE_SYSTEM = 2;

    /**
     * A doctype declaration.
     *
     * @param string      $name   The name of the root element.
     * @param int         $idType One of DOCTYPE_NONE, DOCTYPE_PUBLIC, or DOCTYPE_SYSTEM
     * @param string|null $id     The identifier. For DOCTYPE_PUBLIC, this is the public ID. If DOCTYPE_SYSTEM,
     *                            then this is a system ID. May be null when no identifier was read.
     * @param bool        $quirks Indicates whether the builder should enter quirks mode.
     */
    public function doctype($name, $idType = 0, $id = null, $quirks = false);

    /**
     * A start tag.
     *
     * IMPORTANT: The parser watches the return value of this event. If this returns
     * an integer, the parser will switch TEXTMODE patterns according to the int.
     *
     * This is how the Tree Builder can tell the Tokenizer when a certain tag should
     * cause the parser to go into RAW text mode.
     *
     * The HTML5 standard requires that the builder is the one that initiates this
     * step, and this is the only way short of a circular reference that we can
     * do that.
     *
     * Example: if a startTag event for a `script` name is fired, and the startTag()
     * implementation returns Tokenizer::TEXTMODE_RAW, then the tokenizer will
     * switch into RAW text mode and consume data until it reaches a closing
     * `script` tag.
     *
     * The textmode is automatically reset to Tokenizer::TEXTMODE_NORMAL when the
     * closing tag is encountered. **This behavior may change.**
     *
     * @param string $name        The tag name.
     * @param array  $attributes  An array with all of the tag's attributes.
     * @param bool   $selfClosing An indicator of whether or not this tag is self-closing (<foo/>).
     *
     * @return int one of the Tokenizer::TEXTMODE_* constants
     */
    public function startTag($name, $attributes = array(), $selfClosing = false);

    /**
     * An end-tag.
     *
     * @param string $name The tag name.
     */
    public function endTag($name);

    /**
     * A comment section (unparsed character data).
     *
     * @param string $cdata The comment's character data.
     */
    public function comment($cdata);

    /**
     * A unit of parsed character data.
     *
     * Entities in this text are *already decoded*.
     *
     * @param string $cdata The text.
     */
    public function text($cdata);

    /**
     * Indicates that the document has been entirely processed.
     */
    public function eof();

    /**
     * Emitted when the parser encounters an error condition.
     *
     * @param string $msg  A description of the error.
     * @param int    $line The line being read when the error was detected.
     * @param int    $col  The column offset within that line.
     */
    public function parseError($msg, $line, $col);

    /**
     * A CDATA section.
     *
     * @param string $data
     *                     The unparsed character data
     */
    public function cdata($data);

    /**
     * This is a holdover from the XML spec.
     *
     * While user agents don't get PIs, server-side does.
     *
     * @param string $name The name of the processor (e.g. 'php').
     * @param string $data The unparsed data.
     */
    public function processingInstruction($name, $data = null);
}
PK      ��Zr5u�  �    Parser/TreeBuildingRules.phpnu W+A��        <?php

namespace Masterminds\HTML5\Parser;

/**
 * Handles special-case rules for the DOM tree builder.
 *
 * Many tags have special rules that need to be accommodated on an
 * individual basis. This class handles those rules. Each rule implemented
 * here has the same shape: when the current element is one of a given set of
 * tags, the new element is attached to the current element's parent (so it
 * becomes a sibling); otherwise it is attached as a child of the current
 * element.
 *
 * See section 8.1.2.4 of the spec.
 *
 * @todo - colgroup and col special behaviors
 *       - body and head special behaviors
 */
class TreeBuildingRules
{
    /**
     * Tag names that have a rule, looked up by key in hasRules().
     */
    protected static $tags = array(
        'li' => 1,
        'dd' => 1,
        'dt' => 1,
        'rt' => 1,
        'rp' => 1,
        'tr' => 1,
        'th' => 1,
        'td' => 1,
        'thead' => 1,
        'tfoot' => 1,
        'tbody' => 1,
        'table' => 1,
        'optgroup' => 1,
        'option' => 1,
    );

    /**
     * Returns true if the given tagname has special processing rules.
     */
    public function hasRules($tagname)
    {
        return isset(static::$tags[$tagname]);
    }

    /**
     * Evaluate the rule for the current tag name.
     *
     * This may modify the existing DOM. Tags without a rule leave the DOM
     * untouched and $current is returned unchanged.
     *
     * @return \DOMElement The new Current DOM element.
     */
    public function evaluate($new, $current)
    {
        switch ($new->tagName) {
            // These tags are routed through overridable handlers.
            case 'li':
                return $this->handleLI($new, $current);
            case 'dt':
            case 'dd':
                return $this->handleDT($new, $current);
            case 'rt':
            case 'rp':
                return $this->handleRT($new, $current);

            // The rest only differ in which current tags make the new
            // element a sibling rather than a child.
            case 'optgroup':
                $closes = array('optgroup');
                break;
            case 'option':
                $closes = array('option');
                break;
            case 'tr':
                $closes = array('tr');
                break;
            case 'td':
            case 'th':
                $closes = array('th', 'td');
                break;
            case 'tbody':
            case 'thead':
            case 'tfoot':
            case 'table': // Spec isn't explicit about this, but it's necessary.
                $closes = array('thead', 'tfoot', 'tbody');
                break;

            default:
                return $current;
        }

        return $this->closeIfCurrentMatches($new, $current, $closes);
    }

    /**
     * Rule for li: a new li becomes a sibling of a current li.
     */
    protected function handleLI($ele, $current)
    {
        $closes = array('li');

        return $this->closeIfCurrentMatches($ele, $current, $closes);
    }

    /**
     * Rule for dt and dd: the new element becomes a sibling of a current dt or dd.
     */
    protected function handleDT($ele, $current)
    {
        $closes = array('dt', 'dd');

        return $this->closeIfCurrentMatches($ele, $current, $closes);
    }

    /**
     * Rule for rt and rp: the new element becomes a sibling of a current rt or rp.
     */
    protected function handleRT($ele, $current)
    {
        $closes = array('rt', 'rp');

        return $this->closeIfCurrentMatches($ele, $current, $closes);
    }

    /**
     * Attach $ele next to or inside $current.
     *
     * If $current's tag name is listed in $match, $ele is appended to
     * $current's parent; otherwise it is appended to $current itself.
     *
     * @return \DOMElement Always $ele.
     */
    protected function closeIfCurrentMatches($ele, $current, $match)
    {
        $closesCurrent = in_array($current->tagName, $match, true);
        $target = $closesCurrent ? $current->parentNode : $current;
        $target->appendChild($ele);

        return $ele;
    }
}
PK      ��Z�w�[�  �    Parser/FileInputStream.phpnu W+A��        <?php

namespace Masterminds\HTML5\Parser;

/**
 * The FileInputStream loads a file to be parsed.
 *
 * The whole file is read into a string, which is then handed to
 * StringInputStream for processing. We chose to do this largely for the
 * sake of expediency of development, and also because we could optimize
 * toward processing arbitrarily large chunks of the input. But in the
 * future, we'd really like to rewrite this class to efficiently handle
 * lower level stream reads (and thus efficiently handle large documents).
 *
 * @deprecated since 2.4, to remove in 3.0. Use a string in the scanner instead.
 */
class FileInputStream extends StringInputStream implements InputStream
{
    /**
     * Load a file input stream.
     *
     * The result of file_get_contents() is passed to the parent constructor
     * unchecked, so a failed read (false) is forwarded as-is.
     *
     * @param string $data     The file or url path to load.
     * @param string $encoding The encoding to use for the data.
     * @param string $debug    A fprintf format to use to echo the data on stdout.
     */
    public function __construct($data, $encoding = 'UTF-8', $debug = '')
    {
        // Read the file and hand its contents straight to the string stream.
        parent::__construct(file_get_contents($data), $encoding, $debug);
    }
}
PK        ��Z)��%�@  �@                  Serializer/OutputRules.phpnu W+A��        PK        ��Z�l�E�  �              4A  Serializer/Traverser.phpnu W+A��        PK        ��Z	�n��  �              IR  Serializer/HTML5Entities.phpnu W+A��        PK        ��Ze%ա  �              ��  Serializer/README.mdnu W+A��        PK        ��Z{�7
  7
              ~ Serializer/RulesInterface.phpnu W+A��        PK        ��Z��e�   �                Exception.phpnu W+A��        PK        ��ZȍRx�  �              � Entities.phpnu W+A��        PK        ��ZP�k��  �              � InstructionProcessor.phpnu W+A��        PK        ��Z::)�EM  EM              K Elements.phpnu W+A��        PK        ��Z�� ��	  �	              �Q Parser/InputStream.phpnu W+A��        PK        ��Z�:�X�   �               �[ Parser/ParseError.phpnu W+A��        PK        ��Z �w"Y  Y              �\ Parser/DOMTreeBuilder.phpnu W+A��        PK        ��Z���G�  �              � Parser/CharacterReference.phpnu W+A��        PK        ��Z�����,  �,              4� Parser/Scanner.phpnu W+A��        PK        ��Z0*rl  l              :� Parser/UTF8Utils.phpnu W+A��        PK        ��Z�%Ʉ�  �              � Parser/README.mdnu W+A��        PK        ��Z��ز:%  :%              � Parser/StringInputStream.phpnu W+A��        PK        ��ZP�>��  ��              S2 Parser/Tokenizer.phpnu W+A��        PK        ��Z	3sg\  \              R� Parser/EventHandler.phpnu W+A��        PK        ��Zr5u�  �              �� Parser/TreeBuildingRules.phpnu W+A��        PK        ��Z�w�[�  �              
� Parser/FileInputStream.phpnu W+A��        PK        ��   