From 539f518f64982bc490e01c556cc63b37e6450f64 Mon Sep 17 00:00:00 2001 From: Rostislav Wolny Date: Wed, 1 Dec 2021 13:19:29 +0100 Subject: [PATCH] Move pquery to 3rd party libs The project is abandoned so the easiest way for patching it is adding it to the lib-3rd-party. [MAILPOE-3980] --- composer.json | 3 +- composer.lock | 54 +- lib-3rd-party/pquery/IQuery.php | 151 + lib-3rd-party/pquery/LICENSE | 502 +++ lib-3rd-party/pquery/gan_formatter.php | 381 +++ lib-3rd-party/pquery/gan_node_html.php | 2855 +++++++++++++++++ lib-3rd-party/pquery/gan_parser_html.php | 839 +++++ lib-3rd-party/pquery/gan_selector_html.php | 949 ++++++ lib-3rd-party/pquery/gan_tokenizer.php | 566 ++++ lib-3rd-party/pquery/gan_xml2array.php | 101 + lib-3rd-party/pquery/ganon.php | 101 + lib-3rd-party/pquery/pQuery.php | 279 ++ .../pquery/third_party/jsminplus.php | 2086 ++++++++++++ 13 files changed, 8812 insertions(+), 55 deletions(-) create mode 100644 lib-3rd-party/pquery/IQuery.php create mode 100644 lib-3rd-party/pquery/LICENSE create mode 100644 lib-3rd-party/pquery/gan_formatter.php create mode 100644 lib-3rd-party/pquery/gan_node_html.php create mode 100644 lib-3rd-party/pquery/gan_parser_html.php create mode 100644 lib-3rd-party/pquery/gan_selector_html.php create mode 100644 lib-3rd-party/pquery/gan_tokenizer.php create mode 100644 lib-3rd-party/pquery/gan_xml2array.php create mode 100644 lib-3rd-party/pquery/ganon.php create mode 100644 lib-3rd-party/pquery/pQuery.php create mode 100644 lib-3rd-party/pquery/third_party/jsminplus.php diff --git a/composer.json b/composer.json index 8e3e7f5c4b..ce04ee76d8 100644 --- a/composer.json +++ b/composer.json @@ -8,8 +8,7 @@ "require": { "php": ">=7.1", "mtdowling/cron-expression": "^1.1", - "soundasleep/html2text": "dev-master", - "tburry/pquery": "^1.1.1" + "soundasleep/html2text": "dev-master" }, "require-dev": { "ext-gd": "*", diff --git a/composer.lock b/composer.lock index 0148ef3a68..08ddd95493 100644 --- a/composer.lock +++ 
b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "44e6370ee3bd0f45a6ba712d32f54ef9", + "content-hash": "a736768274d32a1dbad34e9a59f8565d", "packages": [ { "name": "mtdowling/cron-expression", @@ -103,58 +103,6 @@ "source": "https://github.com/mailpoet/html2text/tree/master" }, "time": "2019-04-24T12:03:33+00:00" - }, - { - "name": "tburry/pquery", - "version": "v1.1.1", - "source": { - "type": "git", - "url": "https://github.com/tburry/pquery.git", - "reference": "872339ffd38d261c4417ea1855428b1b4ff9abf1" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/tburry/pquery/zipball/872339ffd38d261c4417ea1855428b1b4ff9abf1", - "reference": "872339ffd38d261c4417ea1855428b1b4ff9abf1", - "shasum": "" - }, - "require": { - "php": ">=5.3.0" - }, - "require-dev": { - "htmlawed/htmlawed": "dev-master" - }, - "type": "library", - "autoload": { - "classmap": [ - "IQuery.php", - "gan_formatter.php", - "gan_node_html.php", - "gan_parser_html.php", - "gan_selector_html.php", - "gan_tokenizer.php", - "gan_xml2array.php", - "pQuery.php" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "LGPL-2.1" - ], - "authors": [ - { - "name": "Todd Burry", - "email": "todd@vanillaforums.com", - "role": "developer" - } - ], - "description": "A jQuery like html dom parser written in php.", - "keywords": [ - "dom", - "ganon", - "php" - ], - "time": "2016-01-14T20:55:00+00:00" } ], "packages-dev": [ diff --git a/lib-3rd-party/pquery/IQuery.php b/lib-3rd-party/pquery/IQuery.php new file mode 100644 index 0000000000..cd3d18d9e1 --- /dev/null +++ b/lib-3rd-party/pquery/IQuery.php @@ -0,0 +1,151 @@ + + Copyright (C) + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 
2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + , 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! 
\ No newline at end of file diff --git a/lib-3rd-party/pquery/gan_formatter.php b/lib-3rd-party/pquery/gan_formatter.php new file mode 100644 index 0000000000..6f6cbbf4ea --- /dev/null +++ b/lib-3rd-party/pquery/gan_formatter.php @@ -0,0 +1,381 @@ + + * @copyright 2010 Niels A.D., 2014 Todd Burry + * @license http://opensource.org/licenses/LGPL-2.1 LGPL-2.1 + * @package pQuery + */ + +namespace pQuery; + +/** + * Indents text + * @param string $text + * @param int $indent + * @param string $indent_string + * @return string + */ +function indent_text($text, $indent, $indent_string = ' ') { + if ($indent && $indent_string) { + return str_replace("\n", "\n".str_repeat($indent_string, $indent), $text); + } else { + return $text; + } +} + +/** + * Class used to format/minify HTML nodes + * + * Used like: + * + * format($root); + * ?> + * + */ +class HtmlFormatter { + + /** + * Determines which elements start on a new line and which function as block + * @var array('element' => array('new_line' => true, 'as_block' => true, 'format_inside' => true)) + */ + var $block_elements = array( + 'p' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'h1' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'h2' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'h3' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'h4' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'h5' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'h6' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + + 'form' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'fieldset' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'legend' => array('new_line' => true, 'as_block' => false, 'format_inside' => true), + 'dl' => array('new_line' => true, 'as_block' => false, 
'format_inside' => true), + 'dt' => array('new_line' => true, 'as_block' => false, 'format_inside' => true), + 'dd' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'ol' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'ul' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'li' => array('new_line' => true, 'as_block' => false, 'format_inside' => true), + + 'table' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'tr' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + + 'dir' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'menu' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'address' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'blockquote' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'center' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'del' => array('new_line' => true, 'as_block' => false, 'format_inside' => true), + //'div' => array('new_line' => false, 'as_block' => true, 'format_inside' => true), + 'hr' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'ins' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'noscript' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'pre' => array('new_line' => true, 'as_block' => true, 'format_inside' => false), + 'script' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'style' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + + 'html' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'head' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'body' => array('new_line' => true, 'as_block' => true, 'format_inside' => true), + 'title' => 
array('new_line' => true, 'as_block' => false, 'format_inside' => false) + ); + + /** + * Determines which characters are considered whitespace + * @var array("\t" => true) True to recognize as new line + */ + var $whitespace = array( + ' ' => false, + "\t" => false, + "\x0B" => false, + "\0" => false, + "\n" => true, + "\r" => true + ); + + /** + * String that is used to generate correct indenting + * @var string + */ + var $indent_string = ' '; + + /** + * String that is used to break lines + * @var string + */ + var $linebreak_string = "\n"; + + /** + * Other formatting options + * @var array + */ + public $options = array( + 'img_alt' => '', + 'self_close_str' => null, + 'attribute_shorttag' => false, + 'sort_attributes' => false, + 'attributes_case' => CASE_LOWER, + 'minify_script' => true + ); + + /** + * Errors found during formatting + * @var array + */ + var $errors = array(); + + + /** + * Class constructor + * @param array $options {@link $options} + */ + function __construct($options = array()) { + $this->options = array_merge($this->options, $options); + + if (isset($options['indent_str'])) + $this->indent_string = $options['indent_str']; + + if (isset($options['linebreak_str'])) + $this->linebreak_string = $options['linebreak_str']; + } + + #php4 PHP4 class constructor compatibility + #function HtmlFormatter($options = array()) {return $this->__construct($options);} + #php4e + + /** + * Class magic invoke method, performs {@link format()} + * @access private + */ + function __invoke(&$node) { + return $this->format($node); + } + + /** + * Minifies HTML / removes unneeded whitespace + * @param DomNode $root + * @param bool $strip_comments + * @param bool $recursive + */ + static function minify_html(&$root, $strip_comments = true, $recursive = true) { + if ($strip_comments) { + foreach($root->select(':comment', false, $recursive, true) as $c) { + $prev = $c->getSibling(-1); + $next = $c->getSibling(1); + $c->delete(); + if ($prev && $next && 
($prev->isText()) && ($next->isText())) { + $prev->text .= $next->text; + $next->delete(); + } + } + } + foreach($root->select('(!pre + !xmp + !style + !script + !"?php" + !"~text~" + !"~comment~"):not-empty > "~text~"', false, $recursive, true) as $c) { + $c->text = preg_replace('`\s+`', ' ', $c->text); + } + } + + /** + * Minifies javascript using JSMin+ + * @param DomNode $root + * @param string $indent_string + * @param bool $wrap_comment Wrap javascript in HTML comments () + * @param bool $recursive + * @return bool|array Array of errors on failure, true on succes + */ + static function minify_javascript(&$root, $indent_string = ' ', $wrap_comment = true, $recursive = true) { + #php4 JSMin+ doesn't support PHP4 + #return true; + #php4e + #php5 + include_once('third_party/jsminplus.php'); + + $errors = array(); + foreach($root->select('script:not-empty > "~text~"', false, $recursive, true) as $c) { + try { + $text = $c->text; + while ($text) { + $text = trim($text); + //Remove comment/CDATA tags at begin and end + if (substr($text, 0, 4) === '') || ($end === ']]>'))) { + $text = substr($text, 0, -3); + continue; + } + + break; + } + + if (trim($text)) { + $text = \JSMinPlus::minify($text); + if ($wrap_comment) { + $text = ""; + } + if ($indent_string && ($wrap_comment || (strpos($text, "\n") !== false))) { + $text = indent_text("\n".$text, $c->indent(), $indent_string); + } + } + $c->text = $text; + } catch (\Exception $e) { + $errors[] = array($e, $c->parent->dumpLocation()); + } + } + + return (($errors) ? $errors : true); + #php5e + } + + /** + * Formats HTML + * @param DomNode $root + * @param bool $recursive + * @access private + */ + function format_html(&$root, $recursive = null) { + if ($recursive === null) { + $recursive = true; + self::minify_html($root); + } elseif (is_int($recursive)) { + $recursive = (($recursive > 1) ? 
$recursive - 1 : false); + } + + $root_tag = strtolower($root->tag); + $in_block = isset($this->block_elements[$root_tag]) && $this->block_elements[$root_tag]['as_block']; + $child_count = count($root->children); + + if (isset($this->options['attributes_case']) && $this->options['attributes_case']) { + $root->attributes = array_change_key_case($root->attributes, $this->options['attributes_case']); + $root->attributes_ns = null; + } + + if (isset($this->options['sort_attributes']) && $this->options['sort_attributes']) { + if ($this->options['sort_attributes'] === 'reverse') { + krsort($root->attributes); + } else { + ksort($root->attributes); + } + } + + if ($root->select(':element', true, false, true)) { + $root->setTag(strtolower($root->tag), true); + if (($this->options['img_alt'] !== null) && ($root_tag === 'img') && (!isset($root->alt))) { + $root->setAttribute('alt', $this->options['img_alt']); + } + } + if ($this->options['self_close_str'] !== null) { + $root->self_close_str = $this->options['self_close_str']; + } + if ($this->options['attribute_shorttag'] !== null) { + $root->attribute_shorttag = $this->options['attribute_shorttag']; + } + + $prev = null; + $n_tag = ''; +// $prev_tag = ''; + $as_block = false; + $prev_asblock = false; + for($i = 0; $i < $child_count; $i++) { + $n =& $root->children[$i]; + $indent = $n->indent(); + + if (!$n->isText()) { + $n_tag = strtolower($n->tag); + $new_line = isset($this->block_elements[$n_tag]) && $this->block_elements[$n_tag]['new_line']; + $as_block = isset($this->block_elements[$n_tag]) && $this->block_elements[$n_tag]['as_block']; + $format_inside = ((!isset($this->block_elements[$n_tag])) || $this->block_elements[$n_tag]['format_inside']); + + if ($prev && ($prev->isText()) && $prev->text && ($char = $prev->text[strlen($prev->text) - 1]) && isset($this->whitespace[$char])) { + if ($this->whitespace[$char]) { + $prev->text .= str_repeat($this->indent_string, $indent); + } else { + $prev->text = 
substr_replace($prev->text, $this->linebreak_string.str_repeat($this->indent_string, $indent), -1, 1); + } + } elseif (($new_line || $prev_asblock || ($in_block && ($i === 0)))){ + if ($prev && ($prev->isText())) { + $prev->text .= $this->linebreak_string.str_repeat($this->indent_string, $indent); + } else { + $root->addText($this->linebreak_string.str_repeat($this->indent_string, $indent), $i); + ++$child_count; + } + } + + if ($format_inside && count($n->children)) { + //$last = end($n->children); + $last = $n->children[count($n->children) - 1]; + $last_tag = ($last) ? strtolower($last->tag) : ''; + $last_asblock = ($last_tag && isset($this->block_elements[$last_tag]) && $this->block_elements[$last_tag]['as_block']); + + if (($n->childCount(true) > 0) || (trim($n->getPlainText()))) { + if ($last && ($last->isText()) && $last->text && ($char = $last->text[strlen($last->text) - 1]) && isset($this->whitespace[$char])) { + if ($as_block || ($last->index() > 0) || isset($this->whitespace[$last->text[0]])) { + if ($this->whitespace[$char]) { + $last->text .= str_repeat($this->indent_string, $indent); + } else { + $last->text = substr_replace($last->text, $this->linebreak_string.str_repeat($this->indent_string, $indent), -1, 1); + } + } + } elseif (($as_block || $last_asblock || ($in_block && ($i === 0))) && $last) { + if ($last && ($last->isText())) { + $last->text .= $this->linebreak_string.str_repeat($this->indent_string, $indent); + } else { + $n->addText($this->linebreak_string.str_repeat($this->indent_string, $indent)); + } + } + } elseif (!trim($n->getInnerText())) { + $n->clear(); + } + + if ($recursive) { + $this->format_html($n, $recursive); + } + } + + } elseif (trim($n->text) && ((($i - 1 < $child_count) && ($char = $n->text[0]) && isset($this->whitespace[$char])) || ($in_block && ($i === 0)))) { + if (isset($this->whitespace[$char])) { + if ($this->whitespace[$char]) { + $n->text = str_repeat($this->indent_string, $indent).$n->text; + } else { + $n->text = 
substr_replace($n->text, $this->linebreak_string.str_repeat($this->indent_string, $indent), 0, 1); + } + } else { + $n->text = $this->linebreak_string.str_repeat($this->indent_string, $indent).$n->text; + } + } + + $prev = $n; +// $prev_tag = $n_tag; + $prev_asblock = $as_block; + } + + return true; + } + + /** + * Formats HTML/Javascript + * @param DomNode $root + * @see format_html() + */ + function format(&$node) { + $this->errors = array(); + if ($this->options['minify_script']) { + $a = self::minify_javascript($node, $this->indent_string, true, true); + if (is_array($a)) { + foreach($a as $error) { + $this->errors[] = $error[0]->getMessage().' >>> '.$error[1]; + } + } + } + return $this->format_html($node); + } +} + +?> diff --git a/lib-3rd-party/pquery/gan_node_html.php b/lib-3rd-party/pquery/gan_node_html.php new file mode 100644 index 0000000000..a6bb61589b --- /dev/null +++ b/lib-3rd-party/pquery/gan_node_html.php @@ -0,0 +1,2855 @@ + + * @copyright 2010 Niels A.D., 2014 Todd Burry + * @license http://opensource.org/licenses/LGPL-2.1 LGPL-2.1 + * @package pQuery + */ + +namespace pQuery; + +/** + * Holds (x)html/xml tag information like tag name, attributes, + * parent, children, self close, etc. + * + */ +class DomNode implements IQuery { + + /** + * Element Node, used for regular elements + */ + const NODE_ELEMENT = 0; + /** + * Text Node + */ + const NODE_TEXT = 1; + /** + * Comment Node + */ + const NODE_COMMENT = 2; + /** + * Conditional Node ( + */ + const NODE_CDATA = 4; + /** + * Doctype Node + */ + const NODE_DOCTYPE = 5; + /** + * XML Node, used for tags that start with ?, like 'value') + * @internal Public for faster access! + * @see getAttribute() + * @see setAttribute() + * @access private + */ + var $attributes = array(); + + /** + * Namespace info for attributes + * @var array + * @internal array('tag' => array(array('ns', 'tag', 'ns:tag', index))) + * @internal Public for easy outside modifications! 
+ * @see findAttribute() + * @access private + */ + var $attributes_ns = null; + + /** + * Array of child nodes + * @var array + * @internal Public for faster access! + * @see childCount() + * @see getChild() + * @see addChild() + * @see deleteChild() + * @access private + */ + var $children = array(); + + /** + * Full tag name (including namespace) + * @var string + * @see getTagName() + * @see getNamespace() + */ + var $tag = ''; + + /** + * Namespace info for tag + * @var array + * @internal array('namespace', 'tag') + * @internal Public for easy outside modifications! + * @access private + */ + var $tag_ns = null; + + /** + * Is node a self closing node? No closing tag if true. + * @var bool + */ + var $self_close = false; + + /** + * If self close, then this will be used to close the tag + * @var string + * @see $self_close + */ + var $self_close_str = ' /'; + + /** + * Use short tags for attributes? If true, then attributes + * with values equal to the attribute name will not output + * the value, e.g. selected="selected" will be selected. 
+ * @var bool + */ + var $attribute_shorttag = true; + + /** + * Function map used for the selector filter + * @var array + * @internal array('root' => 'filter_root') will cause the + * selector to call $this->filter_root at :root + * @access private + */ + var $filter_map = array( + 'root' => 'filter_root', + 'nth-child' => 'filter_nchild', + 'eq' => 'filter_nchild', //jquery (naming) compatibility + 'gt' => 'filter_gt', + 'lt' => 'filter_lt', + 'nth-last-child' => 'filter_nlastchild', + 'nth-of-type' => 'filter_ntype', + 'nth-last-of-type' => 'filter_nlastype', + 'odd' => 'filter_odd', + 'even' => 'filter_even', + 'every' => 'filter_every', + 'first-child' => 'filter_first', + 'last-child' => 'filter_last', + 'first-of-type' => 'filter_firsttype', + 'last-of-type' => 'filter_lasttype', + 'only-child' => 'filter_onlychild', + 'only-of-type' => 'filter_onlytype', + 'empty' => 'filter_empty', + 'not-empty' => 'filter_notempty', + 'has-text' => 'filter_hastext', + 'no-text' => 'filter_notext', + 'lang' => 'filter_lang', + 'contains' => 'filter_contains', + 'has' => 'filter_has', + 'not' => 'filter_not', + 'element' => 'filter_element', + 'text' => 'filter_text', + 'comment' => 'filter_comment', + 'checked' => 'filter_checked', + 'selected' => 'filter_selected', + ); + + /** + * Class constructor + * @param string|array $tag Name of the tag, or array with taginfo (array( + * 'tag_name' => 'tag', + * 'self_close' => false, + * 'attributes' => array('attribute' => 'value'))) + * @param DomNode $parent Parent of node, null if none + */ + function __construct($tag, $parent) { + $this->parent = $parent; + + if (is_string($tag)) { + $this->tag = $tag; + } else { + $this->tag = $tag['tag_name']; + $this->self_close = $tag['self_close']; + $this->attributes = $tag['attributes']; + } + } + + #php4 PHP4 class constructor compatibility + #function DomNode($tag, $parent) {return $this->__construct($tag, $parent);} + #php4e + + /** + * Class destructor + * @access private + */ + 
function __destruct() { + $this->delete(); + } + + /** + * Class toString, outputs {@link $tag} + * @return string + * @access private + */ + function __toString() { + return (($this->tag === '~root~') ? $this->toString(true, true, 1) : $this->tag); + } + + /** + * Class magic get method, outputs {@link getAttribute()} + * @return string + * @access private + */ + function __get($attribute) { + return $this->getAttribute($attribute); + } + + /** + * Class magic set method, performs {@link setAttribute()} + * @access private + */ + function __set($attribute, $value) { + $this->setAttribute($attribute, $value); + } + + /** + * Class magic isset method, returns {@link hasAttribute()} + * @return bool + * @access private + */ + function __isset($attribute) { + return $this->hasAttribute($attribute); + } + + /** + * Class magic unset method, performs {@link deleteAttribute()} + * @access private + */ + function __unset($attribute) { + return $this->deleteAttribute($attribute); + } + + /** + * Class magic invoke method, performs {@link query()}. + * @param string $query The css query to run on the nodes. + * @return \pQuery + */ + function __invoke($query = '*') { + return $this->query($query); + } + + /** + * Returns place in document + * @return string + */ + function dumpLocation() { + return (($this->parent) ? (($p = $this->parent->dumpLocation()) ? $p.' > ' : '').$this->tag.'('.$this->typeIndex().')' : ''); + } + + /** + * Returns all the attributes and their values + * @return string + * @access private + */ + protected function toString_attributes() { + $s = ''; + foreach($this->attributes as $a => $v) { + $s .= ' '.$a; + if ((!$this->attribute_shorttag) || ($v !== $a)) { + $quote = (strpos($v, '"') === false) ? 
'"' : "'"; + $s .= '='.$quote.$v.$quote; + } + } + return $s; + } + + /** + * Returns the content of the node (child tags and text) + * @param bool $attributes Print attributes of child tags + * @param bool|int $recursive How many sublevels of childtags to print. True for all. + * @param bool $content_only Only print text, false will print tags too. + * @return string + * @access private + */ + protected function toString_content($attributes = true, $recursive = true, $content_only = false) { + $s = ''; + foreach($this->children as $c) { + $s .= $c->toString($attributes, $recursive, $content_only); + } + return $s; + } + + /** + * Returns the node as string + * @param bool $attributes Print attributes (of child tags) + * @param bool|int $recursive How many sub-levels of child tags to print. True for all. + * @param bool|int $content_only Only print text, false will print tags too. + * @return string + */ + function toString($attributes = true, $recursive = true, $content_only = false) { + if ($content_only) { + if (is_int($content_only)) { + --$content_only; + } + return $this->toString_content($attributes, $recursive, $content_only); + } + + $s = '<'.$this->tag; + if ($attributes) { + $s .= $this->toString_attributes(); + } + if ($this->self_close) { + $s .= $this->self_close_str.'>'; + } else { + $s .= '>'; + if($recursive) { + $s .= $this->toString_content($attributes); + } + $s .= 'tag.'>'; + } + return $s; + } + + /** + * Similar to JavaScript outerText, will return full (html formatted) node + * @return string + */ + function getOuterText() { + return html_entity_decode($this->toString(), ENT_QUOTES); + } + + /** + * Similar to JavaScript outerText, will replace node (and child nodes) with new text + * @param string $text + * @param HtmlParserBase $parser Null to auto create instance + * @return bool|array True on succeed, array with errors on failure + */ + function setOuterText($text, $parser = null) { + if (trim($text)) { + $index = $this->index(); + if 
($parser === null) { + $parser = new $this->parserClass(); + } + $parser->setDoc($text); + $parser->parse_all(); + $parser->root->moveChildren($this->parent, $index); + } + $this->delete(); + return (($parser && $parser->errors) ? $parser->errors : true); + } + + /** + * Return html code of node + * @internal jquery (naming) compatibility + * @param string|null $value The value to set or null to get the value. + * @see toString() + * @return string + */ + function html($value = null) { + if ($value !== null) { + $this->setInnerText($value); + } + return $this->getInnerText(); + } + + /** + * Similar to JavaScript innerText, will return (html formatted) content + * @return string + */ + function getInnerText() { + return html_entity_decode($this->toString(true, true, 1), ENT_QUOTES); + } + + /** + * Similar to JavaScript innerText, will replace child nodes with new text + * @param string $text + * @param HtmlParserBase $parser Null to auto create instance + * @return bool|array True on succeed, array with errors on failure + */ + function setInnerText($text, $parser = null) { + $this->clear(); + if (trim($text)) { + if ($parser === null) { + $parser = new $this->parserClass(); + } + $parser->root =& $this; + $parser->setDoc($text); + $parser->parse_all(); + } + return (($parser && $parser->errors) ? 
$parser->errors : true); + } + + /** + * Similar to JavaScript plainText, will return text in node (and subnodes) + * @return string + */ + function getPlainText() { + return preg_replace('`\s+`', ' ', html_entity_decode($this->toString(true, true, true), ENT_QUOTES)); + } + + /** + * Return plaintext taking document encoding into account + * @return string + */ + function getPlainTextUTF8() { + $txt = $this->toString(true, true, true); + $enc = $this->getEncoding(); + if ($enc !== false) { + $txt = mb_convert_encoding($txt, 'UTF-8', $enc); + } + return preg_replace('`\s+`', ' ', html_entity_decode($txt, ENT_QUOTES, 'UTF-8')); + } + + /** + * Similar to JavaScript plainText, will replace child nodes with new text (literal) + * @param string $text + */ + function setPlainText($text) { + $this->clear(); + if (trim($text)) { + $this->addText(htmlentities($text, ENT_QUOTES)); + } + } + + /** + * Delete node from parent and clear node + */ + function delete() { + if (($p = $this->parent) !== null) { + $this->parent = null; + $p->deleteChild($this); + } else { + $this->clear(); + } + } + + /** + * Detach node from parent + * @param bool $move_children_up Only detach current node and replace it with child nodes + * @internal jquery (naming) compatibility + * @see delete() + */ + function detach($move_children_up = false) { + if (($p = $this->parent) !== null) { + $index = $this->index(); + $this->parent = null; + + if ($move_children_up) { + $this->moveChildren($p, $index); + } + $p->deleteChild($this, true); + } + } + + /** + * Deletes all child nodes from node + */ + function clear() { + foreach($this->children as $c) { + $c->parent = null; + $c->delete(); + } + $this->children = array(); + } + + /** + * Get top parent + * @return DomNode Root, null if node has no parent + */ + function getRoot() { + $r = $this->parent; + $n = ($r === null) ? 
null : $r->parent; + while ($n !== null) { + $r = $n; + $n = $r->parent; + } + + return $r; + } + + /** + * Change parent + * @param null|DomNode $to New parent, null if none + * @param false|int $index Add child to parent if not present at index, false to not add, negative to count from end, null to append + */ + #php4 + #function changeParent($to, &$index) { + #php4e + #php5 + function changeParent($to, &$index = null) { + #php5e + if ($this->parent !== null) { + $this->parent->deleteChild($this, true); + } + $this->parent = $to; + if ($index !== false) { + $new_index = $this->index(); + if (!(is_int($new_index) && ($new_index >= 0))) { + $this->parent->addChild($this, $index); + } + } + } + + /** + * Find out if node has (a certain) parent + * @param DomNode|string $tag Match against parent, string to match tag, object to fully match node, null to return if node has parent + * @param bool $recursive + * @return bool + */ + function hasParent($tag = null, $recursive = false) { + if ($this->parent !== null) { + if ($tag === null) { + return true; + } elseif (is_string($tag)) { + return (($this->parent->tag === $tag) || ($recursive && $this->parent->hasParent($tag))); + } elseif (is_object($tag)) { + return (($this->parent === $tag) || ($recursive && $this->parent->hasParent($tag))); + } + } + + return false; + } + + /** + * Find out if node is parent of a certain tag + * @param DomNode|string $tag Match against parent, string to match tag, object to fully match node + * @param bool $recursive + * @return bool + * @see hasParent() + */ + function isParent($tag, $recursive = false) { + return ($this->hasParent($tag, $recursive) === ($tag !== null)); + } + + /** + * Find out if node is text + * @return bool + */ + function isText() { + return false; + } + + /** + * Find out if node is comment + * @return bool + */ + function isComment() { + return false; + } + + /** + * Find out if node is text or comment node + * @return bool + */ + function isTextOrComment() { + 
return false; + } + + /** + * Move node to other node + * @param DomNode $to New parent, null if none + * @param int $new_index Add child to parent at index if not present, null to not add, negative to count from end + * @internal Performs {@link changeParent()} + */ + #php4 + #function move($to, &$new_index) { + #php4e + #php5 + function move($to, &$new_index = -1) { + #php5e + $this->changeParent($to, $new_index); + } + + /** + * Move child nodes to other node + * @param DomNode $to New parent, null if none + * @param int $new_index Add child to new node at index if not present, null to not add, negative to count from end + * @param int $start Index from child node where to start wrapping, 0 for first element + * @param int $end Index from child node where to end wrapping, -1 for last element + */ + #php4 + #function moveChildren($to, &$new_index, $start = 0, $end = -1) { + #php4e + #php5 + function moveChildren($to, &$new_index = -1, $start = 0, $end = -1) { + #php5e + if ($end < 0) { + $end += count($this->children); + } + for ($i = $start; $i <= $end; $i++) { + $this->children[$start]->changeParent($to, $new_index); + } + } + + /** + * Index of node in parent + * @param bool $count_all True to count all tags, false to ignore text and comments + * @return int -1 if not found + */ + function index($count_all = true) { + if (!$this->parent) { + return -1; + } elseif ($count_all) { + return $this->parent->findChild($this); + } else{ + $index = -1; + //foreach($this->parent->children as &$c) { + // if (!$c->isTextOrComment()) { + // ++$index; + // } + // if ($c === $this) { + // return $index; + // } + //} + + foreach(array_keys($this->parent->children) as $k) { + if (!$this->parent->children[$k]->isTextOrComment()) { + ++$index; + } + if ($this->parent->children[$k] === $this) { + return $index; + } + } + return -1; + } + } + + /** + * Change index of node in parent + * @param int $index New index + */ + function setIndex($index) { + if ($this->parent) { + if 
($index > $this->index()) { + --$index; + } + $this->delete(); + $this->parent->addChild($this, $index); + } + } + + /** + * Index of all similar nodes in parent + * @return int -1 if not found + */ + function typeIndex() { + if (!$this->parent) { + return -1; + } else { + $index = -1; + //foreach($this->parent->children as &$c) { + // if (strcasecmp($this->tag, $c->tag) === 0) { + // ++$index; + // } + // if ($c === $this) { + // return $index; + // } + //} + + foreach(array_keys($this->parent->children) as $k) { + if (strcasecmp($this->tag, $this->parent->children[$k]->tag) === 0) { + ++$index; + } + if ($this->parent->children[$k] === $this) { + return $index; + } + } + return -1; + } + } + + /** + * Calculate indent of node (number of parent tags - 1) + * @return int + */ + function indent() { + return (($this->parent) ? $this->parent->indent() + 1 : -1); + } + + /** + * Get sibling node + * @param int $offset Offset from current node + * @return DomNode Null if not found + */ + function getSibling($offset = 1) { + $index = $this->index() + $offset; + if (($index >= 0) && ($index < $this->parent->childCount())) { + return $this->parent->getChild($index); + } else { + return null; + } + } + + /** + * Get node next to current + * @param bool $skip_text_comments + * @return DomNode Null if not found + * @see getSibling() + * @see getPreviousSibling() + */ + function getNextSibling($skip_text_comments = true) { + $offset = 1; + while (($n = $this->getSibling($offset)) !== null) { + if ($skip_text_comments && ($n->tag[0] === '~')) { + ++$offset; + } else { + break; + } + } + + return $n; + } + + /** + * Get node previous to current + * @param bool $skip_text_comments + * @return DomNode Null if not found + * @see getSibling() + * @see getNextSibling() + */ + function getPreviousSibling($skip_text_comments = true) { + $offset = -1; + while (($n = $this->getSibling($offset)) !== null) { + if ($skip_text_comments && ($n->tag[0] === '~')) { + --$offset; + } else { + 
break; + } + } + + return $n; + } + + /** + * Get namespace of node + * @return string + * @see setNamespace() + */ + function getNamespace() { + if ($this->tag_ns === null) { + $a = explode(':', $this->tag, 2); + if (empty($a[1])) { + $this->tag_ns = array('', $a[0]); + } else { + $this->tag_ns = array($a[0], $a[1]); + } + } + + return $this->tag_ns[0]; + } + + /** + * Set namespace of node + * @param string $ns + * @see getNamespace() + */ + function setNamespace($ns) { + if ($this->getNamespace() !== $ns) { + $this->tag_ns[0] = $ns; + $this->tag = $ns.':'.$this->tag_ns[1]; + } + } + + /** + * Get tagname of node (without namespace) + * @return string + * @see setTag() + */ + function getTag() { + if ($this->tag_ns === null) { + $this->getNamespace(); + } + + return $this->tag_ns[1]; + } + + /** + * Set tag (with or without namespace) + * @param string $tag + * @param bool $with_ns Does $tag include namespace? + * @see getTag() + */ + function setTag($tag, $with_ns = false) { + $with_ns = $with_ns || (strpos($tag, ':') !== false); + if ($with_ns) { + $this->tag = $tag; + $this->tag_ns = null; + } elseif ($this->getTag() !== $tag) { + $this->tag_ns[1] = $tag; + $this->tag = (($this->tag_ns[0]) ? 
$this->tag_ns[0].':' : '').$tag; + } + } + + /** + * Try to determine the encoding of the current tag + * @return string|bool False if encoding could not be found + */ + function getEncoding() { + $root = $this->getRoot(); + if ($root !== null) { + if ($enc = $root->select('meta[charset]', 0, true, true)) { + return $enc->getAttribute("charset"); + } elseif ($enc = $root->select('"?xml"[encoding]', 0, true, true)) { + return $enc->getAttribute("encoding"); + } elseif ($enc = $root->select('meta[content*="charset="]', 0, true, true)) { + $enc = $enc->getAttribute("content"); + return substr($enc, strpos($enc, "charset=")+8); + } + } + + return false; + } + + /** + * Number of children in node + * @param bool $ignore_text_comments Ignore text/comments with calculation + * @return int + */ + function childCount($ignore_text_comments = false) { + if (!$ignore_text_comments) { + return count($this->children); + } else{ + $count = 0; + //foreach($this->children as &$c) { + // if (!$c->isTextOrComment()) { + // ++$count; + // } + //} + + foreach(array_keys($this->children) as $k) { + if (!$this->children[$k]->isTextOrComment()) { + ++$count; + } + } + return $count; + } + } + + /** + * Find node in children + * @param DomNode $child + * @return int|false Index of child (can be 0), false if not found + */ + function findChild($child) { + return array_search($child, $this->children, true); + } + + /** + * Checks if node has another node as direct child + * @param DomNode $child + * @return bool True if found; compared strictly against false so a child at index 0 counts + */ + function hasChild($child) { + return ($this->findChild($child) !== false); + } + + /** + * Get childnode + * @param int|DomNode $child Index, negative to count from end + * @param bool $ignore_text_comments Ignore text/comments with index calculation + * @return DomNode + */ + function &getChild($child, $ignore_text_comments = false) { + if (!is_int($child)) { + $child = $this->findChild($child); + } elseif ($child < 0) { + $child += $this->childCount($ignore_text_comments); + } + + if ($ignore_text_comments) { + $count = 0;
+ $last = null; + //foreach($this->children as &$c) { + // if (!$c->isTextOrComment()) { + // if ($count++ === $child) { + // return $c; + // } + // $last = $c; + // } + //} + + foreach(array_keys($this->children) as $k) { + if (!$this->children[$k]->isTextOrComment()) { + if ($count++ === $child) { + return $this->children[$k]; + } + $last = $this->children[$k]; + } + } + return (($child > $count) ? $last : null); + } else { + return $this->children[$child]; + } + } + + /** + * Add child node + * @param string|DomNode $tag Tag name or object + * @param int $offset Position to insert node, negative to count from end, null to append + * @return DomNode Added node + */ + #php4 + #function &addChild($tag, &$offset) { + #php4e + #php5 + function &addChild($tag, &$offset = null) { + #php5e + if (is_array($tag)) { + $tag = new $this->childClass($tag, $this); + } elseif (is_string($tag)) { + $nodes = $this->createNodes($tag); + $tag = array_shift($nodes); + + if ($tag && $tag->parent !== $this) { + $index = false; + $tag->changeParent($this, $index); + } + } elseif (is_object($tag) && $tag->parent !== $this) { + $index = false; //Needs to be passed by ref + $tag->changeParent($this, $index); + } + + if (is_int($offset) && ($offset < count($this->children)) && ($offset !== -1)) { + if ($offset < 0) { + $offset += count($this->children); + } + array_splice($this->children, $offset++, 0, array(&$tag)); + } else { + $this->children[] =& $tag; + } + + return $tag; + } + + /** + * First child node + * @param bool $ignore_text_comments Ignore text/comments with index calculation + * @return DomNode + */ + function &firstChild($ignore_text_comments = false) { + return $this->getChild(0, $ignore_text_comments); + } + + /** + * Last child node + * @param bool $ignore_text_comments Ignore text/comments with index calculation + * @return DomNode + */ + function &lastChild($ignore_text_comments = false) { + return $this->getChild(-1, $ignore_text_comments); + } + + /** + * Insert 
childnode + * @param string|DomNode $tag Tagname or object + * @param int $offset Position to insert node, negative to count from end, null to append + * @return DomNode Added node + * @see addChild(); + */ + function &insertChild($tag, $index) { + return $this->addChild($tag, $index); + } + + /** + * Add text node + * @param string $text + * @param int $offset Position to insert node, negative to count from end, null to append + * @return DomNode Added node + * @see addChild(); + */ + #php4 + #function &addText($text, &$offset) { + #php4e + #php5 + function &addText($text, &$offset = null) { + #php5e + return $this->addChild(new $this->childClass_Text($this, $text), $offset); + } + + /** + * Add comment node + * @param string $text + * @param int $offset Position to insert node, negative to count from end, null to append + * @return DomNode Added node + * @see addChild(); + */ + #php4 + #function &addComment($text, &$offset) { + #php4e + #php5 + function &addComment($text, &$offset = null) { + #php5e + return $this->addChild(new $this->childClass_Comment($this, $text), $offset); + } + + /** + * Add conditional node + * @param string $condition + * @param bool True for ';} +} + +/** + * Node subclass for conditional tags + */ +class ConditionalTagNode extends DomNode { + #php4 Compatibility with PHP4, this gets changed to a regular var in release tool + #static $NODE_TYPE = self::NODE_CONDITIONAL; + #php4e + #php5 + const NODE_TYPE = self::NODE_CONDITIONAL; + #php5e + var $tag = '~conditional~'; + + /** + * @var string + */ + var $condition = ''; + + /** + * Class constructor + * @param DomNode $parent + * @param string $condition e.g. 
"if IE" + * @param bool $hidden ', false) !== self::TOK_UNKNOWN) { + $this->status['comment'] = $this->getTokenString(1, -1); + --$this->pos; + } else { + $this->status['comment'] = $this->getTokenString(1, -1); + $this->pos += 2; + } + $this->status['last_pos'] = $this->pos; + + return true; + } + + /** + * Parse doctype tag + * @internal Gets called with doctype ("pos; + if ($this->next_search('[>', false) === self::TOK_UNKNOWN) { + if ($this->doc[$this->pos] === '[') { + if (($this->next_pos(']', false) !== self::TOK_UNKNOWN) || ($this->next_pos('>', false) !== self::TOK_UNKNOWN)) { + $this->addError('Invalid doctype'); + return false; + } + } + + $this->token_start = $start; + $this->status['dtd'] = $this->getTokenString(2, -1); + $this->status['last_pos'] = $this->pos; + return true; + } else { + $this->addError('Invalid doctype'); + return false; + } + } + + /** + * Parse cdata tag + * @internal Gets called with cdata ("next_pos(']]>', false) === self::TOK_UNKNOWN) { + $this->status['cdata'] = $this->getTokenString(9, -1); + $this->status['last_pos'] = $this->pos + 2; + return true; + } else { + $this->addError('Invalid cdata tag'); + return false; + } + } + + /** + * Parse php tags + * @internal Gets called with php tags ("pos; + if ($this->next_pos('?>', false) !== self::TOK_UNKNOWN) { + $this->pos -= 2; //End of file + } + + $len = $this->pos - 1 - $start; + $this->status['text'] = (($len > 0) ? substr($this->doc, $start + 1, $len) : ''); + $this->status['last_pos'] = ++$this->pos; + return true; + } + + /** + * Parse asp tags + * @internal Gets called with asp tags ("<%") + * @return bool + */ + function parse_asp() { + $start = $this->pos; + if ($this->next_pos('%>', false) !== self::TOK_UNKNOWN) { + $this->pos -= 2; //End of file + } + + $len = $this->pos - 1 - $start; + $this->status['text'] = (($len > 0) ? 
substr($this->doc, $start + 1, $len) : ''); + $this->status['last_pos'] = ++$this->pos; + return true; + } + + /** + * Parse style tags + * @internal Gets called with php tags ("', false) === self::TOK_UNKNOWN)) { + $len = $this->pos - 1 - $start; + $this->status['text'] = (($len > 0) ? substr($this->doc, $start + 1, $len) : ''); + + $this->pos += 7; + $this->status['last_pos'] = $this->pos; + return true; + } else { + $this->addError('No end for style tag found'); + return false; + } + } + + /** + * Parse script tags + * @internal Gets called with php tags ("', false) === self::TOK_UNKNOWN)) { + $len = $this->pos - 1 - $start; + $this->status['text'] = (($len > 0) ? substr($this->doc, $start + 1, $len) : ''); + + $this->pos += 8; + $this->status['last_pos'] = $this->pos; + return true; + } else { + $this->addError('No end for script tag found'); + return false; + } + } + + /** + * Parse conditional tags (+ all conditional tags inside) + * @internal Gets called with IE conditionals ("status['closing_tag']) { + $this->pos += 8; + } else { + $this->pos += (($this->status['comment']) ? 5 : 3); + if ($this->next_pos(']', false) !== self::TOK_UNKNOWN) { + $this->addError('"]" not found in conditional tag'); + return false; + } + $this->status['tag_condition'] = $this->getTokenString(0, -1); + } + + if ($this->next_no_whitespace() !== self::TOK_TAG_CLOSE) { + $this->addError('No ">" tag found 2 for conditional tag'); + return false; + } + + if ($this->status['comment']) { + $this->status['last_pos'] = $this->pos; + if ($this->next_pos('-->', false) !== self::TOK_UNKNOWN) { + $this->addError('No ending tag found for conditional tag'); + $this->pos = $this->size - 1; + + $len = $this->pos - 1 - $this->status['last_pos']; + $this->status['text'] = (($len > 0) ? substr($this->doc, $this->status['last_pos'] + 1, $len) : ''); + } else { + $len = $this->pos - 10 - $this->status['last_pos']; + $this->status['text'] = (($len > 0) ? 
substr($this->doc, $this->status['last_pos'] + 1, $len) : ''); + $this->pos += 2; + } + } + + $this->status['last_pos'] = $this->pos; + return true; + } + + /** + * Parse attributes (names + value) + * @internal Stores attributes in {@link $status}[attributes] (array(ATTR => VAL)) + * @return bool + */ + function parse_attributes() { + $this->status['attributes'] = array(); + + while ($this->next_no_whitespace() === self::TOK_IDENTIFIER) { + $attr = $this->getTokenString(); + if (($attr === '?') || ($attr === '%')) { + //Probably closing tags + break; + } + + if ($this->next_no_whitespace() === self::TOK_EQUALS) { + if ($this->next_no_whitespace() === self::TOK_STRING) { + $val = $this->getTokenString(1, -1); + } else { + $this->token_start = $this->pos; + if (!isset($stop)) { + $stop = $this->whitespace; + $stop['<'] = true; + $stop['>'] = true; + } + + while ((++$this->pos < $this->size) && (!isset($stop[$this->doc[$this->pos]]))) { + // Do nothing. + } + --$this->pos; + + $val = $this->getTokenString(); + + if (trim($val) === '') { + $this->addError('Invalid attribute value'); + return false; + } + } + } else { + $val = $attr; + $this->pos = (($this->token_start) ? 
$this->token_start : $this->pos) - 1; + } + + $this->status['attributes'][$attr] = $val; + } + + return true; + } + + /** + * Default callback for tags + * @internal Gets called after the tagname () + * @return bool + */ + function parse_tag_default() { + if ($this->status['closing_tag']) { + $this->status['attributes'] = array(); + $this->next_no_whitespace(); + } else { + if (!$this->parse_attributes()) { + return false; + } + } + + if ($this->token !== self::TOK_TAG_CLOSE) { + if ($this->token === self::TOK_SLASH_FORWARD) { + $this->status['self_close'] = true; + $this->next(); + } elseif ((($this->status['tag_name'][0] === '?') && ($this->doc[$this->pos] === '?')) || (($this->status['tag_name'][0] === '%') && ($this->doc[$this->pos] === '%'))) { + $this->status['self_close'] = true; + $this->pos++; + + if (isset($this->char_map[$this->doc[$this->pos]]) && (!is_string($this->char_map[$this->doc[$this->pos]]))) { + $this->token = $this->char_map[$this->doc[$this->pos]]; + } else { + $this->token = self::TOK_UNKNOWN; + } + }/* else { + $this->status['self_close'] = false; + }*/ + } + + if ($this->token !== self::TOK_TAG_CLOSE) { + $this->addError('Expected ">", but found "'.$this->getTokenString().'"'); + if ($this->next_pos('>', false) !== self::TOK_UNKNOWN) { + $this->addError('No ">" tag found for "'.$this->status['tag_name'].'" tag'); + return false; + } + } + + return true; + } + + /** + * Parse tag + * @internal Gets called after opening tag (<*ENTERS_HERE*html attribute="value">) + * @internal Stores information about the tag in {@link $status} (comment, closing_tag, tag_name) + * @return bool + */ + function parse_tag() { + $start = $this->pos; + $this->status['self_close'] = false; + $this->parse_text(); + + $next = (($this->pos + 1) < $this->size) ? 
$this->doc[$this->pos + 1] : ''; + if ($next === '!') { + $this->status['closing_tag'] = false; + + if (substr($this->doc, $this->pos + 2, 2) === '--') { + $this->status['comment'] = true; + + if (($this->doc[$this->pos + 4] === '[') && (strcasecmp(substr($this->doc, $this->pos + 5, 2), 'if') === 0)) { + return $this->parse_conditional(); + } else { + return $this->parse_comment(); + } + } else { + $this->status['comment'] = false; + + if ($this->doc[$this->pos + 2] === '[') { + if (strcasecmp(substr($this->doc, $this->pos + 3, 2), 'if') === 0) { + return $this->parse_conditional(); + } elseif (strcasecmp(substr($this->doc, $this->pos + 3, 5), 'endif') === 0) { + $this->status['closing_tag'] = true; + return $this->parse_conditional(); + } elseif (strcasecmp(substr($this->doc, $this->pos + 3, 5), 'cdata') === 0) { + return $this->parse_cdata(); + } + } + } + } elseif ($next === '/') { + $this->status['closing_tag'] = true; + ++$this->pos; + } else { + $this->status['closing_tag'] = false; + } + + if ($this->next() !== self::TOK_IDENTIFIER) { + $this->addError('Tagname expected'); + //if ($this->next_pos('>', false) === self::TOK_UNKNOWN) { + $this->status['last_pos'] = $start - 1; + return true; + //} else { + // return false; + //} + } + + $tag = $this->getTokenString(); + $this->status['tag_name'] = $tag; + $tag = strtolower($tag); + + if (isset($this->tag_map[$tag])) { + $res = $this->{$this->tag_map[$tag]}(); + } else { + $res = $this->parse_tag_default(); + } + + $this->status['last_pos'] = $this->pos; + return $res; + } + + /** + * Parse full document + * @return bool + */ + function parse_all() { + $this->errors = array(); + $this->status['last_pos'] = -1; + + if (($this->token === self::TOK_TAG_OPEN) || ($this->next_pos('<', false) === self::TOK_UNKNOWN)) { + do { + if (!$this->parse_tag()) { + return false; + } + } while ($this->next_pos('<') !== self::TOK_NULL); + } + + $this->pos = $this->size; + $this->parse_text(); + + return true; + } +} + +/** + * 
Parses a HTML document into a HTML DOM + */ +class HtmlParser extends HtmlParserBase { + + /** + * Root object + * @internal If string, then it will create a new instance as root + * @var DomNode + */ + var $root = 'pQuery\\DomNode'; + + /** + * Current parsing hierarchy + * @internal Root is always at index 0, current tag is at the end of the array + * @var array + * @access private + */ + var $hierarchy = array(); + + /** + * Tags that don't need closing tags + * @var array + * @access private + */ + var $tags_selfclose = array( + 'area' => true, + 'base' => true, + 'basefont' => true, + 'br' => true, + 'col' => true, + 'command' => true, + 'embed' => true, + 'frame' => true, + 'hr' => true, + 'img' => true, + 'input' => true, + 'ins' => true, + 'keygen' => true, + 'link' => true, + 'meta' => true, + 'param' => true, + 'source' => true, + 'track' => true, + 'wbr' => true + ); + + /** + * Class constructor + * @param string $doc Document to be tokenized + * @param int $pos Position to start parsing + * @param DomNode $root Root node, null to auto create + */ + function __construct($doc = '', $pos = 0, $root = null) { + if ($root === null) { + $root = new $this->root('~root~', null); + } + $this->root =& $root; + + parent::__construct($doc, $pos); + } + + #php4 PHP4 class constructor compatibility + #function HtmlParser($doc = '', $pos = 0, $root = null) {return $this->__construct($doc, $pos, $root);} + #php4e + + /** + * Class magic invoke method, performs {@link select()} + * @return array + * @access private + */ + function __invoke($query = '*') { + return $this->select($query); + } + + /** + * Class magic toString method, performs {@link DomNode::toString()} + * @return string + * @access private + */ + function __toString() { + return $this->root->getInnerText(); + } + + /** + * Performs a css select query on the root node + * @see DomNode::select() + * @return array + */ + function select($query = '*', $index = false, $recursive = true, $check_self = false) 
{ + return $this->root->select($query, $index, $recursive, $check_self); + } + + /** + * Updates the current hierarchy status and checks for + * correct opening/closing of tags + * @param bool $self_close Is current tag self closing? Null to use {@link tags_selfclose} + * @internal This is where most of the nodes get added + * @access private + */ + protected function parse_hierarchy($self_close = null) { + if ($self_close === null) { + $this->status['self_close'] = ($self_close = isset($this->tags_selfclose[strtolower($this->status['tag_name'])])); + } + + if ($self_close) { + if ($this->status['closing_tag']) { + + //$c = end($this->hierarchy)->children + $c = $this->hierarchy[count($this->hierarchy) - 1]->children; + $found = false; + for ($count = count($c), $i = $count - 1; $i >= 0; $i--) { + if (strcasecmp($c[$i]->tag, $this->status['tag_name']) === 0) { + for($ii = $i + 1; $ii < $count; $ii++) { + $index = null; //Needs to be passed by ref + $c[$ii]->changeParent($c[$i], $index); //$c is a by-value copy, so address each trailing sibling by its own index + } + $c[$i]->self_close = false; + + $found = true; + break; + } + } + + if (!$found) { + $this->addError('Closing tag "'.$this->status['tag_name'].'" which is not open'); + } + + } elseif ($this->status['tag_name'][0] === '?') { + //end($this->hierarchy)->addXML($this->status['tag_name'], '', $this->status['attributes']); + $index = null; //Needs to be passed by ref + $this->hierarchy[count($this->hierarchy) - 1]->addXML($this->status['tag_name'], '', $this->status['attributes'], $index); + } elseif ($this->status['tag_name'][0] === '%') { + //end($this->hierarchy)->addASP($this->status['tag_name'], '', $this->status['attributes']); + $index = null; //Needs to be passed by ref + $this->hierarchy[count($this->hierarchy) - 1]->addASP($this->status['tag_name'], '', $this->status['attributes'], $index); + } else { + //end($this->hierarchy)->addChild($this->status); + $index = null; //Needs to be passed by ref + $this->hierarchy[count($this->hierarchy) - 1]->addChild($this->status,
$index); + } + } elseif ($this->status['closing_tag']) { + $found = false; + for ($count = count($this->hierarchy), $i = $count - 1; $i >= 0; $i--) { + if (strcasecmp($this->hierarchy[$i]->tag, $this->status['tag_name']) === 0) { + + for($ii = ($count - $i - 1); $ii >= 0; $ii--) { + $e = array_pop($this->hierarchy); + if ($ii > 0) { + $this->addError('Closing tag "'.$this->status['tag_name'].'" while "'.$e->tag.'" is not closed yet'); + } + } + + $found = true; + break; + } + } + + if (!$found) { + $this->addError('Closing tag "'.$this->status['tag_name'].'" which is not open'); + } + + } else { + //$this->hierarchy[] = end($this->hierarchy)->addChild($this->status); + $index = null; //Needs to be passed by ref + $this->hierarchy[] = $this->hierarchy[count($this->hierarchy) - 1]->addChild($this->status, $index); + } + } + + function parse_cdata() { + if (!parent::parse_cdata()) {return false;} + + //end($this->hierarchy)->addCDATA($this->status['cdata']); + $index = null; //Needs to be passed by ref + $this->hierarchy[count($this->hierarchy) - 1]->addCDATA($this->status['cdata'], $index); + return true; + } + + function parse_comment() { + if (!parent::parse_comment()) {return false;} + + //end($this->hierarchy)->addComment($this->status['comment']); + $index = null; //Needs to be passed by ref + $this->hierarchy[count($this->hierarchy) - 1]->addComment($this->status['comment'], $index); + return true; + } + + function parse_conditional() { + if (!parent::parse_conditional()) {return false;} + + if ($this->status['comment']) { + //$e = end($this->hierarchy)->addConditional($this->status['tag_condition'], true); + $index = null; //Needs to be passed by ref + $e = $this->hierarchy[count($this->hierarchy) - 1]->addConditional($this->status['tag_condition'], true, $index); + if ($this->status['text'] !== '') { + $index = null; //Needs to be passed by ref + $e->addText($this->status['text'], $index); + } + } else { + if ($this->status['closing_tag']) { + 
$this->parse_hierarchy(false); + } else { + //$this->hierarchy[] = end($this->hierarchy)->addConditional($this->status['tag_condition'], false); + $index = null; //Needs to be passed by ref + $this->hierarchy[] = $this->hierarchy[count($this->hierarchy) - 1]->addConditional($this->status['tag_condition'], false, $index); + } + } + + return true; + } + + function parse_doctype() { + if (!parent::parse_doctype()) {return false;} + + //end($this->hierarchy)->addDoctype($this->status['dtd']); + $index = null; //Needs to be passed by ref + $this->hierarchy[count($this->hierarchy) - 1]->addDoctype($this->status['dtd'], $index); + return true; + } + + function parse_php() { + if (!parent::parse_php()) {return false;} + + //end($this->hierarchy)->addXML('php', $this->status['text']); + $index = null; //Needs to be passed by ref + $this->hierarchy[count($this->hierarchy) - 1]->addXML('php', $this->status['text'], $index); + return true; + } + + function parse_asp() { + if (!parent::parse_asp()) {return false;} + + //end($this->hierarchy)->addASP('', $this->status['text']); + $index = null; //Needs to be passed by ref + $this->hierarchy[count($this->hierarchy) - 1]->addASP('', $this->status['text'], $index); + return true; + } + + function parse_script() { + if (!parent::parse_script()) {return false;} + + //$e = end($this->hierarchy)->addChild($this->status); + $index = null; //Needs to be passed by ref + $e = $this->hierarchy[count($this->hierarchy) - 1]->addChild($this->status, $index); + if ($this->status['text'] !== '') { + $index = null; //Needs to be passed by ref + $e->addText($this->status['text'], $index); + } + return true; + } + + function parse_style() { + if (!parent::parse_style()) {return false;} + + //$e = end($this->hierarchy)->addChild($this->status); + $index = null; //Needs to be passed by ref + $e = $this->hierarchy[count($this->hierarchy) - 1]->addChild($this->status, $index); + if ($this->status['text'] !== '') { + $index = null; //Needs to be passed 
by ref + $e->addText($this->status['text'], $index); + } + return true; + } + + function parse_tag_default() { + if (!parent::parse_tag_default()) {return false;} + + $this->parse_hierarchy(($this->status['self_close']) ? true : null); + return true; + } + + function parse_text() { + parent::parse_text(); + if ($this->status['text'] !== '') { + //end($this->hierarchy)->addText($this->status['text']); + $index = null; //Needs to be passed by ref + $this->hierarchy[count($this->hierarchy) - 1]->addText($this->status['text'], $index); + } + } + + function parse_all() { + $this->hierarchy = array(&$this->root); + return ((parent::parse_all()) ? $this->root : false); + } +} + +/** + * HTML5 specific parser (adds support for omittable closing tags) + */ +class Html5Parser extends HtmlParser { + + /** + * Tags with ommitable closing tags + * @var array array('tag2' => 'tag1') will close tag1 if following (not child) tag is tag2 + * @access private + */ + var $tags_optional_close = array( + //Current tag => Previous tag + 'li' => array('li' => true), + 'dt' => array('dt' => true, 'dd' => true), + 'dd' => array('dt' => true, 'dd' => true), + 'address' => array('p' => true), + 'article' => array('p' => true), + 'aside' => array('p' => true), + 'blockquote' => array('p' => true), + 'dir' => array('p' => true), + 'div' => array('p' => true), + 'dl' => array('p' => true), + 'fieldset' => array('p' => true), + 'footer' => array('p' => true), + 'form' => array('p' => true), + 'h1' => array('p' => true), + 'h2' => array('p' => true), + 'h3' => array('p' => true), + 'h4' => array('p' => true), + 'h5' => array('p' => true), + 'h6' => array('p' => true), + 'header' => array('p' => true), + 'hgroup' => array('p' => true), + 'hr' => array('p' => true), + 'menu' => array('p' => true), + 'nav' => array('p' => true), + 'ol' => array('p' => true), + 'p' => array('p' => true), + 'pre' => array('p' => true), + 'section' => array('p' => true), + 'table' => array('p' => true), + 'ul' => 
array('p' => true), + 'rt' => array('rt' => true, 'rp' => true), + 'rp' => array('rt' => true, 'rp' => true), + 'optgroup' => array('optgroup' => true, 'option' => true), + 'option' => array('option' => true), //must be keyed by tag name: lookups use isset() + 'tbody' => array('thead' => true, 'tbody' => true, 'tfoot' => true), + 'tfoot' => array('thead' => true, 'tbody' => true), + 'tr' => array('tr' => true), + 'td' => array('td' => true, 'th' => true), + 'th' => array('td' => true, 'th' => true), + 'body' => array('head' => true) + ); + + protected function parse_hierarchy($self_close = null) { + $tag_curr = strtolower($this->status['tag_name']); + if ($self_close === null) { + $this->status['self_close'] = ($self_close = isset($this->tags_selfclose[$tag_curr])); + } + + if (! ($self_close || $this->status['closing_tag'])) { + //$tag_prev = strtolower(end($this->hierarchy)->tag); + $tag_prev = strtolower($this->hierarchy[count($this->hierarchy) - 1]->tag); + if (isset($this->tags_optional_close[$tag_curr]) && isset($this->tags_optional_close[$tag_curr][$tag_prev])) { + array_pop($this->hierarchy); + } + } + + return parent::parse_hierarchy($self_close); + } +} + +?> \ No newline at end of file diff --git a/lib-3rd-party/pquery/gan_selector_html.php b/lib-3rd-party/pquery/gan_selector_html.php new file mode 100644 index 0000000000..c7c2b4fb47 --- /dev/null +++ b/lib-3rd-party/pquery/gan_selector_html.php @@ -0,0 +1,949 @@ + + * @copyright 2010 Niels A.D., 2014 Todd Burry + * @license http://opensource.org/licenses/LGPL-2.1 LGPL-2.1 + * @package pQuery + */ + +namespace pQuery; + +/** + * Tokenizes a css selector query + */ +class CSSQueryTokenizer extends TokenizerBase { + + /** + * Opening bracket token, used for "[" + */ + const TOK_BRACKET_OPEN = 100; + /** + * Closing bracket token, used for "]" + */ + const TOK_BRACKET_CLOSE = 101; + /** + * Opening brace token, used for "(" + */ + const TOK_BRACE_OPEN = 102; + /** + * Closing brace token, used for ")" + */ + const TOK_BRACE_CLOSE = 103; + /** + * String token +
*/ + const TOK_STRING = 104; + /** + * Colon token, used for ":" + */ + const TOK_COLON = 105; + /** + * Comma token, used for "," + */ + const TOK_COMMA = 106; + /** + * "Not" token, used for "!" + */ + const TOK_NOT = 107; + + /** + * "All" token, used for "*" in query + */ + const TOK_ALL = 108; + /** + * Pipe token, used for "|" + */ + const TOK_PIPE = 109; + /** + * Plus token, used for "+" + */ + const TOK_PLUS = 110; + /** + * "Sibling" token, used for "~" in query + */ + const TOK_SIBLING = 111; + /** + * Class token, used for "." in query + */ + const TOK_CLASS = 112; + /** + * ID token, used for "#" in query + */ + const TOK_ID = 113; + /** + * Child token, used for ">" in query + */ + const TOK_CHILD = 114; + + /** + * Attribute compare prefix token, used for "|=" + */ + const TOK_COMPARE_PREFIX = 115; + /** + * Attribute contains token, used for "*=" + */ + const TOK_COMPARE_CONTAINS = 116; + /** + * Attribute contains word token, used for "~=" + */ + const TOK_COMPARE_CONTAINS_WORD = 117; + /** + * Attribute compare end token, used for "$=" + */ + const TOK_COMPARE_ENDS = 118; + /** + * Attribute equals token, used for "=" + */ + const TOK_COMPARE_EQUALS = 119; + /** + * Attribute not equal token, used for "!=" + */ + const TOK_COMPARE_NOT_EQUAL = 120; + /** + * Attribute compare bigger than token, used for ">=" + */ + const TOK_COMPARE_BIGGER_THAN = 121; + /** + * Attribute compare smaller than token, used for "<=" + */ + const TOK_COMPARE_SMALLER_THAN = 122; + /** + * Attribute compare with regex, used for "%=" + */ + const TOK_COMPARE_REGEX = 123; + /** + * Attribute compare start token, used for "^=" + */ + const TOK_COMPARE_STARTS = 124; + + /** + * Sets query identifiers + * @see TokenizerBase::$identifiers + * @access private + */ + var $identifiers = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890_-?'; + + /** + * Map characters to match their tokens + * @see TokenizerBase::$custom_char_map + * @access private + */ + var 
$custom_char_map = array(
        '.' => self::TOK_CLASS,
        '#' => self::TOK_ID,
        ',' => self::TOK_COMMA,
        '>' => 'parse_gt', // TOK_CHILD or TOK_COMPARE_BIGGER_THAN, decided by look-ahead

        '+' => self::TOK_PLUS,
        '~' => 'parse_sibling',

        '|' => 'parse_pipe',
        '*' => 'parse_star',
        '$' => 'parse_compare',
        '=' => self::TOK_COMPARE_EQUALS,
        '!' => 'parse_not',
        '%' => 'parse_compare',
        '^' => 'parse_compare',
        '<' => 'parse_compare',

        '"' => 'parse_string',
        "'" => 'parse_string',
        '(' => self::TOK_BRACE_OPEN,
        ')' => self::TOK_BRACE_CLOSE,
        '[' => self::TOK_BRACKET_OPEN,
        ']' => self::TOK_BRACKET_CLOSE,
        ':' => self::TOK_COLON
    );

    /**
     * Parse ">" character
     * @internal Yields {@link TOK_COMPARE_BIGGER_THAN} for ">=" (consuming the "="), otherwise {@link TOK_CHILD}
     * @return int
     */
    protected function parse_gt() {
        if ((($this->pos + 1) < $this->size) && ($this->doc[$this->pos + 1] === '=')) {
            ++$this->pos;
            return ($this->token = self::TOK_COMPARE_BIGGER_THAN);
        } else {
            return ($this->token = self::TOK_CHILD);
        }
    }

    /**
     * Parse "~" character
     * @internal Yields {@link TOK_COMPARE_CONTAINS_WORD} for "~=" (consuming the "="), otherwise {@link TOK_SIBLING}
     * @return int
     */
    protected function parse_sibling() {
        if ((($this->pos + 1) < $this->size) && ($this->doc[$this->pos + 1] === '=')) {
            ++$this->pos;
            return ($this->token = self::TOK_COMPARE_CONTAINS_WORD);
        } else {
            return ($this->token = self::TOK_SIBLING);
        }
    }

    /**
     * Parse "|" character
     * @internal Yields {@link TOK_COMPARE_PREFIX} for "|=" (consuming the "="), otherwise {@link TOK_PIPE}
     * @return int
     */
    protected function parse_pipe() {
        if ((($this->pos + 1) < $this->size) && ($this->doc[$this->pos + 1] === '=')) {
            ++$this->pos;
            return ($this->token = self::TOK_COMPARE_PREFIX);
        } else {
            return ($this->token = self::TOK_PIPE);
        }
    }

    /**
     * Parse "*" character
     * @internal Yields {@link TOK_COMPARE_CONTAINS} for "*=" (consuming the "="), otherwise {@link TOK_ALL}
     * @return int
     */
    protected function parse_star() {
        if ((($this->pos + 1) < $this->size) && ($this->doc[$this->pos + 1] === '=')) {
            ++$this->pos;
            return ($this->token = self::TOK_COMPARE_CONTAINS);
        } else {
            return ($this->token = self::TOK_ALL);
        }
    }

    /**
     * Parse "!" character
     * @internal Yields {@link TOK_COMPARE_NOT_EQUAL} for "!=" (consuming the "="), otherwise {@link TOK_NOT}
     * @return int
     */
    protected function parse_not() {
        if ((($this->pos + 1) < $this->size) && ($this->doc[$this->pos + 1] === '=')) {
            ++$this->pos;
            return ($this->token = self::TOK_COMPARE_NOT_EQUAL);
        } else {
            return ($this->token = self::TOK_NOT);
        }
    }

    /**
     * Parse the two-character compare operators "$=", "%=", "^=" and "<="
     * @internal The position is only advanced (onto the "=") when a "=" follows
     * @return int|bool Token, or false when the character is not followed by "="
     */
    protected function parse_compare() {
        if ((($this->pos + 1) < $this->size) && ($this->doc[$this->pos + 1] === '=')) {
            // Post-increment: switch on the operator character, then rest on the "="
            switch($this->doc[$this->pos++]) {
                case '$':
                    return ($this->token = self::TOK_COMPARE_ENDS);
                case '%':
                    return ($this->token = self::TOK_COMPARE_REGEX);
                case '^':
                    return ($this->token = self::TOK_COMPARE_STARTS);
                case '<':
                    return ($this->token = self::TOK_COMPARE_SMALLER_THAN);
            }
        }
        return false;
    }

    /**
     * Parse strings (" and ')
     *
     * Scans forward to the matching closing quote; a backslash skips the
     * character that follows it. An unterminated string runs to the end of
     * the document.
     * @return int
     */
    protected function parse_string() {
        $char = $this->doc[$this->pos];
        // next_search() overwrites $token_start on every call; remember where
        // the string really starts so escapes do not truncate the token.
        $start = $this->pos;

        while (true) {
            if ($this->next_search($char.'\\', false) !== self::TOK_NULL) {
                if($this->doc[$this->pos] === $char) {
                    break;
                } else {
                    // Backslash: skip the escaped character
                    ++$this->pos;
                }
            } else {
                $this->pos = $this->size - 1;
                break;
            }
        }

        $this->token_start = $start;
        return ($this->token = self::TOK_STRING);
    }

}

/**
 * Performs a css select query on HTML nodes
 */
class HtmlSelector {

    /**
     * Parser object
     * @internal If string, then it will create a new instance as parser
     * @var CSSQueryTokenizer
     */
    var $parser = 'pQuery\\CSSQueryTokenizer';

    /**
     * Target of queries
     * @var DomNode
     */
    var $root = null;

    /**
     * Last performed query, result in {@link $result}
     * @var string
     */
    var $query = '';

    /**
     * Array of matching nodes
     * @var array
     */
    var $result = array();

    /**
     * Include root in search, if false then only child nodes are evaluated
     * @var bool
     */
    var $search_root = false;

    /**
     * Search recursively
     * @var bool
     * @internal NOTE(review): stored by the constructor but not read anywhere
     * in this class; {@link parse()} always starts with a recursive search.
     */
    var $search_recursive = true;

    /**
     * Extra function map for custom filters
     * @var array
     * @internal array('root' => 'filter_root') will cause the
     * selector to call $this->filter_root at :root
     * @see DomNode::$filter_map
     */
    var $custom_filter_map = array();

    /**
     * Class constructor, immediately runs {@link select()} with $query
     * @param DomNode $root {@link $root}
     * @param string $query
     * @param bool $search_root {@link $search_root}
     * @param bool $search_recursive {@link $search_recursive}
     * @param CSSQueryTokenizer $parser If null, then default class will be used
     */
    function __construct($root, $query = '*', $search_root = false, $search_recursive = true, $parser = null) {
        if ($parser === null) {
            $parser = new $this->parser();
        }
        $this->parser = $parser;
        $this->root =& $root;

        $this->search_root = $search_root;
        $this->search_recursive = $search_recursive;

        $this->select($query);
    }

    #php4 PHP4 class constructor compatibility
    #function HtmlSelector($root, $query = '*', $search_root = false, $search_recursive = true, $parser = null) {return $this->__construct($root, $query, $search_root, $search_recursive, $parser);}
    #php4e

    /**
     * toString method, returns {@link $query}
     * @return string
     * @access private
     */
    function __toString() {
        return $this->query;
    }

    /**
     * Class magic invoke method, performs {@link select()}
     * @param string $query
     * @return array
     * @access private
     */
    function __invoke($query = '*') {
        return $this->select($query);
    }

    /**
     * Perform query
     * @param string $query
     * @return array False on failure
     */
    function select($query = '*') {
        $this->parser->setDoc($query);
        $this->query = $query;
        return (($this->parse()) ? $this->result : false);
    }

    /**
     * Trigger error
     * @param string $error
     * @internal %pos% and %tok% will be replaced in string with position and token(string)
     * @access private
     */
    protected function error($error) {
        $error = htmlentities(str_replace(
            array('%tok%', '%pos%'),
            array($this->parser->getTokenString(), (int) $this->parser->getPos()),
            $error
        ));

        trigger_error($error);
    }

    /**
     * Get identifier (parse identifier or string)
     *
     * Reads the parser's current token; quoted strings are returned without
     * their quotes and with \' \" \\ unescaped.
     * @param bool $do_error Error on failure
     * @return string False on failure
     * @access private
     */
    protected function parse_getIdentifier($do_error = true) {
        $p =& $this->parser;
        $tok = $p->token;

        if ($tok === CSSQueryTokenizer::TOK_IDENTIFIER) {
            return $p->getTokenString();
        } elseif($tok === CSSQueryTokenizer::TOK_STRING) {
            return str_replace(array('\\\'', '\\"', '\\\\'), array('\'', '"', '\\'), $p->getTokenString(1, -1));
        } elseif ($do_error) {
            $this->error('Expected identifier at %pos%!');
        }
        return false;
    }

    /**
     * Get query conditions (tag, attribute and filter conditions)
     *
     * Parses one comma separated list of simple selectors starting at the
     * parser's current token. Each list entry becomes an array that may hold
     * the keys 'tags', 'attributes' and 'filters'.
     * @return array False on failure
     * @see DomNode::match()
     * @access private
     */
    protected function parse_conditions() {
        $p =& $this->parser;
        $tok = $p->token;

        if ($tok === CSSQueryTokenizer::TOK_NULL) {
            $this->error('Invalid search pattern(1): Empty string!');
            return false;
        }
        $conditions_all = array();

        //Tags
        while ($tok !== CSSQueryTokenizer::TOK_NULL) {
            $conditions = array('tags' => array(), 'attributes' => array());

            if ($tok === CSSQueryTokenizer::TOK_ALL) {
                // "*", "*|*" or "*|name"
                $tok = $p->next();
                if (($tok === CSSQueryTokenizer::TOK_PIPE) && ($tok = $p->next()) && ($tok !== CSSQueryTokenizer::TOK_ALL)) {
                    if (($tag = $this->parse_getIdentifier()) === false) {
                        return false;
                    }
                    $conditions['tags'][] = array(
                        'tag' => $tag,
                        'compare' => 'name'
                    );
                    $tok = $p->next_no_whitespace();
                } else {
                    $conditions['tags'][''] = array(
                        'tag' => '',
                        'match' => false
                    );
                    if ($tok === CSSQueryTokenizer::TOK_ALL) {
                        $tok = $p->next_no_whitespace();
                    }
                }
            } elseif ($tok === CSSQueryTokenizer::TOK_PIPE) {
                // "|*" or "|name"
                $tok = $p->next();
                if ($tok === CSSQueryTokenizer::TOK_ALL) {
                    $conditions['tags'][] = array(
                        'tag' => '',
                        'compare' => 'namespace',
                    );
                } elseif (($tag = $this->parse_getIdentifier()) !== false) {
                    $conditions['tags'][] = array(
                        'tag' => $tag,
                        'compare' => 'total',
                    );
                } else {
                    return false;
                }
                $tok = $p->next_no_whitespace();
            } elseif ($tok === CSSQueryTokenizer::TOK_BRACE_OPEN) {
                // "(a, b + !c)": group of tag conditions joined with or (",") / and ("+")
                $tok = $p->next_no_whitespace();
                $last_mode = 'or';

                while (true) {
                    $match = true;
                    $compare = 'total';
                    // Reset per entry so a bare "*" never reads an undefined or stale tag.
                    // NOTE(review): how DomNode::match() treats an empty tag here is not visible in this file.
                    $tag = '';

                    if ($tok === CSSQueryTokenizer::TOK_NOT) {
                        $match = false;
                        $tok = $p->next_no_whitespace();
                    }

                    if ($tok === CSSQueryTokenizer::TOK_ALL) {
                        $tok = $p->next();
                        if ($tok === CSSQueryTokenizer::TOK_PIPE) {
                            // The tokenizer owns next(); this class has no such method
                            $tok = $p->next();
                            $compare = 'name';
                            if (($tag = $this->parse_getIdentifier()) === false) {
                                return false;
                            }
                            // Move past the identifier, like the sibling branches do
                            $tok = $p->next_no_whitespace();
                        }
                    } elseif ($tok === CSSQueryTokenizer::TOK_PIPE) {
                        $tok = $p->next();
                        if ($tok === CSSQueryTokenizer::TOK_ALL) {
                            $tag = '';
                            $compare = 'namespace';
                        } elseif (($tag = $this->parse_getIdentifier()) === false) {
                            return false;
                        }
                        $tok = $p->next_no_whitespace();
                    } else {
                        if (($tag = $this->parse_getIdentifier()) === false) {
                            return false;
                        }
                        $tok = $p->next();
                        if ($tok === CSSQueryTokenizer::TOK_PIPE) {
                            $tok = $p->next();

                            if ($tok === CSSQueryTokenizer::TOK_ALL) {
                                $compare = 'namespace';
                            } elseif (($tag_name = $this->parse_getIdentifier()) !== false) {
                                $tag = $tag.':'.$tag_name;
                            } else {
                                return false;
                            }

                            $tok = $p->next_no_whitespace();
                        }
                    }
                    if ($tok === CSSQueryTokenizer::TOK_WHITESPACE) {
                        $tok = $p->next_no_whitespace();
                    }

                    $conditions['tags'][] = array(
                        'tag' => $tag,
                        'match' => $match,
                        'operator' => $last_mode,
                        'compare' => $compare
                    );
                    switch($tok) {
                        case CSSQueryTokenizer::TOK_COMMA:
                            $tok = $p->next_no_whitespace();
                            $last_mode = 'or';
                            continue 2;
                        case CSSQueryTokenizer::TOK_PLUS:
                            $tok = $p->next_no_whitespace();
                            $last_mode = 'and';
                            continue 2;
                        case CSSQueryTokenizer::TOK_BRACE_CLOSE:
                            $tok = $p->next();
                            break 2;
                        default:
                            $this->error('Expected closing brace or comma at pos %pos%!');
                            return false;
                    }
                }
            } elseif (($tag = $this->parse_getIdentifier(false)) !== false) {
                // "name", "ns|name", "ns|*" or the pseudo tag "text()"
                $tok = $p->next();
                if ($tok === CSSQueryTokenizer::TOK_PIPE) {
                    $tok = $p->next();

                    if ($tok === CSSQueryTokenizer::TOK_ALL) {
                        $conditions['tags'][] = array(
                            'tag' => $tag,
                            'compare' => 'namespace'
                        );
                    } elseif (($tag_name = $this->parse_getIdentifier()) !== false) {
                        $tag = $tag.':'.$tag_name;
                        $conditions['tags'][] = array(
                            'tag' => $tag,
                            'match' => true
                        );
                    } else {
                        return false;
                    }

                    $tok = $p->next();
                } elseif ($tag === 'text' && $tok === CSSQueryTokenizer::TOK_BRACE_OPEN) {
                    $pos = $p->getPos();
                    $tok = $p->next();
                    if ($tok === CSSQueryTokenizer::TOK_BRACE_CLOSE) {
                        $conditions['tags'][] = array(
                            'tag' => '~text~',
                            'match' => true
                        );
                        // Keep $tok in sync so ".class", "[attr]", ":filter" or "," after "text()" is seen
                        $tok = $p->next();
                    } else {
                        // Not "text()": rewind the tokenizer to the opening brace.
                        // NOTE(review): $tok still holds the token read after "(" and no tag condition is added here.
                        $p->setPos($pos);
                    }
                } else {
                    $conditions['tags'][] = array(
                        'tag' => $tag,
                        'match' => true
                    );
                }
            } else {
                unset($conditions['tags']);
            }

            //Class
            $last_mode = 'or';
            if ($tok === CSSQueryTokenizer::TOK_CLASS) {
                $p->next();
                if (($class = $this->parse_getIdentifier()) === false) {
                    return false;
                }

                $conditions['attributes'][] = array(
                    'attribute' => 'class',
                    'operator_value' => 'contains_word',
                    'value' => $class,
                    'operator_result' => $last_mode
                );
                $last_mode = 'and';
                $tok = $p->next();
            }

            //ID
            if ($tok === CSSQueryTokenizer::TOK_ID) {
                $p->next();
                if (($id = $this->parse_getIdentifier()) === false) {
                    return false;
                }

                $conditions['attributes'][] = array(
                    'attribute' => 'id',
                    'operator_value' => 'equals',
                    'value' => $id,
                    'operator_result' => $last_mode
                );
                $last_mode = 'and';
                $tok = $p->next();
            }

            //Attributes
            if ($tok === CSSQueryTokenizer::TOK_BRACKET_OPEN) {
                $tok = $p->next_no_whitespace();

                while (true) {
                    $match = true;
                    $compare = 'total';
                    if ($tok === CSSQueryTokenizer::TOK_NOT) {
                        $match = false;
                        $tok = $p->next_no_whitespace();
                    }

                    if ($tok === CSSQueryTokenizer::TOK_ALL) {
                        $tok = $p->next();
                        if ($tok === CSSQueryTokenizer::TOK_PIPE) {
                            $tok = $p->next();
                            if (($attribute = $this->parse_getIdentifier()) === false) {
                                return false;
                            }
                            $compare = 'name';
                            $tok = $p->next();
                        } else {
                            $this->error('Expected pipe at pos %pos%!');
                            return false;
                        }
                    } elseif ($tok === CSSQueryTokenizer::TOK_PIPE) {
                        $tok = $p->next();
                        // "[|name]": the identifier is the attribute name
                        if (($attribute = $this->parse_getIdentifier()) === false) {
                            return false;
                        }
                        $tok = $p->next_no_whitespace();
                    } elseif (($attribute = $this->parse_getIdentifier()) !== false) {
                        $tok = $p->next();
                        if ($tok === CSSQueryTokenizer::TOK_PIPE) {
                            $tok = $p->next();

                            if (($attribute_name = $this->parse_getIdentifier()) !== false) {
                                $attribute = $attribute.':'.$attribute_name;
                            } else {
                                return false;
                            }

                            $tok = $p->next();
                        }
                    } else {
                        return false;
                    }
                    if ($tok === CSSQueryTokenizer::TOK_WHITESPACE) {
                        $tok = $p->next_no_whitespace();
                    }

                    $operator_value = '';
                    $val = '';
                    switch($tok) {
                        case CSSQueryTokenizer::TOK_COMPARE_PREFIX:
                        case CSSQueryTokenizer::TOK_COMPARE_CONTAINS:
                        case CSSQueryTokenizer::TOK_COMPARE_CONTAINS_WORD:
                        case CSSQueryTokenizer::TOK_COMPARE_ENDS:
                        case CSSQueryTokenizer::TOK_COMPARE_EQUALS:
                        case CSSQueryTokenizer::TOK_COMPARE_NOT_EQUAL:
                        case CSSQueryTokenizer::TOK_COMPARE_REGEX:
                        case CSSQueryTokenizer::TOK_COMPARE_STARTS:
                        case CSSQueryTokenizer::TOK_COMPARE_BIGGER_THAN:
                        case CSSQueryTokenizer::TOK_COMPARE_SMALLER_THAN:
                            // "=" is one character; every other operator also needs the character before it
                            $operator_value = $p->getTokenString(($tok === CSSQueryTokenizer::TOK_COMPARE_EQUALS) ? 0 : -1);
                            $p->next_no_whitespace();

                            if (($val = $this->parse_getIdentifier()) === false) {
                                return false;
                            }

                            $tok = $p->next_no_whitespace();
                            break;
                    }

                    // Compare against '' explicitly: a plain truthiness test drops the value "0"
                    if (($operator_value !== '') && ($val !== '')) {
                        $conditions['attributes'][] = array(
                            'attribute' => $attribute,
                            'operator_value' => $operator_value,
                            'value' => $val,
                            'match' => $match,
                            'operator_result' => $last_mode,
                            'compare' => $compare
                        );
                    } else {
                        $conditions['attributes'][] = array(
                            'attribute' => $attribute,
                            'value' => $match,
                            'operator_result' => $last_mode,
                            'compare' => $compare
                        );
                    }

                    switch($tok) {
                        case CSSQueryTokenizer::TOK_COMMA:
                            $tok = $p->next_no_whitespace();
                            $last_mode = 'or';
                            continue 2;
                        case CSSQueryTokenizer::TOK_PLUS:
                            $tok = $p->next_no_whitespace();
                            $last_mode = 'and';
                            continue 2;
                        case CSSQueryTokenizer::TOK_BRACKET_CLOSE:
                            $tok = $p->next();
                            break 2;
                        default:
                            $this->error('Expected closing bracket or comma at pos %pos%!');
                            return false;
                    }
                }
            }

            if (count($conditions['attributes']) < 1) {
                unset($conditions['attributes']);
            }

            //Filters (":name" or ":name(params)")
            while($tok === CSSQueryTokenizer::TOK_COLON) {
                if (count($conditions) < 1) {
                    $conditions['tags'] = array(array(
                        'tag' => '',
                        'match' => false
                    ));
                }

                $tok = $p->next();
                if (($filter = $this->parse_getIdentifier()) === false) {
                    return false;
                }

                if (($tok = $p->next()) === CSSQueryTokenizer::TOK_BRACE_OPEN) {
                    $start = $p->pos;
                    $count = 1;
                    // Walk to the brace that closes this one, tracking nested braces
                    while ((($tok = $p->next()) !== CSSQueryTokenizer::TOK_NULL) && !(($tok === CSSQueryTokenizer::TOK_BRACE_CLOSE) && (--$count === 0))) {
                        if ($tok === CSSQueryTokenizer::TOK_BRACE_OPEN) {
                            ++$count;
                        }
                    }

                    if ($tok !== CSSQueryTokenizer::TOK_BRACE_CLOSE) {
                        $this->error('Expected closing brace at pos %pos%!');
                        return false;
                    }
                    // Raw text between the outer braces
                    $len = $p->pos - 1 - $start;
                    $params = (($len > 0) ? substr($p->doc, $start + 1, $len) : '');
                    $tok = $p->next();
                } else {
                    $params = '';
                }

                $conditions['filters'][] = array('filter' => $filter, 'params' => $params);
            }
            if (count($conditions) < 1) {
                $this->error('Invalid search pattern(2): No conditions found!');
                return false;
            }
            $conditions_all[] = $conditions;

            if ($tok === CSSQueryTokenizer::TOK_WHITESPACE) {
                $tok = $p->next_no_whitespace();
            }

            if ($tok === CSSQueryTokenizer::TOK_COMMA) {
                $tok = $p->next_no_whitespace();
                continue;
            } else {
                break;
            }
        }

        return $conditions_all;
    }


    /**
     * Evaluate root node using custom callback
     * @param array $conditions {@link parse_conditions()}
     * @param bool|int $recursive
     * @param bool $check_root
     * @return array
     * @access private
     */
    protected function parse_callback($conditions, $recursive = true, $check_root = false) {
        return ($this->result = $this->root->getChildrenByMatch(
            $conditions,
            $recursive,
            $check_root,
            $this->custom_filter_map
        ));
    }

    /**
     * Parse first bit of query, only root node has to be evaluated now
     * @param bool|int $recursive
     * @return bool
     * @internal Result of query is set in {@link $result}
     * @access private
     */
    protected function parse_single($recursive = true) {
        if (($c = $this->parse_conditions()) === false) {
            return false;
        }

        $this->parse_callback($c, $recursive, $this->search_root);
        return true;
    }

    /**
     * Evaluate sibling nodes
     *
     * Keeps, for every node in the previous result, its next sibling when
     * that sibling matches the conditions parsed next.
     * @return bool
     * @internal Result of query is set in {@link $result}
     * @access private
     */
    protected function parse_adjacent() {
        $tmp = $this->result;
        $this->result = array();
        if (($c = $this->parse_conditions()) === false) {
            return false;
        }

        foreach($tmp as $t) {
            if (($sibling = $t->getNextSibling()) !== false) {
                if ($sibling->match($c, true, $this->custom_filter_map)) {
                    $this->result[] = $sibling;
                }
            }
        }

        return true;
    }

    /**
     * Evaluate {@link $result}
     *
     * Runs the conditions parsed next against every node of the previous
     * result (or against its parent) and collects the matches without
     * duplicates.
     * @param bool $parent Evaluate parent nodes
     * @param bool|int $recursive
     * @return bool
     * @internal Result of query is set in {@link $result}
     * @access private
     */
    protected function parse_result($parent = false, $recursive = true) {
        $tmp = $this->result;
        $tmp_res = array();
        if (($c = $this->parse_conditions()) === false) {
            return false;
        }

        foreach(array_keys($tmp) as $t) {
            // parse_callback() searches from $this->root, so re-point it for each node
            $this->root = (($parent) ? $tmp[$t]->parent : $tmp[$t]);
            $this->parse_callback($c, $recursive);
            foreach(array_keys($this->result) as $r) {
                if (!in_array($this->result[$r], $tmp_res, true)) {
                    $tmp_res[] = $this->result[$r];
                }
            }
        }
        $this->result = $tmp_res;
        return true;
    }

    /**
     * Parse full query
     *
     * Evaluates the first selector against {@link $root}, then narrows the
     * result once per combinator until the query ends or nothing matches.
     * @return bool
     * @internal Result of query is set in {@link $result}
     * @access private
     */
    protected function parse() {
        $p =& $this->parser;
        $p->setPos(0);
        $this->result = array();

        if (!$this->parse_single()) {
            return false;
        }

        while (count($this->result) > 0) {
            switch($p->token) {
                case CSSQueryTokenizer::TOK_CHILD:
                    $this->parser->next_no_whitespace();
                    if (!$this->parse_result(false, 1)) {
                        return false;
                    }
                    break;

                case CSSQueryTokenizer::TOK_SIBLING:
                    $this->parser->next_no_whitespace();
                    if (!$this->parse_result(true, 1)) {
                        return false;
                    }
                    break;

                case CSSQueryTokenizer::TOK_PLUS:
                    $this->parser->next_no_whitespace();
                    if (!$this->parse_adjacent()) {
                        return false;
                    }
                    break;

                case CSSQueryTokenizer::TOK_ALL:
                case CSSQueryTokenizer::TOK_IDENTIFIER:
                case CSSQueryTokenizer::TOK_STRING:
                case CSSQueryTokenizer::TOK_BRACE_OPEN:
                case CSSQueryTokenizer::TOK_BRACKET_OPEN:
                case CSSQueryTokenizer::TOK_ID:
                case CSSQueryTokenizer::TOK_CLASS:
                case CSSQueryTokenizer::TOK_COLON:
                    // No explicit combinator: search below the current result
                    if (!$this->parse_result()) {
                        return false;
                    }
                    break;

                case CSSQueryTokenizer::TOK_NULL:
                    break 2;

                default:
                    $this->error('Invalid search pattern(3): No result modifier found!');
                    return false;
            }
        }

        return true;
    }
}

+?> diff --git a/lib-3rd-party/pquery/gan_tokenizer.php b/lib-3rd-party/pquery/gan_tokenizer.php new file mode 100644 index 0000000000..a8261a0dd9 --- /dev/null +++ b/lib-3rd-party/pquery/gan_tokenizer.php @@ -0,0 +1,566 @@ + + * @copyright 2010 Niels A.D., 2014 Todd Burry + * @license http://opensource.org/licenses/LGPL-2.1 LGPL-2.1 + * @package pQuery + */ + +namespace pQuery; + +/** + * Converts a document into tokens + * + * Can convert any string into tokens. The base class only supports + * identifier/whitespace tokens. For more tokens, the class can be + * easily extended. + * + * Use like: + * + * next() !== $a::TOK_NULL) { + * echo $a->token, ': ',$a->getTokenString(), "
\n"; + * } + * ?> + *
+ * + * @internal The tokenizer works with a character map that connects a certain + * character to a certain function/token. This class is build with speed in mind. + */ +class TokenizerBase { + + /** + * NULL Token, used at end of document (parsing should stop after this token) + */ + const TOK_NULL = 0; + /** + * Unknown token, used at unidentified character + */ + const TOK_UNKNOWN = 1; + /** + * Whitespace token, used with whitespace + */ + const TOK_WHITESPACE = 2; + /** + * Identifier token, used with identifiers + */ + const TOK_IDENTIFIER = 3; + + /** + * The document that is being tokenized + * @var string + * @internal Public for faster access! + * @see setDoc() + * @see getDoc() + * @access private + */ + var $doc = ''; + + /** + * The size of the document (length of string) + * @var int + * @internal Public for faster access! + * @see $doc + * @access private + */ + var $size = 0; + + /** + * Current (character) position in the document + * @var int + * @internal Public for faster access! + * @see setPos() + * @see getPos() + * @access private + */ + var $pos = 0; + + /** + * Current (Line/Column) position in document + * @var array (Current_Line, Line_Starting_Pos) + * @internal Public for faster access! + * @see getLinePos() + * @access private + */ + var $line_pos = array(0, 0); + + /** + * Current token + * @var int + * @internal Public for faster access! + * @see getToken() + * @access private + */ + var $token = self::TOK_NULL; + + /** + * Start position of token. If NULL, then current position is used. + * @var int + * @internal Public for faster access! + * @see getTokenString() + * @access private + */ + var $token_start = null; + + /** + * List with all the character that can be considered as whitespace + * @var array|string + * @internal Variable is public + associated array for faster access! 
+ * @internal array(' ' => true) will recognize space (' ') as whitespace + * @internal String will be converted to array in constructor + * @internal Result token will be {@link self::TOK_WHITESPACE}; + * @see setWhitespace() + * @see getWhitespace() + * @access private + */ + var $whitespace = " \t\n\r\0\x0B"; + + /** + * List with all the character that can be considered as identifier + * @var array|string + * @internal Variable is public + associated array for faster access! + * @internal array('a' => true) will recognize 'a' as identifier + * @internal String will be converted to array in constructor + * @internal Result token will be {@link self::TOK_IDENTIFIER}; + * @see setIdentifiers() + * @see getIdentifiers() + * @access private + */ + var $identifiers = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890_'; + + /** + * All characters that should be mapped to a token/function that cannot be considered as whitespace or identifier + * @var array + * @internal Variable is public + associated array for faster access! + * @internal array('a' => 'parse_a') will call $this->parse_a() if it matches the character 'a' + * @internal array('a' => self::TOK_A) will set token to TOK_A if it matches the character 'a' + * @see mapChar() + * @see unmapChar() + * @access private + */ + var $custom_char_map = array(); + + /** + * Automatically built character map. Built using {@link $identifiers}, {@link $whitespace} and {@link $custom_char_map} + * @var array + * @internal Public for faster access! 
+ * @access private + */ + var $char_map = array(); + + /** + * All errors found while parsing the document + * @var array + * @see addError() + */ + var $errors = array(); + + /** + * Class constructor + * @param string $doc Document to be tokenized + * @param int $pos Position to start parsing + * @see setDoc() + * @see setPos() + */ + function __construct($doc = '', $pos = 0) { + $this->setWhitespace($this->whitespace); + $this->setIdentifiers($this->identifiers); + + $this->setDoc($doc, $pos); + } + + #php4 PHP4 class constructor compatibility + #function TokenizerBase($doc = '', $pos = 0) {return $this->__construct($doc, $pos);} + #php4e + + /** + * Sets target document + * @param string $doc Document to be tokenized + * @param int $pos Position to start parsing + * @see getDoc() + * @see setPos() + */ + function setDoc($doc, $pos = 0) { + $this->doc = $doc; + $this->size = strlen($doc); + $this->setPos($pos); + } + + /** + * Returns target document + * @return string + * @see setDoc() + */ + function getDoc() { + return $this->doc; + } + + /** + * Sets position in document + * @param int $pos + * @see getPos() + */ + function setPos($pos = 0) { + $this->pos = $pos - 1; + $this->line_pos = array(0, 0); + $this->next(); + } + + /** + * Returns current position in document (Index) + * @return int + * @see setPos() + */ + function getPos() { + return $this->pos; + } + + /** + * Returns current position in document (Line/Char) + * @return array array(Line, Column) + */ + function getLinePos() { + return array($this->line_pos[0], $this->pos - $this->line_pos[1]); + } + + /** + * Returns current token + * @return int + * @see $token + */ + function getToken() { + return $this->token; + } + + /** + * Returns current token as string + * @param int $start_offset Offset from token start + * @param int $end_offset Offset from token end + * @return string + */ + function getTokenString($start_offset = 0, $end_offset = 0) { + $token_start = ((is_int($this->token_start)) ? 
$this->token_start : $this->pos) + $start_offset; + $len = $this->pos - $token_start + 1 + $end_offset; + return (($len > 0) ? substr($this->doc, $token_start, $len) : ''); + } + + /** + * Sets characters to be recognized as whitespace + * + * Used like: setWhitespace('ab') or setWhitespace(array('a' => true, 'b', 'c')); + * @param string|array $ws + * @see getWhitespace(); + */ + function setWhitespace($ws) { + if (is_array($ws)) { + $this->whitespace = array_fill_keys(array_values($ws), true); + $this->buildCharMap(); + } else { + $this->setWhiteSpace(str_split($ws)); + } + } + + /** + * Returns whitespace characters as string/array + * @param bool $as_string Should the result be a string or an array? + * @return string|array + * @see setWhitespace() + */ + function getWhitespace($as_string = true) { + $ws = array_keys($this->whitespace); + return (($as_string) ? implode('', $ws) : $ws); + } + + /** + * Sets characters to be recognized as identifier + * + * Used like: setIdentifiers('ab') or setIdentifiers(array('a' => true, 'b', 'c')); + * @param string|array $ident + * @see getIdentifiers(); + */ + function setIdentifiers($ident) { + if (is_array($ident)) { + $this->identifiers = array_fill_keys(array_values($ident), true); + $this->buildCharMap(); + } else { + $this->setIdentifiers(str_split($ident)); + } + } + + /** + * Returns identifier characters as string/array + * @param bool $as_string Should the result be a string or an array? + * @return string|array + * @see setIdentifiers() + */ + function getIdentifiers($as_string = true) { + $ident = array_keys($this->identifiers); + return (($as_string) ? implode('', $ident) : $ident); + } + + /** + * Maps a custom character to a token/function + * + * Used like: mapChar('a', self::{@link TOK_IDENTIFIER}) or mapChar('a', 'parse_identifier'); + * @param string $char Character that should be mapped. 
If set, it will be overridden + * @param int|string $map If function name, then $this->function will be called, otherwise token is set to $map + * @see unmapChar() + */ + function mapChar($char, $map) { + $this->custom_char_map[$char] = $map; + $this->buildCharMap(); + } + + /** + * Removes a char mapped with {@link mapChar()} + * @param string $char Character that should be unmapped + * @see mapChar() + */ + function unmapChar($char) { + unset($this->custom_char_map[$char]); + $this->buildCharMap(); + } + + /** + * Builds the {@link $map_char} array + * @internal Builds single array that maps all characters. Gets called if {@link $whitespace}, {@link $identifiers} or {@link $custom_char_map} get modified + */ + protected function buildCharMap() { + $this->char_map = $this->custom_char_map; + if (is_array($this->whitespace)) { + foreach($this->whitespace as $w => $v) { + $this->char_map[$w] = 'parse_whitespace'; + } + } + if (is_array($this->identifiers)) { + foreach($this->identifiers as $i => $v) { + $this->char_map[$i] = 'parse_identifier'; + } + } + } + + /** + * Add error to the array and appends current position + * @param string $error + */ + function addError($error) { + $this->errors[] = htmlentities($error.' 
at '.($this->line_pos[0] + 1).', '.($this->pos - $this->line_pos[1] + 1).'!'); + } + + /** + * Parse line breaks and increase line number + * @internal Gets called to process line breaks + */ + protected function parse_linebreak() { + if($this->doc[$this->pos] === "\r") { + ++$this->line_pos[0]; + if ((($this->pos + 1) < $this->size) && ($this->doc[$this->pos + 1] === "\n")) { + ++$this->pos; + } + $this->line_pos[1] = $this->pos; + } elseif($this->doc[$this->pos] === "\n") { + ++$this->line_pos[0]; + $this->line_pos[1] = $this->pos; + } + } + + /** + * Parse whitespace + * @return int Token + * @internal Gets called with {@link $whitespace} characters + */ + protected function parse_whitespace() { + $this->token_start = $this->pos; + + while(++$this->pos < $this->size) { + if (!isset($this->whitespace[$this->doc[$this->pos]])) { + break; + } else { + $this->parse_linebreak(); + } + } + + --$this->pos; + return self::TOK_WHITESPACE; + } + + /** + * Parse identifiers + * @return int Token + * @internal Gets called with {@link $identifiers} characters + */ + protected function parse_identifier() { + $this->token_start = $this->pos; + + while((++$this->pos < $this->size) && isset($this->identifiers[$this->doc[$this->pos]])) {} + + --$this->pos; + return self::TOK_IDENTIFIER; + } + + /** + * Continues to the next token + * @return int Next token ({@link TOK_NULL} if none) + */ + function next() { + $this->token_start = null; + + if (++$this->pos < $this->size) { + if (isset($this->char_map[$this->doc[$this->pos]])) { + if (is_string($this->char_map[$this->doc[$this->pos]])) { + return ($this->token = $this->{$this->char_map[$this->doc[$this->pos]]}()); + } else { + return ($this->token = $this->char_map[$this->doc[$this->pos]]); + } + } else { + return ($this->token = self::TOK_UNKNOWN); + } + } else { + return ($this->token = self::TOK_NULL); + } + } + + /** + * Finds the next token, but skips whitespace + * @return int Next token ({@link TOK_NULL} if none) + */ + 
function next_no_whitespace() {
        $this->token_start = null;

        while (++$this->pos < $this->size) {
            $current = $this->doc[$this->pos];

            if (isset($this->whitespace[$current])) {
                // Whitespace is skipped, but line breaks still have to be counted
                $this->parse_linebreak();
                continue;
            }

            if (!isset($this->char_map[$current])) {
                return ($this->token = self::TOK_UNKNOWN);
            }

            $mapped = $this->char_map[$current];
            if (is_string($mapped)) {
                // String entries name a parser method on this object
                return ($this->token = $this->{$mapped}());
            }

            return ($this->token = $mapped);
        }

        return ($this->token = self::TOK_NULL);
    }

    /**
     * Scans forward until one of the given stop characters is found.
     *
     * Call as next_search('abc') or next_search(array('a' => true, 'b' => true, 'c' => true));
     * @param string|array $characters Stop characters (a string is converted to a char => true map)
     * @param bool $callback When true, the stop character is resolved through the char map
     * @return int Next token ({@link TOK_NULL} if none)
     */
    function next_search($characters, $callback = true) {
        $this->token_start = $this->pos;

        if (!is_array($characters)) {
            $characters = array_fill_keys(str_split($characters), true);
        }

        while (++$this->pos < $this->size) {
            $current = $this->doc[$this->pos];

            if (!isset($characters[$current])) {
                // Not a stop character: keep scanning, but keep the line count up to date
                $this->parse_linebreak();
                continue;
            }

            if (!($callback && isset($this->char_map[$current]))) {
                return ($this->token = self::TOK_UNKNOWN);
            }

            $mapped = $this->char_map[$current];
            if (is_string($mapped)) {
                return ($this->token = $this->{$mapped}());
            }

            return ($this->token = $mapped);
        }

        return ($this->token = self::TOK_NULL);
    }

    /**
     * Finds the next token by searching for a string
     * @param string $needle The needle that's being searched for
     * @param bool $callback Should the function check the charmap after finding the needle?
* @return int Next token ({@link TOK_NULL} if none)
     */
    function next_pos($needle, $callback = true) {
        $this->token_start = $this->pos;

        // Case-insensitive search, starting just after the current position
        $found = ($this->pos < $this->size) ? stripos($this->doc, $needle, $this->pos + 1) : false;
        if ($found === false) {
            $this->pos = $this->size;
            return ($this->token = self::TOK_NULL);
        }

        // Account for every "\n" in the text that is being jumped over
        $skipped_len = $found - $this->pos - 1;
        if ($skipped_len > 0) {
            $skipped = substr($this->doc, $this->pos + 1, $skipped_len);
            $last_break = strrpos($skipped, "\n");

            if ($last_break !== false) {
                // The last break is counted here and becomes the new line start ...
                ++$this->line_pos[0];
                $this->line_pos[1] = $last_break + $this->pos + 1;

                // ... all earlier breaks are counted from the text in front of it
                $tail_len = $skipped_len - $last_break;
                if ($tail_len > 0) {
                    $before_last_break = substr($skipped, 0, -$tail_len);
                    $this->line_pos[0] += substr_count($before_last_break, "\n");
                }
            }
        }

        $this->pos = $found;
        $current = $this->doc[$this->pos];

        if (!($callback && isset($this->char_map[$current]))) {
            return ($this->token = self::TOK_UNKNOWN);
        }

        $mapped = $this->char_map[$current];
        if (is_string($mapped)) {
            // String entries name a parser method on this object
            return ($this->token = $this->{$mapped}());
        }

        return ($this->token = $mapped);
    }

    /**
     * Expect a specific token or character. Adds error if token doesn't match.
     * @param string|int $token Character or token to expect
     * @param bool|int $do_next Go to next character before evaluating. 1 for next char, true to ignore whitespace
     * @param bool|int $try_next Try next character if current doesn't match. 1 for next char, true to ignore whitespace
     * @param bool|int $next_on_match Go to next character after evaluating.
1 for next char, true to ignore whitespace
     * @return bool True if the expectation was met, false (after recording an error) otherwise
     */
    protected function expect($token, $do_next = true, $try_next = false, $next_on_match = 1) {
        if ($do_next) {
            if ($do_next === 1) {
                $this->next();
            } else {
                $this->next_no_whitespace();
            }
        }

        if (is_int($token)) {
            // Integer: compare against the current token id, optionally retrying on the next token
            if (($this->token !== $token) && ((!$try_next) || ((($try_next === 1) && ($this->next() !== $token)) || (($try_next === true) && ($this->next_no_whitespace() !== $token))))) {
                $this->addError('Unexpected "'.$this->getTokenString().'"');
                return false;
            }
        } else {
            // Otherwise: compare against the character at the current position
            if (($this->doc[$this->pos] !== $token) && ((!$try_next) || (((($try_next === 1) && ($this->next() !== self::TOK_NULL)) || (($try_next === true) && ($this->next_no_whitespace() !== self::TOK_NULL))) && ($this->doc[$this->pos] !== $token)))) {
                $this->addError('Expected "'.$token.'", but found "'.$this->getTokenString().'"');
                return false;
            }
        }

        if ($next_on_match) {
            if ($next_on_match === 1) {
                $this->next();
            } else {
                $this->next_no_whitespace();
            }
        }
        return true;
    }
}

?> \ No newline at end of file diff --git a/lib-3rd-party/pquery/gan_xml2array.php b/lib-3rd-party/pquery/gan_xml2array.php new file mode 100644 index 0000000000..f790926861 --- /dev/null +++ b/lib-3rd-party/pquery/gan_xml2array.php @@ -0,0 +1,101 @@ + + * @copyright 2010 Niels A.D., 2014 Todd Burry + * @license http://opensource.org/licenses/LGPL-2.1 LGPL-2.1 + * @package pQuery + */ + +namespace pQuery; +

/**
 * Converts a XML document to an array
 */
class XML2ArrayParser extends HtmlParserBase {

    /**
     * Holds the document structure
     * @var array array('name' => 'tag', 'attrs' => array('attr' => 'val'), 'children' => array())
     */
    var $root = array(
        'name' => '',
        'attrs' => array(),
        'children' => array()
    );

    /**
     * Current parsing hierarchy (stack of references to the currently open tag arrays)
     * @var array
     * @access private
     */
    var $hierarchy = array();

    /**
     * Updates {@link $hierarchy} (and {@link $root}) for the tag described by $this->status
     *
     * A closing tag pops the stack down to the matching open tag and reports every tag
     * that had to be closed implicitly. An opening tag is appended to the children of
     * the current tag, or becomes the root when nothing is open yet.
     * @param bool|null $self_close Truthy if the tag closes itself and must not be pushed on the stack
     */
    protected function parse_hierarchy($self_close) {
        if ($this->status['closing_tag']) {
            $found = false;
            for ($count = count($this->hierarchy), $i = $count - 1; $i >= 0; $i--) {
                if (strcasecmp($this->hierarchy[$i]['name'], $this->status['tag_name']) === 0) {

                    // Pop the matching tag and everything that was opened after it
                    for($ii = ($count - $i - 1); $ii >= 0; $ii--) {
                        $e = array_pop($this->hierarchy);
                        if ($ii > 0) {
                            $this->addError('Closing tag "'.$this->status['tag_name'].'" while "'.$e['name'].'" is not closed yet');
                        }
                    }

                    $found = true;
                    break;
                }
            }

            if (!$found) {
                $this->addError('Closing tag "'.$this->status['tag_name'].'" which is not open');
            }
        } else {
            $tag = array(
                'name' => $this->status['tag_name'],
                'attrs' => $this->status['attributes'],
                'children' => array()
            );
            if ($this->hierarchy) {
                $current =& $this->hierarchy[count($this->hierarchy) - 1];
                $current['children'][] = $tag;
                // Re-bind $tag to the stored copy so that later changes end up in the tree
                $tag =& $current['children'][count($current['children']) - 1];
                // An element that has children carries no text payload
                unset($current['tagData']);
            } else {
                $this->root = $tag;
                $tag =& $this->root;
                // The root is always pushed on the stack, even when written as self closing
                $self_close = false;
            }
            if (!$self_close) {
                $this->hierarchy[] =& $tag;
            }
        }
    }

    /**
     * Parses a tag through the parent parser and records it in the hierarchy
     * @return bool False if the parent parser rejected the tag
     */
    function parse_tag_default() {
        if (!parent::parse_tag_default()) {return false;}

        // Tags whose name starts with "?" are not added to the tree
        if ($this->status['tag_name'][0] !== '?') {
            $this->parse_hierarchy(($this->status['self_close']) ? true : null);
        }
        return true;
    }

    /**
     * Parses text and stores it as 'tagData' of the current tag, as long as that tag has no children
     */
    function parse_text() {
        parent::parse_text();
        if (($this->status['text'] !== '') && $this->hierarchy) {
            $current =& $this->hierarchy[count($this->hierarchy) - 1];
            if (!$current['children']) {
                $current['tagData'] = $this->status['text'];
            }
        }
    }

    /**
     * Parses the whole document
     * @return array|bool The {@link $root} array, or false if the parent parser reported failure
     */
    function parse_all() {
        return ((parent::parse_all()) ? $this->root : false);
    }
}

?> \ No newline at end of file diff --git a/lib-3rd-party/pquery/ganon.php b/lib-3rd-party/pquery/ganon.php new file mode 100644 index 0000000000..4daaa8b70b --- /dev/null +++ b/lib-3rd-party/pquery/ganon.php @@ -0,0 +1,101 @@ + + * @copyright 2010 Niels A.D., 2014 Todd Burry + * @license http://opensource.org/licenses/LGPL-2.1 LGPL-2.1 + * @package pQuery + */ + +use pQuery\Html5Parser; +use pQuery\HtmlFormatter; +

/**
 * Returns HTML DOM from string
 * @param string $str
 * @param bool $return_root Return root node or return parser object
 * @return Html5Parser|DomNode
 */
function str_get_dom($str, $return_root = true) {
    $a = new Html5Parser($str);
    return (($return_root) ? $a->root : $a);
}

/**
 * Returns HTML DOM from file/website
 * @param string $file Path or URL that is handed to file_get_contents
 * @param bool $return_root Return root node or return parser object
 * @param bool $use_include_path Use include path search in file_get_contents
 * @param resource $context Context resource used in file_get_contents
 * @return Html5Parser|DomNode|bool False if the file could not be read
 */
function file_get_dom($file, $return_root = true, $use_include_path = false, $context = null) {
    // This file needs namespace support (PHP >= 5.3) just to be parsed, so the former
    // "PHP < 5.0.0" fallback without a context argument could never be reached.
    $f = file_get_contents($file, $use_include_path, $context);

    return (($f === false) ? false : str_get_dom($f, $return_root));
}

/**
 * Format/beautify DOM
 * @param DomNode $root
 * @param array $options Extra formatting options {@link HtmlFormatter::$options}
 * @return bool
 */
function dom_format(&$root, $options = array()) {
    $formatter = new HtmlFormatter($options);
    return $formatter->format($root);
}

if (version_compare(PHP_VERSION, '5.0.0', '<')) {
    /**
     * PHP alternative to str_split, for backwards compatibility
     * @param string $string
     * @return array One array element per character of $string
     */
    function str_split($string) {
        $res = array();
        $size = strlen($string);
        for ($i = 0; $i < $size; $i++) {
            $res[] = $string[$i];
        }

        return $res;
    }
}

if (version_compare(PHP_VERSION, '5.2.0', '<')) {
    /**
     * PHP alternative to array_fill_keys, for backwards compatibility
     * @param array $keys
     * @param mixed $value
     * @return array
     */
    function array_fill_keys($keys, $value) {
        $res = array();
        foreach($keys as $k) {
            $res[$k] = $value;
        }

        return $res;
    }
}

#!! <- Ignore when converting to single file
if (!defined('GANON_NO_INCLUDES')) {
    define('GANON_NO_INCLUDES', true);
    include_once('IQuery.php');
    include_once('gan_tokenizer.php');
    include_once('gan_parser_html.php');
    include_once('gan_node_html.php');
    include_once('gan_selector_html.php');
    include_once('gan_formatter.php');
}
#!

?> \ No newline at end of file diff --git a/lib-3rd-party/pquery/pQuery.php b/lib-3rd-party/pquery/pQuery.php new file mode 100644 index 0000000000..7d451e813c --- /dev/null +++ b/lib-3rd-party/pquery/pQuery.php @@ -0,0 +1,279 @@ + + * @copyright 2010 Niels A.D., 2014 Todd Burry + * @license http://opensource.org/licenses/LGPL-2.1 LGPL-2.1 + * @package pQuery + */ + +use pQuery\IQuery; +

/**
 * A jQuery-like object for php.
+ */ +class pQuery implements ArrayAccess, IteratorAggregate, IQuery { + /// Properties /// + + /** + * @var IQuery[] + */ + protected $nodes = array(); + + /// Methods /// + + public function __construct($nodes = array()) { + $this->nodes = $nodes; + } + + public function addClass($classname) { + foreach ($this->nodes as $node) { + $node->addClass($classname); + } + return $this; + } + + public function after($content) { + foreach ($this->nodes as $node) { + $node->after($content); + } + return $this; + } + + public function append($content) { + foreach ($this->nodes as $node) { + $node->append($content); + } + return $this; + } + + public function attr($name, $value = null) { + if (empty($this->nodes) && $value === null) + return ''; + + foreach ($this->nodes as $node) { + if ($value === null) + return $node->attr($name); + $node->attr($name, $value); + } + return $this; + } + + public function before($content) { + foreach ($this->nodes as $node) { + $node->before($content); + } + return $this; + } + + public function clear() { + foreach ($this->nodes as $node) { + $node->clear(); + } + return $this; + } + + /** + * Get the count of matched elements. + * + * @return int Returns the count of matched elements. + */ + public function count() { + return count($this->nodes); + } + + /** + * Format/beautify a DOM. + * + * @param pQuery\DomNode $dom The dom to format. + * @param array $options Extra formatting options. See {@link pQuery\HtmlFormatter::$options}. + * @return bool Returns `true` on sucess and `false` on failure. 
+ */ +// public static function format($dom, $options = array()) { +// $formatter = new pQuery\HtmlFormatter($options); +// return $formatter->format($dom); +// } + + public function getIterator() { + return new ArrayIterator($this->nodes); + } + + public function hasClass($classname) { + foreach ($this->nodes as $node) { + if ($node->hasClass($classname)) + return true; + } + return false; + } + + public function html($value = null) { + if (empty($this->nodes) && $value === null) + return ''; + + foreach ($this->nodes as $node) { + if ($value === null) + return $node->html(); + $node->html($value); + } + return $this; + } + + public function offsetExists($offset) { + return isset($this->nodes[$offset]); + } + + public function offsetGet($offset) { + return isset($this->nodes[$offset]) ? $this->nodes[$offset] : null; + } + + public function offsetSet($offset, $value) { + + if (is_null($offset) || !isset($this->nodes[$offset])) { + throw new \BadMethodCallException("You are not allowed to add new nodes to the pQuery object."); + } else { + $this->nodes[$offset]->replaceWith($value); + } + } + + public function offsetUnset($offset) { + if (isset($this->nodes[$offset])) { + $this->nodes[$offset]->remove(); + unset($this->nodes[$offset]); + } + } + + /** + * Query a file or url. + * + * @param string $path The path to the url. + * @param resource $context A context suitable to be passed into {@link file_get_contents} + * @return pQuery\DomNode Returns the root dom node for the html file. + */ + public static function parseFile($path, $context = null) { + $html_str = file_get_contents($path, false, $context); + return static::parseStr($html_str); + } + + /** + * Query a string of html. + * + * @param string $html + * @return pQuery\DomNode Returns the root dom node for the html string. 
+ */ + public static function parseStr($html) { + $parser = new pQuery\Html5Parser($html); + return $parser->root; + } + + public function prepend($content = null) { + foreach ($this->nodes as $node) { + $node->prepend($content); + } + return $this; + } + + public function prop($name, $value = null) { + if (empty($this->nodes) && $value === null) + return ''; + + foreach ($this->nodes as $node) { + if ($value === null) + return $node->prop($name); + $node->prop($name, $value); + } + return $this; + } + + public function remove($selector = null) { + foreach ($this->nodes as $node) { + $node->remove($selector); + } + if ($selector === null) + $this->nodes = array(); + + return $this; + } + + public function removeAttr($name) { + foreach ($this->nodes as $node) { + $node->removeAttr($name); + } + return $this; + } + + public function removeClass($classname) { + foreach ($this->nodes as $node) { + $node->removeClass($classname); + } + return $this; + } + + public function replaceWith($content) { + foreach ($this->nodes as &$node) { + $node = $node->replaceWith($content); + } + return $this; + } + + public function tagName($value = null) { + foreach ($this->nodes as $node) { + if ($value === null) + return $node->tagName(); + $node->tagName($value); + } + return $this; + } + + public function text($value = null) { + if (empty($this->nodes) && $value === null) + return ''; + + foreach ($this->nodes as $node) { + if ($value === null) + return $node->text(); + $node->text($value); + } + return $this; + } + + public function toggleClass($classname, $switch = null) { + foreach ($this->nodes as $node) { + $node->toggleClass($classname, $switch); + } + + return $this; + } + + public function unwrap() { + foreach ($this->nodes as $node) { + $node->unwrap(); + } + return $this; + } + + public function val($value = null) { + if (empty($this->nodes) && $value === null) + return ''; + + foreach ($this->nodes as $node) { + if ($value === null) + return $node->val(); + 
$node->val($value); + } + return $this; + } + + public function wrap($wrapping_element) { + foreach ($this->nodes as $node) { + $node->wrap($wrapping_element); + } + return $this; + } + + public function wrapInner($wrapping_element) { + foreach ($this->nodes as $node) { + $node->wrapInner($wrapping_element); + } + return $this; + } +} diff --git a/lib-3rd-party/pquery/third_party/jsminplus.php b/lib-3rd-party/pquery/third_party/jsminplus.php new file mode 100644 index 0000000000..d12b8c876e --- /dev/null +++ b/lib-3rd-party/pquery/third_party/jsminplus.php @@ -0,0 +1,2086 @@ + + * + * Usage: $minified = JSMinPlus::minify($script [, $filename]) + * + * Versionlog (see also changelog.txt): + * 23-07-2011 - remove dynamic creation of OP_* and KEYWORD_* defines and declare them on top + * reduce memory footprint by minifying by block-scope + * some small byte-saving and performance improvements + * 12-05-2009 - fixed hook:colon precedence, fixed empty body in loop and if-constructs + * 18-04-2009 - fixed crashbug in PHP 5.2.9 and several other bugfixes + * 12-04-2009 - some small bugfixes and performance improvements + * 09-04-2009 - initial open sourced version 1.0 + * + * Latest version of this script: http://files.tweakers.net/jsminplus/jsminplus.zip + * + */ + +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is the Narcissus JavaScript engine. + * + * The Initial Developer of the Original Code is + * Brendan Eich . 
+ * Portions created by the Initial Developer are Copyright (C) 2004 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): Tino Zijdel + * PHP port, modifications and minifier routine are (C) 2009-2011 + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. 
+ * + * ***** END LICENSE BLOCK ***** */ + +define('TOKEN_END', 1); +define('TOKEN_NUMBER', 2); +define('TOKEN_IDENTIFIER', 3); +define('TOKEN_STRING', 4); +define('TOKEN_REGEXP', 5); +define('TOKEN_NEWLINE', 6); +define('TOKEN_CONDCOMMENT_START', 7); +define('TOKEN_CONDCOMMENT_END', 8); + +define('JS_SCRIPT', 100); +define('JS_BLOCK', 101); +define('JS_LABEL', 102); +define('JS_FOR_IN', 103); +define('JS_CALL', 104); +define('JS_NEW_WITH_ARGS', 105); +define('JS_INDEX', 106); +define('JS_ARRAY_INIT', 107); +define('JS_OBJECT_INIT', 108); +define('JS_PROPERTY_INIT', 109); +define('JS_GETTER', 110); +define('JS_SETTER', 111); +define('JS_GROUP', 112); +define('JS_LIST', 113); + +define('JS_MINIFIED', 999); + +define('DECLARED_FORM', 0); +define('EXPRESSED_FORM', 1); +define('STATEMENT_FORM', 2); + +/* Operators */ +define('OP_SEMICOLON', ';'); +define('OP_COMMA', ','); +define('OP_HOOK', '?'); +define('OP_COLON', ':'); +define('OP_OR', '||'); +define('OP_AND', '&&'); +define('OP_BITWISE_OR', '|'); +define('OP_BITWISE_XOR', '^'); +define('OP_BITWISE_AND', '&'); +define('OP_STRICT_EQ', '==='); +define('OP_EQ', '=='); +define('OP_ASSIGN', '='); +define('OP_STRICT_NE', '!=='); +define('OP_NE', '!='); +define('OP_LSH', '<<'); +define('OP_LE', '<='); +define('OP_LT', '<'); +define('OP_URSH', '>>>'); +define('OP_RSH', '>>'); +define('OP_GE', '>='); +define('OP_GT', '>'); +define('OP_INCREMENT', '++'); +define('OP_DECREMENT', '--'); +define('OP_PLUS', '+'); +define('OP_MINUS', '-'); +define('OP_MUL', '*'); +define('OP_DIV', '/'); +define('OP_MOD', '%'); +define('OP_NOT', '!'); +define('OP_BITWISE_NOT', '~'); +define('OP_DOT', '.'); +define('OP_LEFT_BRACKET', '['); +define('OP_RIGHT_BRACKET', ']'); +define('OP_LEFT_CURLY', '{'); +define('OP_RIGHT_CURLY', '}'); +define('OP_LEFT_PAREN', '('); +define('OP_RIGHT_PAREN', ')'); +define('OP_CONDCOMMENT_END', '@*/'); + +define('OP_UNARY_PLUS', 'U+'); +define('OP_UNARY_MINUS', 'U-'); + +/* Keywords */ +define('KEYWORD_BREAK', 
'break'); +define('KEYWORD_CASE', 'case'); +define('KEYWORD_CATCH', 'catch'); +define('KEYWORD_CONST', 'const'); +define('KEYWORD_CONTINUE', 'continue'); +define('KEYWORD_DEBUGGER', 'debugger'); +define('KEYWORD_DEFAULT', 'default'); +define('KEYWORD_DELETE', 'delete'); +define('KEYWORD_DO', 'do'); +define('KEYWORD_ELSE', 'else'); +define('KEYWORD_ENUM', 'enum'); +define('KEYWORD_FALSE', 'false'); +define('KEYWORD_FINALLY', 'finally'); +define('KEYWORD_FOR', 'for'); +define('KEYWORD_FUNCTION', 'function'); +define('KEYWORD_IF', 'if'); +define('KEYWORD_IN', 'in'); +define('KEYWORD_INSTANCEOF', 'instanceof'); +define('KEYWORD_NEW', 'new'); +define('KEYWORD_NULL', 'null'); +define('KEYWORD_RETURN', 'return'); +define('KEYWORD_SWITCH', 'switch'); +define('KEYWORD_THIS', 'this'); +define('KEYWORD_THROW', 'throw'); +define('KEYWORD_TRUE', 'true'); +define('KEYWORD_TRY', 'try'); +define('KEYWORD_TYPEOF', 'typeof'); +define('KEYWORD_VAR', 'var'); +define('KEYWORD_VOID', 'void'); +define('KEYWORD_WHILE', 'while'); +define('KEYWORD_WITH', 'with'); + + +class JSMinPlus +{ + private $parser; + private $reserved = array( + 'break', 'case', 'catch', 'continue', 'default', 'delete', 'do', + 'else', 'finally', 'for', 'function', 'if', 'in', 'instanceof', + 'new', 'return', 'switch', 'this', 'throw', 'try', 'typeof', 'var', + 'void', 'while', 'with', + // Words reserved for future use + 'abstract', 'boolean', 'byte', 'char', 'class', 'const', 'debugger', + 'double', 'enum', 'export', 'extends', 'final', 'float', 'goto', + 'implements', 'import', 'int', 'interface', 'long', 'native', + 'package', 'private', 'protected', 'public', 'short', 'static', + 'super', 'synchronized', 'throws', 'transient', 'volatile', + // These are not reserved, but should be taken into account + // in isValidIdentifier (See jslint source code) + 'arguments', 'eval', 'true', 'false', 'Infinity', 'NaN', 'null', 'undefined' + ); + + private function __construct() + { + $this->parser = new JSParser($this); + } 
+ + public static function minify($js, $filename='') + { + static $instance; + + // this is a singleton + if(!$instance) + $instance = new JSMinPlus(); + + return $instance->min($js, $filename); + } + + private function min($js, $filename) + { + try + { + $n = $this->parser->parse($js, $filename, 1); + return $this->parseTree($n); + } + catch(Exception $e) + { + echo $e->getMessage() . "\n"; + } + + return false; + } + + public function parseTree($n, $noBlockGrouping = false) + { + $s = ''; + + switch ($n->type) + { + case JS_MINIFIED: + $s = $n->value; + break; + + case JS_SCRIPT: + // we do nothing yet with funDecls or varDecls + $noBlockGrouping = true; + // FALL THROUGH + + case JS_BLOCK: + $childs = $n->treeNodes; + $lastType = 0; + for ($c = 0, $i = 0, $j = count($childs); $i < $j; $i++) + { + $type = $childs[$i]->type; + $t = $this->parseTree($childs[$i]); + if (strlen($t)) + { + if ($c) + { + $s = rtrim($s, ';'); + + if ($type == KEYWORD_FUNCTION && $childs[$i]->functionForm == DECLARED_FORM) + { + // put declared functions on a new line + $s .= "\n"; + } + elseif ($type == KEYWORD_VAR && $type == $lastType) + { + // mutiple var-statements can go into one + $t = ',' . substr($t, 4); + } + else + { + // add terminator + $s .= ';'; + } + } + + $s .= $t; + + $c++; + $lastType = $type; + } + } + + if ($c > 1 && !$noBlockGrouping) + { + $s = '{' . $s . '}'; + } + break; + + case KEYWORD_FUNCTION: + $s .= 'function' . ($n->name ? ' ' . $n->name : '') . '('; + $params = $n->params; + for ($i = 0, $j = count($params); $i < $j; $i++) + $s .= ($i ? ',' : '') . $params[$i]; + $s .= '){' . $this->parseTree($n->body, true) . '}'; + break; + + case KEYWORD_IF: + $s = 'if(' . $this->parseTree($n->condition) . ')'; + $thenPart = $this->parseTree($n->thenPart); + $elsePart = $n->elsePart ? 
$this->parseTree($n->elsePart) : null; + + // empty if-statement + if ($thenPart == '') + $thenPart = ';'; + + if ($elsePart) + { + // be carefull and always make a block out of the thenPart; could be more optimized but is a lot of trouble + if ($thenPart != ';' && $thenPart[0] != '{') + $thenPart = '{' . $thenPart . '}'; + + $s .= $thenPart . 'else'; + + // we could check for more, but that hardly ever applies so go for performance + if ($elsePart[0] != '{') + $s .= ' '; + + $s .= $elsePart; + } + else + { + $s .= $thenPart; + } + break; + + case KEYWORD_SWITCH: + $s = 'switch(' . $this->parseTree($n->discriminant) . '){'; + $cases = $n->cases; + for ($i = 0, $j = count($cases); $i < $j; $i++) + { + $case = $cases[$i]; + if ($case->type == KEYWORD_CASE) + $s .= 'case' . ($case->caseLabel->type != TOKEN_STRING ? ' ' : '') . $this->parseTree($case->caseLabel) . ':'; + else + $s .= 'default:'; + + $statement = $this->parseTree($case->statements, true); + if ($statement) + { + $s .= $statement; + // no terminator for last statement + if ($i + 1 < $j) + $s .= ';'; + } + } + $s .= '}'; + break; + + case KEYWORD_FOR: + $s = 'for(' . ($n->setup ? $this->parseTree($n->setup) : '') + . ';' . ($n->condition ? $this->parseTree($n->condition) : '') + . ';' . ($n->update ? $this->parseTree($n->update) : '') . ')'; + + $body = $this->parseTree($n->body); + if ($body == '') + $body = ';'; + + $s .= $body; + break; + + case KEYWORD_WHILE: + $s = 'while(' . $this->parseTree($n->condition) . ')'; + + $body = $this->parseTree($n->body); + if ($body == '') + $body = ';'; + + $s .= $body; + break; + + case JS_FOR_IN: + $s = 'for(' . ($n->varDecl ? $this->parseTree($n->varDecl) : $this->parseTree($n->iterator)) . ' in ' . $this->parseTree($n->object) . ')'; + + $body = $this->parseTree($n->body); + if ($body == '') + $body = ';'; + + $s .= $body; + break; + + case KEYWORD_DO: + $s = 'do{' . $this->parseTree($n->body, true) . '}while(' . $this->parseTree($n->condition) . 
')'; + break; + + case KEYWORD_BREAK: + case KEYWORD_CONTINUE: + $s = $n->value . ($n->label ? ' ' . $n->label : ''); + break; + + case KEYWORD_TRY: + $s = 'try{' . $this->parseTree($n->tryBlock, true) . '}'; + $catchClauses = $n->catchClauses; + for ($i = 0, $j = count($catchClauses); $i < $j; $i++) + { + $t = $catchClauses[$i]; + $s .= 'catch(' . $t->varName . ($t->guard ? ' if ' . $this->parseTree($t->guard) : '') . '){' . $this->parseTree($t->block, true) . '}'; + } + if ($n->finallyBlock) + $s .= 'finally{' . $this->parseTree($n->finallyBlock, true) . '}'; + break; + + case KEYWORD_THROW: + case KEYWORD_RETURN: + $s = $n->type; + if ($n->value) + { + $t = $this->parseTree($n->value); + if (strlen($t)) + { + if ($this->isWordChar($t[0]) || $t[0] == '\\') + $s .= ' '; + + $s .= $t; + } + } + break; + + case KEYWORD_WITH: + $s = 'with(' . $this->parseTree($n->object) . ')' . $this->parseTree($n->body); + break; + + case KEYWORD_VAR: + case KEYWORD_CONST: + $s = $n->value . ' '; + $childs = $n->treeNodes; + for ($i = 0, $j = count($childs); $i < $j; $i++) + { + $t = $childs[$i]; + $s .= ($i ? ',' : '') . $t->name; + $u = $t->initializer; + if ($u) + $s .= '=' . $this->parseTree($u); + } + break; + + case KEYWORD_IN: + case KEYWORD_INSTANCEOF: + $left = $this->parseTree($n->treeNodes[0]); + $right = $this->parseTree($n->treeNodes[1]); + + $s = $left; + + if ($this->isWordChar(substr($left, -1))) + $s .= ' '; + + $s .= $n->type; + + if ($this->isWordChar($right[0]) || $right[0] == '\\') + $s .= ' '; + + $s .= $right; + break; + + case KEYWORD_DELETE: + case KEYWORD_TYPEOF: + $right = $this->parseTree($n->treeNodes[0]); + + $s = $n->type; + + if ($this->isWordChar($right[0]) || $right[0] == '\\') + $s .= ' '; + + $s .= $right; + break; + + case KEYWORD_VOID: + $s = 'void(' . $this->parseTree($n->treeNodes[0]) . 
')'; + break; + + case KEYWORD_DEBUGGER: + throw new Exception('NOT IMPLEMENTED: DEBUGGER'); + break; + + case TOKEN_CONDCOMMENT_START: + case TOKEN_CONDCOMMENT_END: + $s = $n->value . ($n->type == TOKEN_CONDCOMMENT_START ? ' ' : ''); + $childs = $n->treeNodes; + for ($i = 0, $j = count($childs); $i < $j; $i++) + $s .= $this->parseTree($childs[$i]); + break; + + case OP_SEMICOLON: + if ($expression = $n->expression) + $s = $this->parseTree($expression); + break; + + case JS_LABEL: + $s = $n->label . ':' . $this->parseTree($n->statement); + break; + + case OP_COMMA: + $childs = $n->treeNodes; + for ($i = 0, $j = count($childs); $i < $j; $i++) + $s .= ($i ? ',' : '') . $this->parseTree($childs[$i]); + break; + + case OP_ASSIGN: + $s = $this->parseTree($n->treeNodes[0]) . $n->value . $this->parseTree($n->treeNodes[1]); + break; + + case OP_HOOK: + $s = $this->parseTree($n->treeNodes[0]) . '?' . $this->parseTree($n->treeNodes[1]) . ':' . $this->parseTree($n->treeNodes[2]); + break; + + case OP_OR: case OP_AND: + case OP_BITWISE_OR: case OP_BITWISE_XOR: case OP_BITWISE_AND: + case OP_EQ: case OP_NE: case OP_STRICT_EQ: case OP_STRICT_NE: + case OP_LT: case OP_LE: case OP_GE: case OP_GT: + case OP_LSH: case OP_RSH: case OP_URSH: + case OP_MUL: case OP_DIV: case OP_MOD: + $s = $this->parseTree($n->treeNodes[0]) . $n->type . $this->parseTree($n->treeNodes[1]); + break; + + case OP_PLUS: + case OP_MINUS: + $left = $this->parseTree($n->treeNodes[0]); + $right = $this->parseTree($n->treeNodes[1]); + + switch ($n->treeNodes[1]->type) + { + case OP_PLUS: + case OP_MINUS: + case OP_INCREMENT: + case OP_DECREMENT: + case OP_UNARY_PLUS: + case OP_UNARY_MINUS: + $s = $left . $n->type . ' ' . $right; + break; + + case TOKEN_STRING: + //combine concatted strings with same quotestyle + if ($n->type == OP_PLUS && substr($left, -1) == $right[0]) + { + $s = substr($left, 0, -1) . substr($right, 1); + break; + } + // FALL THROUGH + + default: + $s = $left . $n->type . 
$right; + } + break; + + case OP_NOT: + case OP_BITWISE_NOT: + case OP_UNARY_PLUS: + case OP_UNARY_MINUS: + $s = $n->value . $this->parseTree($n->treeNodes[0]); + break; + + case OP_INCREMENT: + case OP_DECREMENT: + if ($n->postfix) + $s = $this->parseTree($n->treeNodes[0]) . $n->value; + else + $s = $n->value . $this->parseTree($n->treeNodes[0]); + break; + + case OP_DOT: + $s = $this->parseTree($n->treeNodes[0]) . '.' . $this->parseTree($n->treeNodes[1]); + break; + + case JS_INDEX: + $s = $this->parseTree($n->treeNodes[0]); + // See if we can replace named index with a dot saving 3 bytes + if ( $n->treeNodes[0]->type == TOKEN_IDENTIFIER && + $n->treeNodes[1]->type == TOKEN_STRING && + $this->isValidIdentifier(substr($n->treeNodes[1]->value, 1, -1)) + ) + $s .= '.' . substr($n->treeNodes[1]->value, 1, -1); + else + $s .= '[' . $this->parseTree($n->treeNodes[1]) . ']'; + break; + + case JS_LIST: + $childs = $n->treeNodes; + for ($i = 0, $j = count($childs); $i < $j; $i++) + $s .= ($i ? ',' : '') . $this->parseTree($childs[$i]); + break; + + case JS_CALL: + $s = $this->parseTree($n->treeNodes[0]) . '(' . $this->parseTree($n->treeNodes[1]) . ')'; + break; + + case KEYWORD_NEW: + case JS_NEW_WITH_ARGS: + $s = 'new ' . $this->parseTree($n->treeNodes[0]) . '(' . ($n->type == JS_NEW_WITH_ARGS ? $this->parseTree($n->treeNodes[1]) : '') . ')'; + break; + + case JS_ARRAY_INIT: + $s = '['; + $childs = $n->treeNodes; + for ($i = 0, $j = count($childs); $i < $j; $i++) + { + $s .= ($i ? ',' : '') . 
$this->parseTree($childs[$i]); + } + $s .= ']'; + break; + + case JS_OBJECT_INIT: + $s = '{'; + $childs = $n->treeNodes; + for ($i = 0, $j = count($childs); $i < $j; $i++) + { + $t = $childs[$i]; + if ($i) + $s .= ','; + if ($t->type == JS_PROPERTY_INIT) + { + // Ditch the quotes when the index is a valid identifier + if ( $t->treeNodes[0]->type == TOKEN_STRING && + $this->isValidIdentifier(substr($t->treeNodes[0]->value, 1, -1)) + ) + $s .= substr($t->treeNodes[0]->value, 1, -1); + else + $s .= $t->treeNodes[0]->value; + + $s .= ':' . $this->parseTree($t->treeNodes[1]); + } + else + { + $s .= $t->type == JS_GETTER ? 'get' : 'set'; + $s .= ' ' . $t->name . '('; + $params = $t->params; + for ($i = 0, $j = count($params); $i < $j; $i++) + $s .= ($i ? ',' : '') . $params[$i]; + $s .= '){' . $this->parseTree($t->body, true) . '}'; + } + } + $s .= '}'; + break; + + case TOKEN_NUMBER: + $s = $n->value; + if (preg_match('/^([1-9]+)(0{3,})$/', $s, $m)) + $s = $m[1] . 'e' . strlen($m[2]); + break; + + case KEYWORD_NULL: case KEYWORD_THIS: case KEYWORD_TRUE: case KEYWORD_FALSE: + case TOKEN_IDENTIFIER: case TOKEN_STRING: case TOKEN_REGEXP: + $s = $n->value; + break; + + case JS_GROUP: + if (in_array( + $n->treeNodes[0]->type, + array( + JS_ARRAY_INIT, JS_OBJECT_INIT, JS_GROUP, + TOKEN_NUMBER, TOKEN_STRING, TOKEN_REGEXP, TOKEN_IDENTIFIER, + KEYWORD_NULL, KEYWORD_THIS, KEYWORD_TRUE, KEYWORD_FALSE + ) + )) + { + $s = $this->parseTree($n->treeNodes[0]); + } + else + { + $s = '(' . $this->parseTree($n->treeNodes[0]) . ')'; + } + break; + + default: + throw new Exception('UNKNOWN TOKEN TYPE: ' . 
$n->type); + } + + return $s; + } + + /** Tells whether $string starts with a letter or underscore, continues with letters, digits or underscores only, and is not listed in $this->reserved. A '$' is rejected here even though isWordChar() accepts it. */ private function isValidIdentifier($string) + { + return preg_match('/^[a-zA-Z_][a-zA-Z0-9_]*$/', $string) && !in_array($string, $this->reserved); + } + + /** True when $char is '_', '$' or alphanumeric according to ctype_alnum(). */ private function isWordChar($char) + { + return $char == '_' || $char == '$' || ctype_alnum($char); + } +} + +/** Parser that reads tokens from a JSTokenizer ($t) and, when a minifier object is supplied, hands each parsed script to it (see Script()). */ class JSParser +{ + private $t; + private $minifier; + + /** Precedence number per operator or node type. NOTE(review): larger values presumably bind tighter; confirm against the expression parser, which is outside this excerpt. */ private $opPrecedence = array( + ';' => 0, + ',' => 1, + '=' => 2, '?' => 2, ':' => 2, + // The above all have to have the same precedence, see bug 330975 + '||' => 4, + '&&' => 5, + '|' => 6, + '^' => 7, + '&' => 8, + '==' => 9, '!=' => 9, '===' => 9, '!==' => 9, + '<' => 10, '<=' => 10, '>=' => 10, '>' => 10, 'in' => 10, 'instanceof' => 10, + '<<' => 11, '>>' => 11, '>>>' => 11, + '+' => 12, '-' => 12, + '*' => 13, '/' => 13, '%' => 13, + 'delete' => 14, 'void' => 14, 'typeof' => 14, + '!' => 14, '~' => 14, 'U+' => 14, 'U-' => 14, + '++' => 15, '--' => 15, + 'new' => 16, + '.' => 17, + JS_NEW_WITH_ARGS => 0, JS_INDEX => 0, JS_CALL => 0, + JS_ARRAY_INIT => 0, JS_OBJECT_INIT => 0, JS_GROUP => 0 + ); + + /** Operand count per operator or node type. NOTE(review): the -2 for ',' presumably marks a variable-length operand list; confirm in the expression parser. */ private $opArity = array( + ',' => -2, + '=' => 2, + '?' => 3, + '||' => 2, + '&&' => 2, + '|' => 2, + '^' => 2, + '&' => 2, + '==' => 2, '!=' => 2, '===' => 2, '!==' => 2, + '<' => 2, '<=' => 2, '>=' => 2, '>' => 2, 'in' => 2, 'instanceof' => 2, + '<<' => 2, '>>' => 2, '>>>' => 2, + '+' => 2, '-' => 2, + '*' => 2, '/' => 2, '%' => 2, + 'delete' => 1, 'void' => 1, 'typeof' => 1, + '!' => 1, '~' => 1, 'U+' => 1, 'U-' => 1, + '++' => 1, '--' => 1, + 'new' => 1, + '.' 
=> 2, + JS_NEW_WITH_ARGS => 2, JS_INDEX => 2, JS_CALL => 2, + JS_ARRAY_INIT => 1, JS_OBJECT_INIT => 1, JS_GROUP => 1, + TOKEN_CONDCOMMENT_START => 1, TOKEN_CONDCOMMENT_END => 1 + ); + + /** Stores the optional minifier (used by Script() when set) and creates the JSTokenizer this parser reads from. */ public function __construct($minifier=null) + { + $this->minifier = $minifier; + $this->t = new JSTokenizer(); + } + + /** Passes $s, $f and $l to the tokenizer's init() (presumably source text, file name and starting line; confirm against JSTokenizer), parses one script with a fresh JSCompilerContext and returns the resulting node. Throws the tokenizer's 'Syntax error' when tokens remain after the script has been parsed. */ public function parse($s, $f, $l) + { + // initialize tokenizer + $this->t->init($s, $f, $l); + + $x = new JSCompilerContext(false); + $n = $this->Script($x); + if (!$this->t->isDone()) + throw $this->t->newSyntaxError('Syntax error'); + + return $n; + } + + /** Parses the statement list for context $x into a node typed JS_SCRIPT that carries the context's funDecls and varDecls. When a minifier is set, the node's value becomes the minifier's output for it, its treeNodes, funDecls and varDecls are nulled, and its type is changed to JS_MINIFIED. */ private function Script($x) + { + $n = $this->Statements($x); + $n->type = JS_SCRIPT; + $n->funDecls = $x->funDecls; + $n->varDecls = $x->varDecls; + + // minify by scope + if ($this->minifier) + { + $n->value = $this->minifier->parseTree($n); + + // clear tree from node to save memory + $n->treeNodes = null; + $n->funDecls = null; + $n->varDecls = null; + + $n->type = JS_MINIFIED; + } + + return $n; + } + + /** Collects statements into a new JS_BLOCK node until the tokenizer is done or the next token is OP_RIGHT_CURLY. The node is kept on $x->stmtStack while its children are parsed and popped again before it is returned. */ private function Statements($x) + { + $n = new JSNode($this->t, JS_BLOCK); + array_push($x->stmtStack, $n); + + while (!$this->t->isDone() && $this->t->peek() != OP_RIGHT_CURLY) + $n->addNode($this->Statement($x)); + + array_pop($x->stmtStack); + + return $n; + } + + /** Requires OP_LEFT_CURLY, parses the enclosed statements, requires OP_RIGHT_CURLY, and returns the statements node. */ private function Block($x) + { + $this->t->mustMatch(OP_LEFT_CURLY); + $n = $this->Statements($x); + $this->t->mustMatch(OP_RIGHT_CURLY); + + return $n; + } + + private function Statement($x) + { + $tt = $this->t->get(); + $n2 = null; + + // Cases for statements ending in a right curly return early, avoiding the + // common semicolon insertion magic after this switch. + switch ($tt) + { + case KEYWORD_FUNCTION: + return $this->FunctionDefinition( + $x, + true, + count($x->stmtStack) > 1 ? 
STATEMENT_FORM : DECLARED_FORM + ); + break; + + case OP_LEFT_CURLY: + $n = $this->Statements($x); + $this->t->mustMatch(OP_RIGHT_CURLY); + return $n; + + case KEYWORD_IF: + $n = new JSNode($this->t); + $n->condition = $this->ParenExpression($x); + array_push($x->stmtStack, $n); + $n->thenPart = $this->Statement($x); + $n->elsePart = $this->t->match(KEYWORD_ELSE) ? $this->Statement($x) : null; + array_pop($x->stmtStack); + return $n; + + case KEYWORD_SWITCH: + $n = new JSNode($this->t); + $this->t->mustMatch(OP_LEFT_PAREN); + $n->discriminant = $this->Expression($x); + $this->t->mustMatch(OP_RIGHT_PAREN); + $n->cases = array(); + $n->defaultIndex = -1; + + array_push($x->stmtStack, $n); + + $this->t->mustMatch(OP_LEFT_CURLY); + + while (($tt = $this->t->get()) != OP_RIGHT_CURLY) + { + switch ($tt) + { + case KEYWORD_DEFAULT: + if ($n->defaultIndex >= 0) + throw $this->t->newSyntaxError('More than one switch default'); + // FALL THROUGH + case KEYWORD_CASE: + $n2 = new JSNode($this->t); + if ($tt == KEYWORD_DEFAULT) + $n->defaultIndex = count($n->cases); + else + $n2->caseLabel = $this->Expression($x, OP_COLON); + break; + default: + throw $this->t->newSyntaxError('Invalid switch case'); + } + + $this->t->mustMatch(OP_COLON); + $n2->statements = new JSNode($this->t, JS_BLOCK); + while (($tt = $this->t->peek()) != KEYWORD_CASE && $tt != KEYWORD_DEFAULT && $tt != OP_RIGHT_CURLY) + $n2->statements->addNode($this->Statement($x)); + + array_push($n->cases, $n2); + } + + array_pop($x->stmtStack); + return $n; + + case KEYWORD_FOR: + $n = new JSNode($this->t); + $n->isLoop = true; + $this->t->mustMatch(OP_LEFT_PAREN); + + if (($tt = $this->t->peek()) != OP_SEMICOLON) + { + $x->inForLoopInit = true; + if ($tt == KEYWORD_VAR || $tt == KEYWORD_CONST) + { + $this->t->get(); + $n2 = $this->Variables($x); + } + else + { + $n2 = $this->Expression($x); + } + $x->inForLoopInit = false; + } + + if ($n2 && $this->t->match(KEYWORD_IN)) + { + $n->type = JS_FOR_IN; + if ($n2->type == 
KEYWORD_VAR) + { + if (count($n2->treeNodes) != 1) + { + throw $this->t->newSyntaxError( + 'Invalid for..in left-hand side', + $this->t->filename, + $n2->lineno + ); + } + + // NB: n2[0].type == IDENTIFIER and n2[0].value == n2[0].name. + $n->iterator = $n2->treeNodes[0]; + $n->varDecl = $n2; + } + else + { + $n->iterator = $n2; + $n->varDecl = null; + } + + $n->object = $this->Expression($x); + } + else + { + $n->setup = $n2 ? $n2 : null; + $this->t->mustMatch(OP_SEMICOLON); + $n->condition = $this->t->peek() == OP_SEMICOLON ? null : $this->Expression($x); + $this->t->mustMatch(OP_SEMICOLON); + $n->update = $this->t->peek() == OP_RIGHT_PAREN ? null : $this->Expression($x); + } + + $this->t->mustMatch(OP_RIGHT_PAREN); + $n->body = $this->nest($x, $n); + return $n; + + case KEYWORD_WHILE: + $n = new JSNode($this->t); + $n->isLoop = true; + $n->condition = $this->ParenExpression($x); + $n->body = $this->nest($x, $n); + return $n; + + case KEYWORD_DO: + $n = new JSNode($this->t); + $n->isLoop = true; + $n->body = $this->nest($x, $n, KEYWORD_WHILE); + $n->condition = $this->ParenExpression($x); + if (!$x->ecmaStrictMode) + { + //