| Nick Dickinson-Wilde

Same name and namespace in other branches

5.0.x advagg_js_minify/jsminplus.inc \JSTokenizer 1 comment
6.0.x advagg_js_minify/jsminplus.inc \JSTokenizer 1 comment
7.x-2.x advagg_js_compress/jsminplus.inc \JSTokenizer 1 comment
8.x-2.x advagg_js_minify/jsminplus.inc \JSTokenizer 1 comment
8.x-3.x advagg_js_minify/jsminplus.inc \JSTokenizer 1 comment
8.x-4.x advagg_js_minify/jsminplus.inc \JSTokenizer 1 comment

Hierarchy

class \JSTokenizer

Expanded class hierarchy of JSTokenizer

File

advagg_js_compress/jsminplus.inc, line 1840

View source

/**
 * Tokenizer that turns JavaScript source text into a stream of tokens.
 *
 * Tokens are stored in a four-slot ring buffer ($tokens, indexed modulo 4 via
 * "& 3"). This lets the consumer push back up to three tokens with unget()
 * and re-read them through get() or inspect them through peek().
 *
 * The token type constants (TOKEN_*, OP_*) and the JSToken class are defined
 * outside of this class.
 */
class JSTokenizer {
    /**
     * Byte offset into $source of the next unread character.
     */
    private $cursor = 0;

    /**
     * The complete JavaScript source being tokenized.
     */
    private $source;

    /**
     * Ring buffer of JSToken objects (at most four slots are used).
     */
    public $tokens = array();

    /**
     * Index in $tokens of the current (most recently returned) token.
     */
    public $tokenIndex = 0;

    /**
     * Number of tokens pushed back by unget() that have not been re-read yet.
     */
    public $lookahead = 0;

    /**
     * When true, a newline is reported as TOKEN_NEWLINE instead of being
     * skipped as whitespace.
     */
    public $scanNewlines = false;

    /**
     * When true, '/' may start a regular expression literal and '+' / '-' are
     * reported as unary operators. Only reset by init() in this class;
     * presumably toggled by the parser that consumes the tokens.
     */
    public $scanOperand = true;

    /**
     * Name of the file being tokenized; used in error messages.
     */
    public $filename;

    /**
     * Current line number; used in error messages and stored on each token.
     */
    public $lineno;

    /**
     * Words that are reported as their own token type rather than as
     * TOKEN_IDENTIFIER.
     */
    private $keywords = array(
        'break',
        'case',
        'catch',
        'const',
        'continue',
        'debugger',
        'default',
        'delete',
        'do',
        'else',
        'enum',
        'false',
        'finally',
        'for',
        'function',
        'if',
        'in',
        'instanceof',
        'new',
        'null',
        'return',
        'switch',
        'this',
        'throw',
        'true',
        'try',
        'typeof',
        'var',
        'void',
        'while',
        'with',
    );

    /**
     * Operator strings used to build $opRegExp.
     *
     * The order matters: the regular expression is an alternation that takes
     * the first alternative that matches, so every operator must be listed
     * before any operator that is a prefix of it (e.g. '===' before '==').
     */
    private $opTypeNames = array(
        ';',
        ',',
        '?',
        ':',
        '||',
        '&&',
        '|',
        '^',
        '&',
        '===',
        '==',
        '=',
        '!==',
        '!=',
        '<<',
        '<=',
        '<',
        '>>>',
        '>>',
        '>=',
        '>',
        '++',
        '--',
        '+',
        '-',
        '*',
        '/',
        '%',
        '!',
        '~',
        '.',
        '[',
        ']',
        '{',
        '}',
        '(',
        ')',
        '@*/',
    );

    /**
     * Operators that form a compound assignment when directly followed by '='.
     */
    private $assignOps = array(
        '|',
        '^',
        '&',
        '<<',
        '>>',
        '>>>',
        '+',
        '-',
        '*',
        '/',
        '%',
    );

    /**
     * Anchored regular expression matching any entry of $opTypeNames.
     */
    private $opRegExp;

    /**
     * Builds the operator regular expression from $opTypeNames.
     */
    public function __construct() {
        $this->opRegExp = '#^(' . implode('|', array_map('preg_quote', $this->opTypeNames)) . ')#';
    }

    /**
     * Resets the tokenizer so that it starts reading a new source string.
     *
     * @param string $source
     *   The JavaScript source to tokenize.
     * @param string $filename
     *   File name used in error messages; '[inline]' is used when empty.
     * @param int $lineno
     *   Line number assigned to the first line of $source.
     */
    public function init($source, $filename = '', $lineno = 1) {
        $this->source = $source;
        $this->filename = $filename ? $filename : '[inline]';
        $this->lineno = $lineno;
        $this->cursor = 0;
        $this->tokens = array();
        $this->tokenIndex = 0;
        $this->lookahead = 0;
        $this->scanNewlines = false;
        $this->scanOperand = true;
    }

    /**
     * Returns unread source text starting at the cursor.
     *
     * @param int|null $chunksize
     *   Maximum number of bytes to return; a falsy value returns everything
     *   that is left.
     *
     * @return string
     *   The requested slice of the source.
     */
    public function getInput($chunksize) {
        if ($chunksize) {
            return substr($this->source, $this->cursor, $chunksize);
        }
        return substr($this->source, $this->cursor);
    }

    /**
     * Tells whether the next token is the end of the input.
     *
     * @return bool
     *   TRUE when peek() reports TOKEN_END.
     */
    public function isDone() {
        return $this->peek() == TOKEN_END;
    }

    /**
     * Consumes the next token if it has the given type.
     *
     * @param mixed $tt
     *   The token type to look for.
     *
     * @return bool
     *   TRUE when the token matched and was consumed; FALSE when it did not
     *   match, in which case it has been pushed back with unget().
     */
    public function match($tt) {
        // unget() returns nothing, so the right-hand side evaluates to FALSE.
        return $this->get() == $tt || $this->unget();
    }

    /**
     * Consumes the next token and requires it to have the given type.
     *
     * @param mixed $tt
     *   The token type that must come next.
     *
     * @return JSToken
     *   The token that was matched.
     *
     * @throws Exception
     *   When the next token has a different type.
     */
    public function mustMatch($tt) {
        if (!$this->match($tt)) {
            throw $this->newSyntaxError('Unexpected token; token ' . $tt . ' expected');
        }
        return $this->currentToken();
    }

    /**
     * Returns the type of the next token without consuming it.
     *
     * @return mixed
     *   The type of the next token. While $scanNewlines is set and the next
     *   buffered token sits on a different line than the current one,
     *   TOKEN_NEWLINE is returned instead.
     */
    public function peek() {
        if ($this->lookahead) {
            // A pushed-back token is available: inspect it in the ring buffer.
            $next = $this->tokens[$this->tokenIndex + $this->lookahead & 3];
            if ($this->scanNewlines && $next->lineno != $this->lineno) {
                $tt = TOKEN_NEWLINE;
            }
            else {
                $tt = $next->type;
            }
        }
        else {
            // Nothing buffered: read one token and push it straight back.
            $tt = $this->get();
            $this->unget();
        }
        return $tt;
    }

    /**
     * Like peek(), but with newline scanning temporarily enabled.
     *
     * @return mixed
     *   The type of the next token, or TOKEN_NEWLINE when a line break comes
     *   first.
     */
    public function peekOnSameLine() {
        $this->scanNewlines = true;
        $tt = $this->peek();
        $this->scanNewlines = false;
        return $tt;
    }

    /**
     * Returns the current token.
     *
     * @return JSToken|null
     *   The token at $tokenIndex, or NULL when no token has been read yet.
     */
    public function currentToken() {
        if (!empty($this->tokens)) {
            return $this->tokens[$this->tokenIndex];
        }
        return null;
    }

    /**
     * Reads the next token and makes it the current token.
     *
     * Pushed-back tokens are returned first. Otherwise whitespace and comments
     * are skipped and one token is scanned from the source.
     *
     * @param int|null $chunksize
     *   Number of bytes of source to inspect per attempt. When a construct
     *   cannot be matched within the chunk, scanning is retried against the
     *   whole remaining source.
     *
     * @return mixed
     *   The type of the token that was read.
     *
     * @throws Exception
     *   On an unterminated string literal or an illegal character.
     */
    public function get($chunksize = 1000) {
        // Replay tokens that were pushed back with unget().
        while ($this->lookahead) {
            $this->lookahead--;
            $this->tokenIndex = $this->tokenIndex + 1 & 3;
            $token = $this->tokens[$this->tokenIndex];
            if ($token->type != TOKEN_NEWLINE || $this->scanNewlines) {
                return $token->type;
            }
        }
        $conditional_comment = false;
        // strip whitespace and comments
        while (true) {
            $input = $this->getInput($chunksize);
            // whitespace handling; gobble up \r as well (effectively we don't have support for MAC newlines!)
            $re = $this->scanNewlines ? '/^[ \\r\\t]+/' : '/^\\s+/';
            if (preg_match($re, $input, $match)) {
                $spaces = $match[0];
                $spacelen = strlen($spaces);
                $this->cursor += $spacelen;
                if (!$this->scanNewlines) {
                    $this->lineno += substr_count($spaces, "\n");
                }
                if ($spacelen == $chunksize) {
                    // complete chunk contained whitespace
                    continue;
                }
                $input = $this->getInput($chunksize);
                if ($input == '' || $input[0] != '/') {
                    break;
                }
            }
            // Comments
            if (!preg_match('/^\\/(?:\\*(@(?:cc_on|if|elif|else|end))?.*?\\*\\/|\\/[^\\n]*)/s', $input, $match)) {
                if (!$chunksize) {
                    break;
                }
                // retry with a full chunk fetch; this also prevents breakage of long regular expressions (which will never match a comment)
                $chunksize = null;
                continue;
            }
            // check if this is a conditional (JScript) comment
            if (!empty($match[1])) {
                // Only the opening marker becomes the token; the comment body
                // is tokenized as regular code afterwards.
                $match[0] = '/*' . $match[1];
                $conditional_comment = true;
                break;
            }
            else {
                $this->cursor += strlen($match[0]);
                $this->lineno += substr_count($match[0], "\n");
            }
        }
        // Only set by the operator branch; initialised here so that the
        // assignment-operator bookkeeping below never reads an undefined
        // variable.
        $op = null;
        if ($input == '') {
            $tt = TOKEN_END;
            $match = array(
                '',
            );
        }
        elseif ($conditional_comment) {
            $tt = TOKEN_CONDCOMMENT_START;
        }
        else {
            switch ($input[0]) {
                case '0':
                    // hexadecimal; the isset() guard covers a source that ends
                    // with a lone "0"
                    if (isset($input[1]) && ($input[1] == 'x' || $input[1] == 'X') && preg_match('/^0x[0-9a-f]+/i', $input, $match)) {
                        $tt = TOKEN_NUMBER;
                        break;
                    }
                // FALL THROUGH
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    // should always match
                    preg_match('/^\\d+(?:\\.\\d*)?(?:[eE][-+]?\\d+)?/', $input, $match);
                    $tt = TOKEN_NUMBER;
                    break;
                case "'":
                    if (preg_match('/^\'(?:[^\\\\\'\\r\\n]++|\\\\(?:.|\\r?\\n))*\'/', $input, $match)) {
                        $tt = TOKEN_STRING;
                    }
                    else {
                        if ($chunksize) {
                            // retry with a full chunk fetch
                            return $this->get(null);
                        }
                        throw $this->newSyntaxError('Unterminated string literal');
                    }
                    break;
                case '"':
                    if (preg_match('/^"(?:[^\\\\"\\r\\n]++|\\\\(?:.|\\r?\\n))*"/', $input, $match)) {
                        $tt = TOKEN_STRING;
                    }
                    else {
                        if ($chunksize) {
                            // retry with a full chunk fetch
                            return $this->get(null);
                        }
                        throw $this->newSyntaxError('Unterminated string literal');
                    }
                    break;
                case '/':
                    // A slash starts a regular expression literal only where
                    // an operand is expected; otherwise it is an operator.
                    if ($this->scanOperand && preg_match('/^\\/((?:\\\\.|\\[(?:\\\\.|[^\\]])*\\]|[^\\/])+)\\/([gimy]*)/', $input, $match)) {
                        $tt = TOKEN_REGEXP;
                        break;
                    }
                // FALL THROUGH
                case '|':
                case '^':
                case '&':
                case '<':
                case '>':
                case '+':
                case '-':
                case '*':
                case '%':
                case '=':
                case '!':
                    // should always match
                    preg_match($this->opRegExp, $input, $match);
                    $op = $match[0];
                    $oplen = strlen($op);
                    // The isset() guard covers a source that ends directly
                    // after the operator.
                    if (in_array($op, $this->assignOps, true) && isset($input[$oplen]) && $input[$oplen] == '=') {
                        // Compound assignment such as "+=": the token type is
                        // OP_ASSIGN and $op keeps the underlying operator.
                        $tt = OP_ASSIGN;
                        $match[0] .= '=';
                    }
                    else {
                        $tt = $op;
                        if ($this->scanOperand) {
                            if ($op == OP_PLUS) {
                                $tt = OP_UNARY_PLUS;
                            }
                            elseif ($op == OP_MINUS) {
                                $tt = OP_UNARY_MINUS;
                            }
                        }
                        $op = null;
                    }
                    break;
                case '.':
                    // A dot followed by digits is a number such as ".5".
                    if (preg_match('/^\\.\\d+(?:[eE][-+]?\\d+)?/', $input, $match)) {
                        $tt = TOKEN_NUMBER;
                        break;
                    }
                // FALL THROUGH
                case ';':
                case ',':
                case '?':
                case ':':
                case '~':
                case '[':
                case ']':
                case '{':
                case '}':
                case '(':
                case ')':
                    // these are all single
                    $match = array(
                        $input[0],
                    );
                    $tt = $input[0];
                    break;
                case '@':
                    // check end of conditional comment
                    if (substr($input, 0, 3) == '@*/') {
                        $match = array(
                            '@*/',
                        );
                        $tt = TOKEN_CONDCOMMENT_END;
                    }
                    else {
                        throw $this->newSyntaxError('Illegal token');
                    }
                    break;
                case "\n":
                    if ($this->scanNewlines) {
                        $match = array(
                            "\n",
                        );
                        $tt = TOKEN_NEWLINE;
                    }
                    else {
                        throw $this->newSyntaxError('Illegal token');
                    }
                    break;
                default:
                    // FIXME: add support for unicode and unicode escape sequence \uHHHH
                    if (preg_match('/^[$\\w]+/', $input, $match)) {
                        $tt = in_array($match[0], $this->keywords, true) ? $match[0] : TOKEN_IDENTIFIER;
                    }
                    else {
                        throw $this->newSyntaxError('Illegal token');
                    }
            }
        }
        // Advance the ring buffer and reuse the JSToken object in that slot.
        $this->tokenIndex = $this->tokenIndex + 1 & 3;
        if (!isset($this->tokens[$this->tokenIndex])) {
            $this->tokens[$this->tokenIndex] = new JSToken();
        }
        $token = $this->tokens[$this->tokenIndex];
        $token->type = $tt;
        if ($tt == OP_ASSIGN) {
            $token->assignOp = $op;
        }
        $token->start = $this->cursor;
        $token->value = $match[0];
        $this->cursor += strlen($match[0]);
        $token->end = $this->cursor;
        $token->lineno = $this->lineno;
        return $tt;
    }

    /**
     * Pushes the current token back so that the next get() returns it again.
     *
     * @throws Exception
     *   When more tokens are pushed back than the ring buffer can hold.
     */
    public function unget() {
        if (++$this->lookahead == 4) {
            throw $this->newSyntaxError('PANIC: too much lookahead!');
        }
        $this->tokenIndex = $this->tokenIndex - 1 & 3;
    }

    /**
     * Creates (but does not throw) an exception describing a parse error.
     *
     * @param string $m
     *   Description of the problem.
     *
     * @return Exception
     *   Exception whose message includes the file name and current line.
     */
    public function newSyntaxError($m) {
        return new Exception('Parse error: ' . $m . ' in file \'' . $this->filename . '\' on line ' . $this->lineno);
    }

}

Members

Title Sort descending	Modifiers	Object type
JSTokenizer::$assignOps	private	property
JSTokenizer::$cursor	private	property
JSTokenizer::$filename	public	property
JSTokenizer::$keywords	private	property
JSTokenizer::$lineno	public	property
JSTokenizer::$lookahead	public	property
JSTokenizer::$opRegExp	private	property
JSTokenizer::$opTypeNames	private	property
JSTokenizer::$scanNewlines	public	property
JSTokenizer::$scanOperand	public	property
JSTokenizer::$source	private	property
JSTokenizer::$tokenIndex	public	property
JSTokenizer::$tokens	public	property
JSTokenizer::currentToken	public	function
JSTokenizer::get	public	function
JSTokenizer::getInput	public	function
JSTokenizer::init	public	function
JSTokenizer::isDone	public	function
JSTokenizer::match	public	function
JSTokenizer::mustMatch	public	function
JSTokenizer::newSyntaxError	public	function
JSTokenizer::peek	public	function
JSTokenizer::peekOnSameLine	public	function
JSTokenizer::unget	public	function
JSTokenizer::__construct	public	function