Same name in other branches
- 5.0.x advagg_js_minify/jsminplus.inc \JSTokenizer::get()
- 7.x-1.x advagg_js_compress/jsminplus.inc \JSTokenizer::get()
- 7.x-2.x advagg_js_compress/jsminplus.inc \JSTokenizer::get()
- 8.x-2.x advagg_js_minify/jsminplus.inc \JSTokenizer::get()
- 8.x-3.x advagg_js_minify/jsminplus.inc \JSTokenizer::get()
- 8.x-4.x advagg_js_minify/jsminplus.inc \JSTokenizer::get()
2 calls to JSTokenizer::get()
- JSTokenizer::match in advagg_js_minify/jsminplus.inc
- JSTokenizer::peek in advagg_js_minify/jsminplus.inc
File
- advagg_js_minify/jsminplus.inc, line 2020
Class
- JSTokenizer
Code
/**
 * Reads the next token from the input and returns its type.
 *
 * Tokens that were pushed back (counted by $this->lookahead) are replayed
 * first. Otherwise leading whitespace and comments are skipped, the next
 * lexeme is classified, and the result is written into the next slot of the
 * four-slot circular buffer $this->tokens (type, value, start, end, lineno
 * and, for compound assignments, assignOp). The cursor is advanced past the
 * token.
 *
 * @param int|null $chunksize
 *   Size passed to $this->getInput() when fetching input to inspect. A falsy
 *   value is used for the "full chunk fetch" retries that are triggered when
 *   a comment or string literal does not match within the current chunk.
 * @param bool $op_dot
 *   When TRUE, a word token is always reported as TOKEN_IDENTIFIER, even if
 *   it is listed in $this->keywords.
 *
 * @return mixed
 *   The token type: one of the TOKEN_* / OP_* constants, the operator or
 *   punctuation string itself, or the keyword string.
 *
 * @throws \Throwable
 *   The error built by $this->newSyntaxError() for an unterminated string
 *   literal or an illegal token.
 */
public function get($chunksize = 1000, $op_dot = false) {
  // Replay pushed-back tokens before scanning any new input.
  while ($this->lookahead) {
    $this->lookahead--;
    // "& 3" wraps the index so the buffer cycles through slots 0..3.
    $this->tokenIndex = ($this->tokenIndex + 1) & 3;
    $token = $this->tokens[$this->tokenIndex];
    // Buffered newline tokens are skipped unless newlines are significant.
    if ($token->type != TOKEN_NEWLINE || $this->scanNewlines) {
      return $token->type;
    }
  }

  $conditional_comment = false;

  // Strip whitespace and comments.
  while (true) {
    $input = $this->getInput($chunksize);

    // Whitespace handling; gobble up \r as well (effectively we don't have
    // support for MAC newlines!). When newlines are significant they are left
    // in place so they can be tokenized below.
    $re = $this->scanNewlines ? '/^[ \\r\\t]+/' : '/^\\s+/';
    if (preg_match($re, $input, $match)) {
      $spaces = $match[0];
      $spacelen = strlen($spaces);
      $this->cursor += $spacelen;
      if (!$this->scanNewlines) {
        $this->lineno += substr_count($spaces, "\n");
      }

      // The complete chunk contained whitespace: fetch the next chunk.
      if ($spacelen == $chunksize) {
        continue;
      }

      $input = $this->getInput($chunksize);
      // Anything that does not start with "/" cannot be a comment.
      if ($input == '' || $input[0] != '/') {
        break;
      }
    }

    // Comments.
    if (!preg_match('/^\\/(?:\\*(@(?:cc_on|if|elif|else|end))?.*?\\*\\/|\\/[^\\n]*)/s', $input, $match)) {
      if (!$chunksize) {
        break;
      }

      // Retry with a full chunk fetch; this also prevents breakage of long
      // regular expressions (which will never match a comment).
      $chunksize = null;
      continue;
    }

    // Check if this is a conditional (JScript) comment.
    if (!empty($match[1])) {
      // Only the opening "/*@keyword" is consumed as the start token.
      $match[0] = '/*' . $match[1];
      $conditional_comment = true;
      break;
    }
    else {
      // Ordinary comment: skip it and keep the line counter in sync.
      $this->cursor += strlen($match[0]);
      $this->lineno += substr_count($match[0], "\n");
    }
  }

  if ($input == '') {
    $tt = TOKEN_END;
    $match = array(
      '',
    );
  }
  elseif ($conditional_comment) {
    $tt = TOKEN_CONDCOMMENT_START;
  }
  else {
    switch ($input[0]) {
      case '0':
        // Hexadecimal. isset() guards the offset read: a lone "0" at the very
        // end of the input has no second character, and reading it would
        // raise an "Uninitialized string offset" warning.
        if (isset($input[1]) && ($input[1] == 'x' || $input[1] == 'X') && preg_match('/^0x[0-9a-f]+/i', $input, $match)) {
          $tt = TOKEN_NUMBER;
          break;
        }

      // FALL THROUGH
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9':
        // Should always match.
        preg_match('/^\\d+(?:\\.\\d*)?(?:[eE][-+]?\\d+)?/', $input, $match);
        $tt = TOKEN_NUMBER;
        break;

      case "'":
        if (preg_match('/^\'(?:[^\\\\\'\\r\\n]++|\\\\(?:.|\\r?\\n))*\'/', $input, $match)) {
          $tt = TOKEN_STRING;
        }
        else {
          // The literal may simply be longer than the chunk: retry with a
          // full chunk fetch before giving up.
          if ($chunksize) {
            return $this->get(null);
          }
          throw $this->newSyntaxError('Unterminated string literal');
        }
        break;

      case '"':
        if (preg_match('/^"(?:[^\\\\"\\r\\n]++|\\\\(?:.|\\r?\\n))*"/', $input, $match)) {
          $tt = TOKEN_STRING;
        }
        else {
          // The literal may simply be longer than the chunk: retry with a
          // full chunk fetch before giving up.
          if ($chunksize) {
            return $this->get(null);
          }
          throw $this->newSyntaxError('Unterminated string literal');
        }
        break;

      case '/':
        // A "/" starts a regular expression literal only where an operand is
        // expected; otherwise it is handled as an operator below.
        if ($this->scanOperand && preg_match('/^\\/((?:\\\\.|\\[(?:\\\\.|[^\\]])*\\]|[^\\/])+)\\/([gimy]*)/', $input, $match)) {
          $tt = TOKEN_REGEXP;
          break;
        }

      // FALL THROUGH
      case '|':
      case '^':
      case '&':
      case '<':
      case '>':
      case '+':
      case '-':
      case '*':
      case '%':
      case '=':
      case '!':
        // Should always match.
        preg_match($this->opRegExp, $input, $match);
        $op = $match[0];
        $oplen = strlen($op);
        // Compound assignment (e.g. "+="). isset() guards the offset read:
        // when the operator is the last thing in the input there is no
        // following character, and reading it would raise an "Uninitialized
        // string offset" warning.
        if (in_array($op, $this->assignOps) && isset($input[$oplen]) && $input[$oplen] == '=') {
          $tt = OP_ASSIGN;
          $match[0] .= '=';
        }
        else {
          $tt = $op;
          // In operand position "+" and "-" are the unary forms.
          if ($this->scanOperand) {
            if ($op == OP_PLUS) {
              $tt = OP_UNARY_PLUS;
            }
            elseif ($op == OP_MINUS) {
              $tt = OP_UNARY_MINUS;
            }
          }
          $op = null;
        }
        break;

      case '.':
        // A dot followed by digits is a number literal such as ".5".
        if (preg_match('/^\\.\\d+(?:[eE][-+]?\\d+)?/', $input, $match)) {
          $tt = TOKEN_NUMBER;
          break;
        }

      // FALL THROUGH
      case ';':
      case ',':
      case '?':
      case ':':
      case '~':
      case '[':
      case ']':
      case '{':
      case '}':
      case '(':
      case ')':
        // These are all single-character tokens; the character is the type.
        $match = array(
          $input[0],
        );
        $tt = $input[0];
        break;

      case '@':
        // Check end of conditional comment.
        if (substr($input, 0, 3) == '@*/') {
          $match = array(
            '@*/',
          );
          $tt = TOKEN_CONDCOMMENT_END;
        }
        else {
          throw $this->newSyntaxError('Illegal token');
        }
        break;

      case "\n":
        // A newline can only be left in the input when scanNewlines is set
        // (the whitespace pattern above consumes it otherwise).
        if ($this->scanNewlines) {
          $match = array(
            "\n",
          );
          $tt = TOKEN_NEWLINE;
        }
        else {
          throw $this->newSyntaxError('Illegal token');
        }
        break;

      default:
        // Fast path for identifiers: word chars followed by whitespace or
        // various other tokens. Note we don't need to exclude digits in the
        // first char, as they've already been found above.
        if (!preg_match('/^[$\\w]+(?=[\\s\\/\\|\\^\\&<>\\+\\-\\*%=!.;,\\?:~\\[\\]\\{\\}\\(\\)@])/', $input, $match)) {
          // Character classes per ECMA-262 edition 5.1 section 7.6.
          // Per spec, must accept Unicode 3.0, *may* accept later versions.
          // We'll take whatever PCRE understands, which should be more recent.
          $identifierStartChars = "\\p{L}\\p{Nl}" . "\$" . "_";
          // \p{Pc} is UnicodeConnectorPunctuation.
          $identifierPartChars = $identifierStartChars . "\\p{Mn}\\p{Mc}" . "\\p{Nd}" . "\\p{Pc}";
          // Exactly four hex digits, the same set the decoder below accepts.
          $unicodeEscape = "\\\\u[0-9A-Fa-f]{4}";
          $identifierRegex = "/^" . "(?:[{$identifierStartChars}]|{$unicodeEscape})" . "(?:[{$identifierPartChars}]|{$unicodeEscape})*" . "/uS";
          if (preg_match($identifierRegex, $input, $match)) {
            if (strpos($match[0], '\\') !== false) {
              // Per ECMA-262 edition 5.1, section 7.6 escape sequences should
              // behave as if they were the original chars, but only within
              // the boundaries of the identifier.
              $decoded = preg_replace_callback('/\\\\u([0-9A-Fa-f]{4})/', array(
                __CLASS__,
                'unicodeEscapeCallback',
              ), $match[0]);

              // Since our original regex didn't de-escape the originals, we
              // need to check for validity again. No need to worry about
              // token boundaries, as anything outside the identifier is
              // illegal!
              if (!preg_match("/^[{$identifierStartChars}][{$identifierPartChars}]*\$/u", $decoded)) {
                throw $this->newSyntaxError('Illegal token');
              }

              // Per spec it _ought_ to work to use these escapes for keywords
              // words as well... but IE rejects them as invalid, while
              // Firefox and Chrome treat them as identifiers that don't match
              // the keyword.
              if (in_array($decoded, $this->keywords)) {
                throw $this->newSyntaxError('Illegal token');
              }
              // TODO: save the decoded form for output?
            }
          }
          else {
            throw $this->newSyntaxError('Illegal token');
          }
        }

        // Identifiers after an OP_DOT can include otherwise reserve keywords.
        if ($op_dot) {
          $tt = TOKEN_IDENTIFIER;
        }
        else {
          // A keyword is reported with the keyword string itself as its type.
          $tt = in_array($match[0], $this->keywords) ? $match[0] : TOKEN_IDENTIFIER;
        }
    }
  }

  // Store the new token in the next slot of the circular buffer, creating
  // the slot's JSToken object the first time it is used.
  $this->tokenIndex = ($this->tokenIndex + 1) & 3;
  if (!isset($this->tokens[$this->tokenIndex])) {
    $this->tokens[$this->tokenIndex] = new JSToken();
  }
  $token = $this->tokens[$this->tokenIndex];
  $token->type = $tt;
  if ($tt == OP_ASSIGN) {
    // For a compound assignment this is the base operator (e.g. "+" for
    // "+="); for a plain operator it was reset to null above.
    $token->assignOp = $op;
  }
  $token->start = $this->cursor;
  $token->value = $match[0];
  $this->cursor += strlen($match[0]);
  $token->end = $this->cursor;
  $token->lineno = $this->lineno;

  return $tt;
}