#
# Jasy - Web Tooling Framework
# Copyright 2010-2012 Zynga Inc.
# Copyright 2013-2014 Sebastian Werner
#
#
# License: MPL 1.1/GPL 2.0/LGPL 2.1
# Authors:
# - Brendan Eich <brendan@mozilla.org> (Original JavaScript) (2004-2010)
# - Sebastian Werner <info@sebastian-werner.net> (Python Port) (2010-2012)
#
import jasy.script.tokenize.Tokenizer
import jasy.script.parse.VanillaBuilder
import jasy.script.tokenize.Lang
__all__ = ("parse", "parseExpression")
def parseExpression(source, fileId=None, line=1, builder=None):
    """
    Parses a single expression from the given source text.

    :param source: Source text; a trailing ";" is appended when it is missing
    :param fileId: Identifier of the source, handed over to the tokenizer
    :param line: Line number handed over to the tokenizer
    :param builder: Node builder to use; a new VanillaBuilder is created when omitted
    :return: The node returned by Expression() for the tokenized source
    """

    if builder is None:
        builder = jasy.script.parse.VanillaBuilder.VanillaBuilder()

    # Convert source into expression statement to be friendly to the Tokenizer
    if not source.endswith(";"):
        source = source + ";"

    tokenizer = jasy.script.tokenize.Tokenizer.Tokenizer(source, fileId, line)
    staticContext = StaticContext(False, builder)

    return Expression(tokenizer, staticContext)
def parse(source, fileId=None, line=1, builder=None):
    """
    Parses the given source text into a node tree with a "script" root.

    :param source: Source text to parse
    :param fileId: Identifier of the source, handed over to the tokenizer
    :param line: Line number handed over to the tokenizer
    :param builder: Node builder to use; a new VanillaBuilder is created when omitted
    :return: Root node as returned by Script(), with ``fileId`` set from the tokenizer
    :raises SyntaxError: (the class of this module) when the tokenizer is not
        done after the top-level statements have been parsed
    """

    if builder is None:
        builder = jasy.script.parse.VanillaBuilder.VanillaBuilder()

    tokenizer = jasy.script.tokenize.Tokenizer.Tokenizer(source, fileId, line)
    staticContext = StaticContext(False, builder)
    node = Script(tokenizer, staticContext)

    # store fileId on top-level node
    node.fileId = tokenizer.fileId

    # add missing comments e.g. empty file with only a comment etc.
    # if there is something non-attached by an inner node it is attached to
    # the top level node, which is not correct, but might be better than
    # just ignoring the comment after all.
    if len(node) > 0:
        builder.COMMENTS_add(node[-1], None, tokenizer.getComments())
    else:
        builder.COMMENTS_add(node, None, tokenizer.getComments())

    if not tokenizer.done():
        raise SyntaxError("Unexpected end of file", tokenizer)

    return node
class SyntaxError(Exception):
    """
    Error raised by the parser functions of this module.

    This class shadows the built-in ``SyntaxError`` inside this module. The
    message is formatted as ``Syntax error: <message>`` followed by a second
    line ``<fileId>:<line>`` read from the tokenizer. The same two values are
    also exposed as the attributes ``fileId`` and ``line`` so that callers do
    not have to parse the message text.
    """

    def __init__(self, message, tokenizer):
        """
        :param message: Description of the problem
        :param tokenizer: Object providing the ``fileId`` and ``line`` attributes
        """

        # Read the position once so message and attributes always agree
        self.fileId = tokenizer.fileId
        self.line = tokenizer.line

        Exception.__init__(self, "Syntax error: %s\n%s:%s" % (message, self.fileId, self.line))
class StaticContext(object):
    """
    Status container used during tree-building.

    Within this module one instance is created for the top-level script
    (by parse / parseExpression) and one for every function body
    (by FunctionDefinition).
    """

    def __init__(self, inFunction, builder):
        """
        :param inFunction: Whether this context belongs to a function body. It is
            checked to decide whether return/yield appear in a valid place.
        :param builder: Node builder shared by all parse functions
        """

        self.builder = builder

        # Mostly True, only for the top-level scope it's False
        self.inFunction = inFunction

        # Return/yield bookkeeping, maintained by returnOrYield()
        self.hasReturnWithValue = False
        self.hasEmptyReturn = False
        self.isGenerator = False

        # Counter handed to BLOCK_build() and incremented for every new block
        self.blockId = 0

        # Stack of the statement nodes which are currently being parsed
        self.statementStack = []

        # Disabled: sets to store variable uses and the hoisting status
        # self.functions = set()
        # self.variables = set()
        # self.needsHoisting = False

        # Nesting counters
        self.parenLevel = 0
        self.bracketLevel = 0
        self.curlyLevel = 0
        self.hookLevel = 0

        # Configure strict ecmascript 3 mode
        self.ecma3OnlyMode = False

        # Status flag during parsing
        self.inForLoopInit = False
def Script(tokenizer, staticContext):
    """
    Parses the toplevel and function bodies.

    The statement list is parsed by Statements(); the type of the resulting
    node is then changed to "script" before it is returned.
    """

    root = Statements(tokenizer, staticContext)

    # Statements() produces a block node, the script root gets its own type
    root.type = "script"

    # Disabled: copy over data from compiler context
    # root.functions = staticContext.functions
    # root.variables = staticContext.variables

    return root
def nest(tokenizer, staticContext, node, func, end=None):
    """
    Statement stack and nested statement handler.

    Pushes ``node`` onto the statement stack while ``func`` parses the nested
    statement and removes it again afterwards.

    :param tokenizer: Tokenizer passed through to ``func``
    :param staticContext: Context whose ``statementStack`` is modified
    :param node: Enclosing statement node to push while parsing
    :param func: Parse function called as ``func(tokenizer, staticContext)``
    :param end: Optional token type which must follow the nested statement
    :return: The value returned by ``func``
    """

    staticContext.statementStack.append(node)
    node = func(tokenizer, staticContext)
    staticContext.statementStack.pop()

    # Only enforce a terminating token when the caller asked for one
    if end:
        tokenizer.mustMatch(end)

    return node
def Statements(tokenizer, staticContext):
    """
    Parses a list of Statements.

    A new block node is built and kept on the statement stack while its
    children are parsed. Parsing stops when the tokenizer is done or when the
    next token is a "right_curly", which is left for the caller to consume.
    """

    builder = staticContext.builder

    block = builder.BLOCK_build(tokenizer, staticContext.blockId)
    staticContext.blockId += 1
    builder.BLOCK_hoistLets(block)

    stack = staticContext.statementStack
    stack.append(block)

    previous = None
    while True:
        if tokenizer.done() or tokenizer.peek(True) == "right_curly":
            break

        # Comments have to be fetched before the statement itself is parsed
        pendingComments = tokenizer.getComments()
        statement = Statement(tokenizer, staticContext)
        builder.COMMENTS_add(statement, previous, pendingComments)
        builder.BLOCK_addStatement(block, statement)
        previous = statement

    stack.pop()
    builder.BLOCK_finish(block)

    # TODO: Hoisting is currently disabled. The previous code raised an
    # exception when "needsHoisting" was set on the block, registered the
    # hoists via builder.setHoists(block.id, block.variables) and propagated
    # "needsHoisting" up to the function context.

    return block
def Block(tokenizer, staticContext):
    """Parses a statement list which is wrapped into curly braces and returns its block node."""

    tokenizer.mustMatch("left_curly")
    block = Statements(tokenizer, staticContext)
    tokenizer.mustMatch("right_curly")

    return block
def Statement(tokenizer, staticContext):
    """
    Parses a Statement.

    The first token decides which kind of statement is parsed. Statements
    ending in a right curly (and a few others) return directly; all remaining
    ones fall through to MagicalSemicolon() at the end of this function.

    :param tokenizer: Tokenizer positioned in front of the statement
    :param staticContext: Context of the enclosing script or function
    :return: Node created through the builder of the context
    :raises SyntaxError: (the class of this module) for invalid statements
    """

    tokenType = tokenizer.get(True)
    builder = staticContext.builder

    # Cases for statements ending in a right curly return early, avoiding the
    # common semicolon insertion magic after this switch.

    if tokenType == "function":
        # "declared_form" extends functions of staticContext, "statement_form" doesn't.
        if len(staticContext.statementStack) > 1:
            kind = "statement_form"
        else:
            kind = "declared_form"

        return FunctionDefinition(tokenizer, staticContext, True, kind)

    elif tokenType == "left_curly":
        node = Statements(tokenizer, staticContext)
        tokenizer.mustMatch("right_curly")
        return node

    elif tokenType == "if":
        node = builder.IF_build(tokenizer)
        builder.IF_setCondition(node, ParenExpression(tokenizer, staticContext))
        staticContext.statementStack.append(node)
        builder.IF_setThenPart(node, Statement(tokenizer, staticContext))

        if tokenizer.match("else"):
            comments = tokenizer.getComments()
            elsePart = Statement(tokenizer, staticContext)
            builder.COMMENTS_add(elsePart, node, comments)
            builder.IF_setElsePart(node, elsePart)

        staticContext.statementStack.pop()
        builder.IF_finish(node)
        return node

    elif tokenType == "switch":
        # This allows CASEs after a "default", which is in the standard.
        node = builder.SWITCH_build(tokenizer)
        builder.SWITCH_setDiscriminant(node, ParenExpression(tokenizer, staticContext))
        staticContext.statementStack.append(node)

        tokenizer.mustMatch("left_curly")
        tokenType = tokenizer.get()

        while tokenType != "right_curly":
            if tokenType == "default":
                if node.defaultIndex >= 0:
                    raise SyntaxError("More than one switch default", tokenizer)

                childNode = builder.DEFAULT_build(tokenizer)
                builder.SWITCH_setDefaultIndex(node, len(node) - 1)
                tokenizer.mustMatch("colon")
                builder.DEFAULT_initializeStatements(childNode, tokenizer)

                # Collect statements until the next label or the end of the switch
                while True:
                    tokenType = tokenizer.peek(True)
                    if tokenType == "case" or tokenType == "default" or tokenType == "right_curly":
                        break
                    builder.DEFAULT_addStatement(childNode, Statement(tokenizer, staticContext))

                builder.DEFAULT_finish(childNode)

            elif tokenType == "case":
                childNode = builder.CASE_build(tokenizer)
                builder.CASE_setLabel(childNode, Expression(tokenizer, staticContext))
                tokenizer.mustMatch("colon")
                builder.CASE_initializeStatements(childNode, tokenizer)

                # Collect statements until the next label or the end of the switch
                while True:
                    tokenType = tokenizer.peek(True)
                    if tokenType == "case" or tokenType == "default" or tokenType == "right_curly":
                        break
                    builder.CASE_addStatement(childNode, Statement(tokenizer, staticContext))

                builder.CASE_finish(childNode)

            else:
                raise SyntaxError("Invalid switch case", tokenizer)

            builder.SWITCH_addCase(node, childNode)
            tokenType = tokenizer.get()

        staticContext.statementStack.pop()
        builder.SWITCH_finish(node)
        return node

    elif tokenType == "for":
        node = builder.FOR_build(tokenizer)
        forBlock = None

        if tokenizer.match("identifier"):
            if tokenizer.token.value == "each":
                builder.FOR_rebuildForEach(node)
            else:
                # Any other identifier is not part of the loop head. Put it
                # back so that the mustMatch() below reports the error
                # (same handling as in comprehensionTail).
                tokenizer.unget()

        tokenizer.mustMatch("left_paren")
        tokenType = tokenizer.peek()
        childNode = None

        if tokenType != "semicolon":
            staticContext.inForLoopInit = True

            if tokenType == "var" or tokenType == "const":
                tokenizer.get()
                childNode = Variables(tokenizer, staticContext)

            elif tokenType == "let":
                tokenizer.get()

                if tokenizer.peek() == "left_paren":
                    childNode = LetBlock(tokenizer, staticContext, False)

                else:
                    # Let in for head, we need to add an implicit block
                    # around the rest of the for.
                    forBlock = builder.BLOCK_build(tokenizer, staticContext.blockId)
                    staticContext.blockId += 1
                    staticContext.statementStack.append(forBlock)
                    childNode = Variables(tokenizer, staticContext, forBlock)

            else:
                childNode = Expression(tokenizer, staticContext)

            staticContext.inForLoopInit = False

        if childNode and tokenizer.match("in"):
            builder.FOR_rebuildForIn(node)
            builder.FOR_setObject(node, Expression(tokenizer, staticContext), forBlock)

            if childNode.type == "var" or childNode.type == "let":
                # Declarations are only valid with exactly one variable
                if len(childNode) != 1:
                    raise SyntaxError("Invalid for..in left-hand side", tokenizer)

                builder.FOR_setIterator(node, childNode, forBlock)

            else:
                builder.FOR_setIterator(node, childNode, forBlock)

        else:
            builder.FOR_setSetup(node, childNode)
            tokenizer.mustMatch("semicolon")

            if node.isEach:
                raise SyntaxError("Invalid for each..in loop", tokenizer)

            if tokenizer.peek() == "semicolon":
                builder.FOR_setCondition(node, None)
            else:
                builder.FOR_setCondition(node, Expression(tokenizer, staticContext))

            tokenizer.mustMatch("semicolon")

            if tokenizer.peek() == "right_paren":
                builder.FOR_setUpdate(node, None)
            else:
                builder.FOR_setUpdate(node, Expression(tokenizer, staticContext))

        tokenizer.mustMatch("right_paren")
        builder.FOR_setBody(node, nest(tokenizer, staticContext, node, Statement))

        if forBlock:
            builder.BLOCK_finish(forBlock)
            staticContext.statementStack.pop()

        builder.FOR_finish(node)
        return node

    elif tokenType == "while":
        node = builder.WHILE_build(tokenizer)

        builder.WHILE_setCondition(node, ParenExpression(tokenizer, staticContext))
        builder.WHILE_setBody(node, nest(tokenizer, staticContext, node, Statement))
        builder.WHILE_finish(node)

        return node

    elif tokenType == "do":
        node = builder.DO_build(tokenizer)

        builder.DO_setBody(node, nest(tokenizer, staticContext, node, Statement, "while"))
        builder.DO_setCondition(node, ParenExpression(tokenizer, staticContext))
        builder.DO_finish(node)

        if not staticContext.ecma3OnlyMode:
            # <script language="JavaScript"> (without version hints) may need
            # automatic semicolon insertion without a newline after do-while.
            # See http://bugzilla.mozilla.org/show_bug.cgi?id=238945.
            tokenizer.match("semicolon")
            return node

        # NO RETURN

    elif tokenType == "break" or tokenType == "continue":
        if tokenType == "break":
            node = builder.BREAK_build(tokenizer)
        else:
            node = builder.CONTINUE_build(tokenizer)

        if tokenizer.peekOnSameLine() == "identifier":
            tokenizer.get()

            if tokenType == "break":
                builder.BREAK_setLabel(node, tokenizer.token.value)
            else:
                builder.CONTINUE_setLabel(node, tokenizer.token.value)

        statementStack = staticContext.statementStack
        i = len(statementStack)
        label = node.label if hasattr(node, "label") else None

        if label:
            # Walk the stack outwards until the labeled statement is found
            while True:
                i -= 1
                if i < 0:
                    raise SyntaxError("Label not found", tokenizer)
                if getattr(statementStack[i], "label", None) == label:
                    break

            #
            # Both break and continue to label need to be handled specially
            # within a labeled loop, so that they target that loop. If not in
            # a loop, then break targets its labeled statement. Labels can be
            # nested so we skip all labels immediately enclosing the nearest
            # non-label statement.
            #
            while i < len(statementStack) - 1 and statementStack[i + 1].type == "label":
                i += 1

            if i < len(statementStack) - 1 and getattr(statementStack[i + 1], "isLoop", False):
                i += 1
            elif tokenType == "continue":
                raise SyntaxError("Invalid continue", tokenizer)

        else:
            # Unlabeled: requires an enclosing loop (or a switch for break)
            while True:
                i -= 1
                if i < 0:
                    if tokenType == "break":
                        raise SyntaxError("Invalid break", tokenizer)
                    else:
                        raise SyntaxError("Invalid continue", tokenizer)

                if getattr(statementStack[i], "isLoop", False) or (tokenType == "break" and statementStack[i].type == "switch"):
                    break

        if tokenType == "break":
            builder.BREAK_finish(node)
        else:
            builder.CONTINUE_finish(node)

        # NO RETURN

    elif tokenType == "try":
        node = builder.TRY_build(tokenizer)
        builder.TRY_setTryBlock(node, Block(tokenizer, staticContext))

        while tokenizer.match("catch"):
            childNode = builder.CATCH_build(tokenizer)
            tokenizer.mustMatch("left_paren")
            nextTokenType = tokenizer.get()

            if nextTokenType == "left_bracket" or nextTokenType == "left_curly":
                # Destructured catch identifiers.
                tokenizer.unget()
                exception = DestructuringExpression(tokenizer, staticContext, True)

            elif nextTokenType == "identifier":
                exception = builder.CATCH_wrapException(tokenizer)

            else:
                raise SyntaxError("Missing identifier in catch", tokenizer)

            builder.CATCH_setException(childNode, exception)

            if tokenizer.match("if"):
                if staticContext.ecma3OnlyMode:
                    raise SyntaxError("Illegal catch guard", tokenizer)

                if node.getChildrenLength() > 0 and not node.getUnrelatedChildren()[0].guard:
                    raise SyntaxError("Guarded catch after unguarded", tokenizer)

                builder.CATCH_setGuard(childNode, Expression(tokenizer, staticContext))

            else:
                builder.CATCH_setGuard(childNode, None)

            tokenizer.mustMatch("right_paren")

            builder.CATCH_setBlock(childNode, Block(tokenizer, staticContext))
            builder.CATCH_finish(childNode)

            builder.TRY_addCatch(node, childNode)

        builder.TRY_finishCatches(node)

        if tokenizer.match("finally"):
            builder.TRY_setFinallyBlock(node, Block(tokenizer, staticContext))

        if node.getChildrenLength() == 0 and not hasattr(node, "finallyBlock"):
            raise SyntaxError("Invalid try statement", tokenizer)

        builder.TRY_finish(node)
        return node

    elif tokenType == "catch" or tokenType == "finally":
        # The token type is already the keyword itself ("catch" / "finally")
        raise SyntaxError(tokenType + " without preceding try", tokenizer)

    elif tokenType == "throw":
        node = builder.THROW_build(tokenizer)

        builder.THROW_setException(node, Expression(tokenizer, staticContext))
        builder.THROW_finish(node)

        # NO RETURN

    elif tokenType == "return":
        node = returnOrYield(tokenizer, staticContext)

        # NO RETURN

    elif tokenType == "with":
        node = builder.WITH_build(tokenizer)

        builder.WITH_setObject(node, ParenExpression(tokenizer, staticContext))
        builder.WITH_setBody(node, nest(tokenizer, staticContext, node, Statement))
        builder.WITH_finish(node)

        return node

    elif tokenType == "var" or tokenType == "const":
        node = Variables(tokenizer, staticContext)

        # NO RETURN

    elif tokenType == "let":
        if tokenizer.peek() == "left_paren":
            node = LetBlock(tokenizer, staticContext, True)
        else:
            node = Variables(tokenizer, staticContext)

        # NO RETURN

    elif tokenType == "debugger":
        node = builder.DEBUGGER_build(tokenizer)

        # NO RETURN

    elif tokenType == "newline" or tokenType == "semicolon":
        # Empty statement
        node = builder.SEMICOLON_build(tokenizer)

        builder.SEMICOLON_setExpression(node, None)
        builder.SEMICOLON_finish(node)

        return node

    else:
        if tokenType == "identifier":
            tokenType = tokenizer.peek()

            # Labeled statement.
            if tokenType == "colon":
                label = tokenizer.token.value
                statementStack = staticContext.statementStack

                # The same label must not be used by an enclosing statement
                i = len(statementStack) - 1
                while i >= 0:
                    if getattr(statementStack[i], "label", None) == label:
                        raise SyntaxError("Duplicate label", tokenizer)
                    i -= 1

                tokenizer.get()
                node = builder.LABEL_build(tokenizer)

                builder.LABEL_setLabel(node, label)
                builder.LABEL_setStatement(node, nest(tokenizer, staticContext, node, Statement))
                builder.LABEL_finish(node)

                return node

        # Expression statement.
        # We unget the current token to parse the expression as a whole.
        node = builder.SEMICOLON_build(tokenizer)
        tokenizer.unget()
        builder.SEMICOLON_setExpression(node, Expression(tokenizer, staticContext))
        node.end = node.expression.end
        builder.SEMICOLON_finish(node)

        # NO RETURN

    MagicalSemicolon(tokenizer)
    return node
def MagicalSemicolon(tokenizer):
    """
    Handles the end of a statement.

    When the tokenizer is still on the line of the current token, the next
    token on that line has to be one of "end", "newline", "semicolon" or
    "right_curly"; otherwise a SyntaxError is raised. A following semicolon
    is consumed when there is one.
    """

    sameLine = tokenizer.line == tokenizer.token.line
    if sameLine:
        upcoming = tokenizer.peekOnSameLine()
        if upcoming not in ("end", "newline", "semicolon", "right_curly"):
            raise SyntaxError("Missing ; before statement", tokenizer)

    tokenizer.match("semicolon")
def returnOrYield(tokenizer, staticContext):
    """
    Parses a return statement or a yield expression.

    The current token of the tokenizer decides between both: "return" builds
    a return node, anything else is handled as yield and marks the context as
    generator. The flags hasReturnWithValue / hasEmptyReturn of the context
    are updated for return.

    :raises SyntaxError: (the class of this module) outside of functions and
        when a generator returns a value
    """

    builder = staticContext.builder
    tokenType = tokenizer.token.type
    insideFunction = staticContext.inFunction

    if tokenType == "return":
        if not insideFunction:
            raise SyntaxError("Return not in function", tokenizer)

        node = builder.RETURN_build(tokenizer)

    else:
        if not insideFunction:
            raise SyntaxError("Yield not in function", tokenizer)

        staticContext.isGenerator = True
        node = builder.YIELD_build(tokenizer)

    nextTokenType = tokenizer.peek(True)

    # Tokens after which no value follows. For yield a few more tokens
    # (including another yield) end the expression without a value.
    statementEnders = ("end", "newline", "semicolon", "right_curly")
    yieldEnders = (tokenType, "right_bracket", "right_paren", "colon", "comma")

    hasValue = nextTokenType not in statementEnders and (tokenType != "yield" or nextTokenType not in yieldEnders)

    if hasValue:
        if tokenType == "return":
            builder.RETURN_setValue(node, Expression(tokenizer, staticContext))
            staticContext.hasReturnWithValue = True
        else:
            builder.YIELD_setValue(node, AssignExpression(tokenizer, staticContext))

    elif tokenType == "return":
        staticContext.hasEmptyReturn = True

    # Disallow return v; in generator.
    if staticContext.hasReturnWithValue and staticContext.isGenerator:
        raise SyntaxError("Generator returns a value", tokenizer)

    finish = builder.RETURN_finish if tokenType == "return" else builder.YIELD_finish
    finish(node)

    return node
def FunctionDefinition(tokenizer, staticContext, requireName, functionForm):
    """
    Parses a function: optional name, parameter list and body.

    The body is either a block in curly braces or a single assignment
    expression (expression closure). In both cases it is parsed with a fresh
    StaticContext of its own, so that return/yield bookkeeping of the
    function does not mix with the one of the enclosing context.

    :param tokenizer: Tokenizer positioned behind the "function" keyword
    :param staticContext: Context of the enclosing script or function
    :param requireName: Whether a missing function name is an error
    :param functionForm: Value stored as ``functionForm`` on the function node
        (Statement passes "declared_form" or "statement_form")
    :return: The function node
    :raises SyntaxError: (the class of this module) for a missing name or
        parameter and for an expression closure which contains a yield
    """

    builder = staticContext.builder
    functionNode = builder.FUNCTION_build(tokenizer)

    if tokenizer.match("identifier"):
        builder.FUNCTION_setName(functionNode, tokenizer.token.value)
    elif requireName:
        raise SyntaxError("Missing function identifier", tokenizer)

    tokenizer.mustMatch("left_paren")

    if not tokenizer.match("right_paren"):
        builder.FUNCTION_initParams(functionNode, tokenizer)
        prevParamNode = None

        while True:
            tokenType = tokenizer.get()

            if tokenType == "left_bracket" or tokenType == "left_curly":
                # Destructured formal parameters.
                tokenizer.unget()
                paramNode = DestructuringExpression(tokenizer, staticContext)

            elif tokenType == "identifier":
                paramNode = builder.FUNCTION_wrapParam(tokenizer)

            else:
                raise SyntaxError("Missing formal parameter", tokenizer)

            builder.FUNCTION_addParam(functionNode, tokenizer, paramNode)
            builder.COMMENTS_add(paramNode, prevParamNode, tokenizer.getComments())

            if not tokenizer.match("comma"):
                break

            prevParamNode = paramNode

        tokenizer.mustMatch("right_paren")

    # Do we have an expression closure or a normal body?
    tokenType = tokenizer.get()
    if tokenType != "left_curly":
        builder.FUNCTION_setExpressionClosure(functionNode, True)
        tokenizer.unget()

    childContext = StaticContext(True, builder)

    if staticContext.inFunction:
        # Inner functions don't reset block numbering, only functions at
        # the top level of the program do.
        childContext.blockId = staticContext.blockId

    if tokenType != "left_curly":
        # The closure body belongs to the new function: parse it in the
        # function's own context and check that context for a yield. Using
        # the enclosing context would reject yield in top-level closures and
        # would mix up the generator state of the enclosing function.
        builder.FUNCTION_setBody(functionNode, AssignExpression(tokenizer, childContext))
        if childContext.isGenerator:
            raise SyntaxError("Generator returns a value", tokenizer)

    else:
        builder.FUNCTION_hoistVars(childContext.blockId)
        builder.FUNCTION_setBody(functionNode, Script(tokenizer, childContext))

    if tokenType == "left_curly":
        tokenizer.mustMatch("right_curly")

    functionNode.end = tokenizer.token.end
    functionNode.functionForm = functionForm

    builder.COMMENTS_add(functionNode.body, functionNode.body, tokenizer.getComments())
    builder.FUNCTION_finish(functionNode, staticContext)

    return functionNode
def Variables(tokenizer, staticContext, letBlock=None):
    """
    Parses a comma-separated list of var declarations (and maybe initializations).

    The current token of the tokenizer ("var", "const", "let" or
    "left_paren") selects the builder methods which are used.

    :param letBlock: Optional node which is passed to the builder as context
        for let declarations instead of the nearest enclosing block
    :return: The declaration list node
    """

    builder = staticContext.builder
    keyword = tokenizer.token.type

    if keyword == "var":
        build, addDecl, finish = builder.VAR_build, builder.VAR_addDecl, builder.VAR_finish
        childContext = staticContext

    elif keyword == "const":
        build, addDecl, finish = builder.CONST_build, builder.CONST_addDecl, builder.CONST_finish
        childContext = staticContext

    elif keyword == "let" or keyword == "left_paren":
        build, addDecl, finish = builder.LET_build, builder.LET_addDecl, builder.LET_finish

        if letBlock:
            childContext = letBlock

        else:
            statementStack = staticContext.statementStack
            blockIndex = len(statementStack) - 1

            # a BLOCK *must* be found.
            while statementStack[blockIndex].type != "block":
                blockIndex -= 1

            if blockIndex == 0:
                # Lets at the function toplevel are just vars, at least in SpiderMonkey.
                build, addDecl, finish = builder.VAR_build, builder.VAR_addDecl, builder.VAR_finish
                childContext = staticContext

            else:
                childContext = statementStack[blockIndex]

    node = build(tokenizer)

    while True:
        tokenType = tokenizer.get()

        # Done in Python port!
        # FIXME Should have a special DECLARATION node instead of overloading
        # IDENTIFIER to mean both identifier declarations and destructured
        # declarations.
        declaration = builder.DECL_build(tokenizer)

        if tokenType in ("left_bracket", "left_curly"):
            # Pass in childContext if we need to add each pattern matched into
            # its variables, else pass in staticContext.
            # Need to unget to parse the full destructured expression.
            tokenizer.unget()
            pattern = DestructuringExpression(tokenizer, staticContext, True, childContext)
            builder.DECL_setNames(declaration, pattern)

            if staticContext.inForLoopInit and tokenizer.peek() == "in":
                # Left side of a for..in loop: no initializer follows
                addDecl(node, declaration, childContext)

            else:
                tokenizer.mustMatch("assign")
                if tokenizer.token.assignOp:
                    raise SyntaxError("Invalid variable initialization", tokenizer)

                # Parse the init as a normal assignment.
                initializer = AssignExpression(tokenizer, staticContext)
                builder.DECL_setInitializer(declaration, initializer)
                builder.DECL_finish(declaration)
                addDecl(node, declaration, childContext)

                # Disabled: copy over names for variable list
                # for nameNode in declaration.names:
                #     childContext.variables.add(nameNode.value)

            if tokenizer.match("comma"):
                continue

            break

        if tokenType != "identifier":
            raise SyntaxError("Missing variable name", tokenizer)

        builder.DECL_setName(declaration, tokenizer.token.value)
        builder.DECL_setReadOnly(declaration, node.type == "const")
        addDecl(node, declaration, childContext)

        if tokenizer.match("assign"):
            if tokenizer.token.assignOp:
                raise SyntaxError("Invalid variable initialization", tokenizer)

            builder.DECL_setInitializer(declaration, AssignExpression(tokenizer, staticContext))

        builder.DECL_finish(declaration)

        # Disabled: if we directly use the node in "let" constructs
        # if not hasattr(childContext, "variables"):
        #     childContext.variables = set()
        # childContext.variables.add(declaration.name)

        if not tokenizer.match("comma"):
            break

    finish(node)
    return node
def LetBlock(tokenizer, staticContext, isStatement):
    """
    Parses a let block or let expression: ``let (variables)`` followed by a
    block or by an assignment expression.

    Does not handle let inside of for loop init.

    :param isStatement: Whether the construct appears at statement level. A
        block is only parsed in that case and only when a "left_curly" follows.
    :return: The let block node
    """

    builder = staticContext.builder

    # tokenizer.token.type must be "let"
    letNode = builder.LETBLOCK_build(tokenizer)

    tokenizer.mustMatch("left_paren")
    variables = Variables(tokenizer, staticContext, letNode)
    builder.LETBLOCK_setVariables(letNode, variables)
    tokenizer.mustMatch("right_paren")

    parseAsBlock = isStatement

    if parseAsBlock and tokenizer.peek() != "left_curly":
        # If this is really an expression in let statement guise, then we
        # need to wrap the "let_block" node in a "semicolon" node so that we pop
        # the return value of the expression.
        wrapper = builder.SEMICOLON_build(tokenizer)
        builder.SEMICOLON_setExpression(wrapper, letNode)
        builder.SEMICOLON_finish(wrapper)
        parseAsBlock = False

    if parseAsBlock:
        builder.LETBLOCK_setBlock(letNode, Block(tokenizer, staticContext))
    else:
        builder.LETBLOCK_setExpression(letNode, AssignExpression(tokenizer, staticContext))

    builder.LETBLOCK_finish(letNode)
    return letNode
def checkDestructuring(tokenizer, staticContext, node, simpleNamesOnly=None, data=None):
    """
    Validates a node which is used as destructuring pattern.

    Nodes other than "array_init" / "object_init" are accepted without any
    check, except for "array_comp" which is rejected. Only children of type
    "property_init" are inspected: their value is checked recursively when it
    is an array or object literal itself.

    :param simpleNamesOnly: When set, property keys have to be identifiers
    :param data: When set together with simpleNamesOnly, a declaration is
        built (and directly finished) for every key
    :raises SyntaxError: (the class of this module) for invalid patterns
    """

    if node.type == "array_comp":
        raise SyntaxError("Invalid array comprehension left-hand side", tokenizer)

    if node.type not in ("array_init", "object_init"):
        return

    builder = staticContext.builder

    for entry in node:
        if entry is None:
            continue

        lhs = rhs = None
        if entry.type == "property_init":
            lhs = entry[0]
            rhs = entry[1]

        if rhs and rhs.type in ("array_init", "object_init"):
            checkDestructuring(tokenizer, staticContext, rhs, simpleNamesOnly, data)

        if not (lhs and simpleNamesOnly):
            continue

        # In declarations, lhs must be simple names
        if lhs.type != "identifier":
            raise SyntaxError("Missing name in pattern", tokenizer)

        if data:
            declaration = builder.DECL_build(tokenizer)
            builder.DECL_setName(declaration, lhs.value)

            # Don't need to set initializer because it's just for
            # hoisting anyways.
            builder.DECL_finish(declaration)

            # Disabled: each pattern needs to be added to variables.
            # data.variables.add(declaration.name)
# JavaScript 1.7
def DestructuringExpression(tokenizer, staticContext, simpleNamesOnly=None, data=None):
    """
    Parses a primary expression and validates it via checkDestructuring().

    :return: The node returned by PrimaryExpression()
    """

    pattern = PrimaryExpression(tokenizer, staticContext)
    checkDestructuring(tokenizer, staticContext, pattern, simpleNamesOnly, data)

    return pattern
# JavaScript 1.7
def GeneratorExpression(tokenizer, staticContext, expression):
    """
    Builds a generator node from an already parsed expression and the
    comprehension tail which follows it in the token stream.
    """

    builder = staticContext.builder
    generator = builder.GENERATOR_build(tokenizer)

    builder.GENERATOR_setExpression(generator, expression)
    tail = comprehensionTail(tokenizer, staticContext)
    builder.GENERATOR_setTail(generator, tail)
    builder.GENERATOR_finish(generator)

    return generator
# JavaScript 1.7 Comprehensions Tails (Generators / Arrays)
def comprehensionTail(tokenizer, staticContext):
    """
    Parses one or more ``for (... in ...)`` heads, optionally followed by an
    ``if (...)`` guard, and returns the comprehension tail node.

    :raises SyntaxError: (the class of this module) when the iterator is
        neither an identifier nor a destructuring pattern
    """

    builder = staticContext.builder

    # tokenizer.token.type must be "for"
    tail = builder.COMPTAIL_build(tokenizer)

    moreLoops = True
    while moreLoops:
        loop = builder.FOR_build(tokenizer)

        # Comprehension tails are always for..in loops.
        builder.FOR_rebuildForIn(loop)

        if tokenizer.match("identifier"):
            # But sometimes they're for each..in.
            if tokenizer.token.value == "each":
                builder.FOR_rebuildForEach(loop)
            else:
                tokenizer.unget()

        tokenizer.mustMatch("left_paren")
        tokenType = tokenizer.get()

        if tokenType in ("left_bracket", "left_curly"):
            # Destructured left side of for in comprehension tails.
            tokenizer.unget()
            builder.FOR_setIterator(loop, DestructuringExpression(tokenizer, staticContext))

        elif tokenType == "identifier":
            # The variable/declaration substructure was removed in the Python
            # port: variable declarations are not allowed here, so a plain
            # identifier node is used as iterator instead of a
            # DECL node wrapped into a VAR node.
            #
            # Don't add to variables since the semantics of comprehensions is
            # such that the variables are in their own function when desugared.
            builder.FOR_setIterator(loop, builder.PRIMARY_build(tokenizer, "identifier"))

        else:
            raise SyntaxError("Missing identifier", tokenizer)

        tokenizer.mustMatch("in")
        builder.FOR_setObject(loop, Expression(tokenizer, staticContext))
        tokenizer.mustMatch("right_paren")
        builder.COMPTAIL_addFor(tail, loop)

        moreLoops = tokenizer.match("for")

    # Optional guard.
    if tokenizer.match("if"):
        builder.COMPTAIL_setGuard(tail, ParenExpression(tokenizer, staticContext))

    builder.COMPTAIL_finish(tail)
    return tail
def ParenExpression(tokenizer, staticContext):
    """
    Parses an expression in parentheses. When a "for" follows the expression,
    the result is turned into a generator expression.

    :raises SyntaxError: (the class of this module) when a non-parenthesized
        yield or comma expression is followed by "for"
    """

    tokenizer.mustMatch("left_paren")

    # Always accept the 'in' operator in a parenthesized expression,
    # where it's unambiguous, even if we might be parsing the init of a
    # for statement.
    savedLoopInit = staticContext.inForLoopInit
    staticContext.inForLoopInit = False
    inner = Expression(tokenizer, staticContext)
    staticContext.inForLoopInit = savedLoopInit

    if tokenizer.match("for"):
        if not inner.parenthesized if inner.type == "yield" else False:
            raise SyntaxError("Yield expression must be parenthesized", tokenizer)

        if not inner.parenthesized if inner.type == "comma" else False:
            raise SyntaxError("Generator expression must be parenthesized", tokenizer)

        inner = GeneratorExpression(tokenizer, staticContext, inner)

    tokenizer.mustMatch("right_paren")
    return inner
def Expression(tokenizer, staticContext):
    """
    Top-down expression parser matched against SpiderMonkey.

    Parses one assignment expression. When commas follow, all operands are
    collected in a comma node which is returned instead.
    """

    builder = staticContext.builder
    first = AssignExpression(tokenizer, staticContext)

    if not tokenizer.match("comma"):
        return first

    commaNode = builder.COMMA_build(tokenizer)
    builder.COMMA_addOperand(commaNode, first)

    hasMore = True
    while hasMore:
        # The operand in front of a comma must not be a bare yield
        previous = commaNode[-1]
        if previous.type == "yield" and not previous.parenthesized:
            raise SyntaxError("Yield expression must be parenthesized", tokenizer)

        builder.COMMA_addOperand(commaNode, AssignExpression(tokenizer, staticContext))
        hasMore = tokenizer.match("comma")

    builder.COMMA_finish(commaNode)
    return commaNode
def AssignExpression(tokenizer, staticContext):
    """
    Parses an assignment expression.

    Without an "assign" token behind the left-hand side, the left-hand side
    itself is returned. A leading yield is delegated to returnOrYield().

    :raises SyntaxError: (the class of this module) for a left-hand side
        which cannot be assigned to
    """

    builder = staticContext.builder

    # Have to treat yield like an operand because it could be the leftmost
    # operand of the expression.
    if tokenizer.match("yield", True):
        return returnOrYield(tokenizer, staticContext)

    comments = tokenizer.getComments()
    assignment = builder.ASSIGN_build(tokenizer)
    target = ConditionalExpression(tokenizer, staticContext)
    builder.COMMENTS_add(target, None, comments)

    if not tokenizer.match("assign"):
        builder.ASSIGN_finish(assignment)
        return target

    targetType = target.type
    if targetType in ("object_init", "array_init"):
        checkDestructuring(tokenizer, staticContext, target)
    elif targetType not in ("identifier", "dot", "index", "call"):
        raise SyntaxError("Bad left-hand side of assignment", tokenizer)

    builder.ASSIGN_setAssignOp(assignment, tokenizer.token.assignOp)
    builder.ASSIGN_addOperand(assignment, target)
    value = AssignExpression(tokenizer, staticContext)
    builder.ASSIGN_addOperand(assignment, value)
    builder.ASSIGN_finish(assignment)

    return assignment
def ConditionalExpression(tokenizer, staticContext):
    """
    Parses an OrExpression, optionally followed by "hook" / "colon" clauses
    which are combined into a hook node.

    :raises SyntaxError: (the class of this module) when the "colon" is missing
    """

    builder = staticContext.builder
    condition = OrExpression(tokenizer, staticContext)

    if not tokenizer.match("hook"):
        return condition

    hook = builder.HOOK_build(tokenizer)
    builder.HOOK_setCondition(hook, condition)

    # Always accept the 'in' operator in the middle clause of a ternary,
    # where it's unambiguous, even if we might be parsing the init of a
    # for statement.
    savedLoopInit = staticContext.inForLoopInit
    staticContext.inForLoopInit = False
    thenPart = AssignExpression(tokenizer, staticContext)
    builder.HOOK_setThenPart(hook, thenPart)
    staticContext.inForLoopInit = savedLoopInit

    if not tokenizer.match("colon"):
        raise SyntaxError("Missing : after ?", tokenizer)

    elsePart = AssignExpression(tokenizer, staticContext)
    builder.HOOK_setElsePart(hook, elsePart)
    builder.HOOK_finish(hook)

    return hook
def OrExpression(tokenizer, staticContext):
    """Parses AndExpressions joined by "or" tokens into left-nested OR nodes."""

    builder = staticContext.builder
    result = AndExpression(tokenizer, staticContext)

    while tokenizer.match("or"):
        operation = builder.OR_build(tokenizer)
        builder.OR_addOperand(operation, result)
        rightOperand = AndExpression(tokenizer, staticContext)
        builder.OR_addOperand(operation, rightOperand)
        builder.OR_finish(operation)

        # The finished operation is the left operand of the next one
        result = operation

    return result
def AndExpression(tokenizer, staticContext):
    """Parses BitwiseOrExpressions joined by "and" tokens into left-nested AND nodes."""

    builder = staticContext.builder
    result = BitwiseOrExpression(tokenizer, staticContext)

    while tokenizer.match("and"):
        operation = builder.AND_build(tokenizer)
        builder.AND_addOperand(operation, result)
        rightOperand = BitwiseOrExpression(tokenizer, staticContext)
        builder.AND_addOperand(operation, rightOperand)
        builder.AND_finish(operation)

        # The finished operation is the left operand of the next one
        result = operation

    return result
def BitwiseOrExpression(tokenizer, staticContext):
    """Parses BitwiseXorExpressions joined by "bitwise_or" tokens into left-nested nodes."""

    builder = staticContext.builder
    result = BitwiseXorExpression(tokenizer, staticContext)

    while tokenizer.match("bitwise_or"):
        operation = builder.BITWISEOR_build(tokenizer)
        builder.BITWISEOR_addOperand(operation, result)
        rightOperand = BitwiseXorExpression(tokenizer, staticContext)
        builder.BITWISEOR_addOperand(operation, rightOperand)
        builder.BITWISEOR_finish(operation)

        # The finished operation is the left operand of the next one
        result = operation

    return result
def BitwiseXorExpression(tokenizer, staticContext):
    """Parses BitwiseAndExpressions joined by "bitwise_xor" tokens into left-nested nodes."""

    builder = staticContext.builder
    result = BitwiseAndExpression(tokenizer, staticContext)

    while tokenizer.match("bitwise_xor"):
        operation = builder.BITWISEXOR_build(tokenizer)
        builder.BITWISEXOR_addOperand(operation, result)
        rightOperand = BitwiseAndExpression(tokenizer, staticContext)
        builder.BITWISEXOR_addOperand(operation, rightOperand)
        builder.BITWISEXOR_finish(operation)

        # The finished operation is the left operand of the next one
        result = operation

    return result
def BitwiseAndExpression(tokenizer, staticContext):
    """Parses EqualityExpressions joined by "bitwise_and" tokens into left-nested nodes."""

    builder = staticContext.builder
    result = EqualityExpression(tokenizer, staticContext)

    while tokenizer.match("bitwise_and"):
        operation = builder.BITWISEAND_build(tokenizer)
        builder.BITWISEAND_addOperand(operation, result)
        rightOperand = EqualityExpression(tokenizer, staticContext)
        builder.BITWISEAND_addOperand(operation, rightOperand)
        builder.BITWISEAND_finish(operation)

        # The finished operation is the left operand of the next one
        result = operation

    return result
def EqualityExpression(tokenizer, staticContext):
    """
    Parses RelationalExpressions joined by "eq", "ne", "strict_eq" or
    "strict_ne" tokens into left-nested equality nodes.
    """

    builder = staticContext.builder
    operators = ("eq", "ne", "strict_eq", "strict_ne")
    result = RelationalExpression(tokenizer, staticContext)

    # any() stops at the first matching operator, in the listed order
    while any(tokenizer.match(operator) for operator in operators):
        operation = builder.EQUALITY_build(tokenizer)
        builder.EQUALITY_addOperand(operation, result)
        rightOperand = RelationalExpression(tokenizer, staticContext)
        builder.EQUALITY_addOperand(operation, rightOperand)
        builder.EQUALITY_finish(operation)
        result = operation

    return result
def RelationalExpression(tokenizer, staticContext):
    """
    Parse a chain of relational operations.

    Operands are parsed by ShiftExpression. Recognized operator tokens are "lt",
    "le", "ge", "gt", "instanceof" and - unless the caller was parsing the init
    part of a for loop (staticContext.inForLoopInit) - "in".

    While the operands are parsed the inForLoopInit flag of the static context is
    cleared. The previous value is restored when this function is left, also
    when parsing of an operand raises an exception, so that the shared context
    is never left in a modified state.

    Returns the last node built, or the plain operand when no operator follows.
    """
    builder = staticContext.builder
    oldLoopInit = staticContext.inForLoopInit

    # The "in" operator is only recognized when the flag was exactly False on
    # entry (the comparison is kept as-is to not change accepted values).
    inOperatorAllowed = oldLoopInit == False

    # Uses of the in operator in shiftExprs are always unambiguous,
    # so unset the flag that prohibits recognizing it.
    staticContext.inForLoopInit = False

    try:
        node = ShiftExpression(tokenizer, staticContext)

        while (
            tokenizer.match("lt")
            or tokenizer.match("le")
            or tokenizer.match("ge")
            or tokenizer.match("gt")
            or (inOperatorAllowed and tokenizer.match("in"))
            or tokenizer.match("instanceof")
        ):
            childNode = builder.RELATIONAL_build(tokenizer)
            builder.RELATIONAL_addOperand(childNode, node)
            builder.RELATIONAL_addOperand(childNode, ShiftExpression(tokenizer, staticContext))
            builder.RELATIONAL_finish(childNode)
            node = childNode

    finally:
        # Always hand the context back in the state it was received in.
        staticContext.inForLoopInit = oldLoopInit

    return node
def ShiftExpression(tokenizer, staticContext):
    """
    Parse a chain of shift operations.

    Operands are parsed by AddExpression. The operator tokens "lsh", "rsh" and
    "ursh" are tried in exactly that order; the first one that matches produces
    a new node taking the tree built so far as its first operand.

    Returns the last node built, or the plain operand when no operator follows.
    """
    nodeBuilder = staticContext.builder
    operators = ("lsh", "rsh", "ursh")
    result = AddExpression(tokenizer, staticContext)

    # any() stops at the first matching token type, like a chain of "or".
    while any(tokenizer.match(operator) for operator in operators):
        operation = nodeBuilder.SHIFT_build(tokenizer)
        nodeBuilder.SHIFT_addOperand(operation, result)

        # The right-hand side is parsed only after the left operand was attached.
        rightOperand = AddExpression(tokenizer, staticContext)
        nodeBuilder.SHIFT_addOperand(operation, rightOperand)
        nodeBuilder.SHIFT_finish(operation)

        result = operation

    return result
def AddExpression(tokenizer, staticContext):
    """
    Parse a chain of additive operations.

    Operands are parsed by MultiplyExpression. The operator tokens "plus" and
    "minus" are tried in exactly that order; the first one that matches produces
    a new node taking the tree built so far as its first operand.

    Returns the last node built, or the plain operand when no operator follows.
    """
    nodeBuilder = staticContext.builder
    operators = ("plus", "minus")
    result = MultiplyExpression(tokenizer, staticContext)

    # any() stops at the first matching token type, like a chain of "or".
    while any(tokenizer.match(operator) for operator in operators):
        operation = nodeBuilder.ADD_build(tokenizer)
        nodeBuilder.ADD_addOperand(operation, result)

        # The right-hand side is parsed only after the left operand was attached.
        rightOperand = MultiplyExpression(tokenizer, staticContext)
        nodeBuilder.ADD_addOperand(operation, rightOperand)
        nodeBuilder.ADD_finish(operation)

        result = operation

    return result
def MultiplyExpression(tokenizer, staticContext):
    """
    Parse a chain of multiplicative operations.

    Operands are parsed by UnaryExpression. The operator tokens "mul", "div" and
    "mod" are tried in exactly that order; the first one that matches produces a
    new node taking the tree built so far as its first operand.

    Returns the last node built, or the plain operand when no operator follows.
    """
    nodeBuilder = staticContext.builder
    operators = ("mul", "div", "mod")
    result = UnaryExpression(tokenizer, staticContext)

    # any() stops at the first matching token type, like a chain of "or".
    while any(tokenizer.match(operator) for operator in operators):
        operation = nodeBuilder.MULTIPLY_build(tokenizer)
        nodeBuilder.MULTIPLY_addOperand(operation, result)

        # The right-hand side is parsed only after the left operand was attached.
        rightOperand = UnaryExpression(tokenizer, staticContext)
        nodeBuilder.MULTIPLY_addOperand(operation, rightOperand)
        nodeBuilder.MULTIPLY_finish(operation)

        result = operation

    return result
def UnaryExpression(tokenizer, staticContext):
    """
    Parse a unary expression.

    Three cases are distinguished by the next token:

    - "increment" / "decrement": prefix form, the operand is a MemberExpression.
    - "delete", "void", "typeof", "not", "bitwise_not", "plus", "minus": the
      operand is parsed recursively as another UnaryExpression.
    - anything else: the token is pushed back, a MemberExpression is parsed and
      an optional postfix "increment" / "decrement" on the same line is wrapped
      around it.

    Returns the resulting node after passing it to UNARY_finish of the builder.
    """
    nodeBuilder = staticContext.builder
    tokenType = tokenizer.get(True)

    if tokenType in ("increment", "decrement"):
        # Prefix increment/decrement.
        result = nodeBuilder.UNARY_build(tokenizer)
        nodeBuilder.UNARY_addOperand(result, MemberExpression(tokenizer, staticContext, True))

    elif tokenType in ("delete", "void", "typeof", "not", "bitwise_not", "plus", "minus"):
        result = nodeBuilder.UNARY_build(tokenizer)
        nodeBuilder.UNARY_addOperand(result, UnaryExpression(tokenizer, staticContext))

    else:
        tokenizer.unget()
        result = MemberExpression(tokenizer, staticContext, True)

        # Don't look across a newline boundary for a postfix {in,de}crement.
        # The slot index into tokenizer.tokens is wrapped with "& 3".
        slotIndex = (tokenizer.tokenIndex + tokenizer.lookahead - 1) & 3
        onSameLine = tokenizer.tokens[slotIndex].line == tokenizer.line

        if onSameLine and (tokenizer.match("increment") or tokenizer.match("decrement")):
            postfix = nodeBuilder.UNARY_build(tokenizer)
            nodeBuilder.UNARY_setPostfix(postfix)

            # The operand is finished before it is attached to the postfix node.
            nodeBuilder.UNARY_finish(result)
            nodeBuilder.UNARY_addOperand(postfix, result)
            result = postfix

    nodeBuilder.UNARY_finish(result)
    return result
def MemberExpression(tokenizer, staticContext, allowCallSyntax):
    """
    Parse a member expression: a primary expression or a "new" expression,
    followed by any number of ".name", "[index]" and - when allowCallSyntax is
    true - "(arguments)" suffixes.

    Each suffix creates a new node through the builder which takes the
    expression parsed so far as its first operand. Parsing stops at the "end"
    token or at the first token which is not a supported suffix (that token is
    pushed back to the tokenizer).

    Returns the node of the outermost expression.
    """
    nodeBuilder = staticContext.builder

    if not tokenizer.match("new"):
        target = PrimaryExpression(tokenizer, staticContext)

    else:
        target = nodeBuilder.MEMBER_build(tokenizer)

        # The constructor reference itself must not consume a call suffix.
        constructor = MemberExpression(tokenizer, staticContext, False)
        nodeBuilder.MEMBER_addOperand(target, constructor)

        if tokenizer.match("left_paren"):
            nodeBuilder.MEMBER_rebuildNewWithArgs(target)
            nodeBuilder.MEMBER_addOperand(target, ArgumentList(tokenizer, staticContext))

        nodeBuilder.MEMBER_finish(target)

    while True:
        tokenType = tokenizer.get()

        if tokenType == "end":
            return target

        if tokenType == "dot":
            suffix = nodeBuilder.MEMBER_build(tokenizer)
            nodeBuilder.MEMBER_addOperand(suffix, target)

            if tokenizer.peek() == "identifier":
                tokenizer.mustMatch("identifier")
                nodeBuilder.MEMBER_addOperand(suffix, nodeBuilder.MEMBER_build(tokenizer))

            else:
                # ES6 Promises: accept "catch" as property name and turn the
                # resulting node into an ordinary identifier node.
                tokenizer.mustMatch("catch")
                patched = nodeBuilder.MEMBER_build(tokenizer)
                patched.type = "identifier"
                patched.value = "catch"
                nodeBuilder.MEMBER_addOperand(suffix, patched)

        elif tokenType == "left_bracket":
            suffix = nodeBuilder.MEMBER_build(tokenizer, "index")
            nodeBuilder.MEMBER_addOperand(suffix, target)
            nodeBuilder.MEMBER_addOperand(suffix, Expression(tokenizer, staticContext))
            tokenizer.mustMatch("right_bracket")

        elif tokenType == "left_paren" and allowCallSyntax:
            suffix = nodeBuilder.MEMBER_build(tokenizer, "call")
            nodeBuilder.MEMBER_addOperand(suffix, target)
            nodeBuilder.MEMBER_addOperand(suffix, ArgumentList(tokenizer, staticContext))

        else:
            # Not part of this member expression: hand the token back.
            tokenizer.unget()
            return target

        nodeBuilder.MEMBER_finish(suffix)
        target = suffix
def ArgumentList(tokenizer, staticContext):
    """
    Parse the arguments of a call up to and including the closing parenthesis.

    Every argument is parsed by AssignExpression and added to a list node
    created by the builder. An argument directly followed by "for" is turned
    into a generator expression.

    Returns the list node. For an empty argument list the node is returned
    right after the closing parenthesis was matched.

    Raises SyntaxError for a non-parenthesized yield expression followed by a
    comma and for a generator expression which would have to be parenthesized.
    """
    nodeBuilder = staticContext.builder
    arguments = nodeBuilder.LIST_build(tokenizer)

    if tokenizer.match("right_paren", True):
        return arguments

    hasMoreArguments = True
    while hasMoreArguments:
        argument = AssignExpression(tokenizer, staticContext)

        if argument.type == "yield" and not argument.parenthesized and tokenizer.peek() == "comma":
            raise SyntaxError("Yield expression must be parenthesized", tokenizer)

        if tokenizer.match("for"):
            argument = GeneratorExpression(tokenizer, staticContext, argument)
            if len(arguments) > 1 or tokenizer.peek(True) == "comma":
                raise SyntaxError("Generator expression must be parenthesized", tokenizer)

        nodeBuilder.LIST_addOperand(arguments, argument)

        # A comma announces the next argument, anything else ends the list.
        hasMoreArguments = tokenizer.match("comma")

    tokenizer.mustMatch("right_paren")
    nodeBuilder.LIST_finish(arguments)

    return arguments
def PrimaryExpression(tokenizer, staticContext):
    """
    Parse a primary expression, selected by the next token:

    - "function": function expression (delegated to FunctionDefinition)
    - "left_bracket": array initializer or array comprehension
    - "left_curly": object initializer, including get/set accessors and the
      shorthand form without a value
    - "left_paren": parenthesized expression (delegated to ParenExpression),
      the resulting node is flagged with parenthesized = True
    - "let": let block (delegated to LetBlock)
    - "null", "this", "true", "false", "identifier", "number", "string",
      "regexp": a plain node of that type

    Returns the node created for the expression.

    Raises SyntaxError when the token cannot start an operand, for invalid
    property names and missing colons in object initializers, and - when
    staticContext.ecma3OnlyMode is set - for property accessors and trailing
    commas in object initializers.
    """
    builder = staticContext.builder
    tokenType = tokenizer.get(True)

    if tokenType == "function":
        node = FunctionDefinition(tokenizer, staticContext, False, "expressed_form")

    elif tokenType == "left_bracket":
        node = builder.ARRAYINIT_build(tokenizer)

        while True:
            elementType = tokenizer.peek(True)
            if elementType == "right_bracket":
                break

            if elementType == "comma":
                # A comma without a preceding value is a hole in the array.
                tokenizer.get()
                builder.ARRAYINIT_addElement(node, None)
                continue

            builder.ARRAYINIT_addElement(node, AssignExpression(tokenizer, staticContext))

            # The peeked token can not be a comma here (handled above), so
            # only the token following the element decides whether to go on.
            if not tokenizer.match("comma"):
                break

        # If we matched exactly one element and got a "for", we have an
        # array comprehension.
        if len(node) == 1 and tokenizer.match("for"):
            childNode = builder.ARRAYCOMP_build(tokenizer)
            builder.ARRAYCOMP_setExpression(childNode, node[0])
            builder.ARRAYCOMP_setTail(childNode, comprehensionTail(tokenizer, staticContext))
            node = childNode

        builder.COMMENTS_add(node, node, tokenizer.getComments())
        tokenizer.mustMatch("right_bracket")
        builder.PRIMARY_finish(node)

    elif tokenType == "left_curly":
        node = builder.OBJECTINIT_build(tokenizer)

        if not tokenizer.match("right_curly"):
            while True:
                propertyType = tokenizer.get()
                tokenValue = getattr(tokenizer.token, "value", None)

                # Comments collected so far are attached to the property below.
                comments = tokenizer.getComments()

                if tokenValue in ("get", "set") and tokenizer.peek() == "identifier":
                    if staticContext.ecma3OnlyMode:
                        raise SyntaxError("Illegal property accessor", tokenizer)

                    accessor = FunctionDefinition(tokenizer, staticContext, True, "expressed_form")
                    builder.OBJECTINIT_addProperty(node, accessor)

                else:
                    if propertyType in ("identifier", "number", "string"):
                        propertyName = builder.PRIMARY_build(tokenizer, "identifier")
                        builder.PRIMARY_finish(propertyName)

                    elif propertyType == "right_curly":
                        # Closing brace directly after a comma.
                        if staticContext.ecma3OnlyMode:
                            raise SyntaxError("Illegal trailing ,", tokenizer)

                        tokenizer.unget()
                        break

                    elif tokenValue in jasy.script.tokenize.Lang.keywords:
                        # Keywords are accepted as property names as well.
                        propertyName = builder.PRIMARY_build(tokenizer, "identifier")
                        builder.PRIMARY_finish(propertyName)

                    else:
                        # The offending value is reported through the exception
                        # instead of being printed to stdout.
                        raise SyntaxError("Invalid property name: '%s'" % tokenValue, tokenizer)

                    if tokenizer.match("colon"):
                        childNode = builder.PROPERTYINIT_build(tokenizer)
                        builder.COMMENTS_add(childNode, node, comments)
                        builder.PROPERTYINIT_addOperand(childNode, propertyName)
                        builder.PROPERTYINIT_addOperand(childNode, AssignExpression(tokenizer, staticContext))
                        builder.PROPERTYINIT_finish(childNode)
                        builder.OBJECTINIT_addProperty(node, childNode)

                    else:
                        # Support, e.g., |var {staticContext, y} = o| as destructuring shorthand
                        # for |var {staticContext: staticContext, y: y} = o|, per proposed JS2/ES4 for JS1.8.
                        if tokenizer.peek() != "comma" and tokenizer.peek() != "right_curly":
                            raise SyntaxError("Missing : after property", tokenizer)

                        builder.OBJECTINIT_addProperty(node, propertyName)

                if not tokenizer.match("comma"):
                    break

            builder.COMMENTS_add(node, node, tokenizer.getComments())
            tokenizer.mustMatch("right_curly")

        builder.OBJECTINIT_finish(node)

    elif tokenType == "left_paren":
        # ParenExpression does its own matching on parentheses, so we need to unget.
        tokenizer.unget()
        node = ParenExpression(tokenizer, staticContext)
        node.parenthesized = True

    elif tokenType == "let":
        node = LetBlock(tokenizer, staticContext, False)

    elif tokenType in ("null", "this", "true", "false", "identifier", "number", "string", "regexp"):
        node = builder.PRIMARY_build(tokenizer, tokenType)
        builder.PRIMARY_finish(node)

    else:
        raise SyntaxError("Missing operand. Found type: %s" % tokenType, tokenizer)

    return node