cleaned up lexer
This commit is contained in:
parent
83c4808d5a
commit
9f60bbfd23
|
@ -21,26 +21,26 @@ const DESTINATION = 'destination'
|
||||||
|
|
||||||
var WHITESPACE = '\\s*'
|
var WHITESPACE = '\\s*'
|
||||||
|
|
||||||
var _states = {}
|
|
||||||
var _defaultState
|
|
||||||
var _currentState
|
|
||||||
|
|
||||||
var _indentStack = []
|
|
||||||
var _shouldTrackIndent : bool = false
|
|
||||||
|
|
||||||
var filename = ''
|
var filename = ''
|
||||||
var title = ''
|
var title = ''
|
||||||
var text = ''
|
var text = ''
|
||||||
|
|
||||||
|
var states = {}
|
||||||
|
var default_state
|
||||||
|
var current_state
|
||||||
|
|
||||||
|
var indent_stack = []
|
||||||
|
var should_track_indent = false
|
||||||
|
|
||||||
# Sets up a lexer for one piece of source text.
#
# _filename: stored in `filename`; tokenize_line() passes it on to the tokens it creates.
# _title:    stored in `title`.
# _text:     stored in `text`; tokenize() splits it into lines.
func _init(_filename, _title, _text):
	# Build the state table before anything else; tokenize() starts from
	# `default_state`, which createstates() assigns.
	createstates()

	filename = _filename
	title = _title
	text = _text
|
||||||
|
|
||||||
func create_states():
|
func createstates():
|
||||||
var patterns : Dictionary = {}
|
var patterns = {}
|
||||||
patterns[Constants.TokenType.Text] = ['.*', 'any text']
|
patterns[Constants.TokenType.Text] = ['.*', 'any text']
|
||||||
|
|
||||||
patterns[Constants.TokenType.Number] = ['\\-?[0-9]+(\\.[0-9+])?', 'any number']
|
patterns[Constants.TokenType.Number] = ['\\-?[0-9]+(\\.[0-9+])?', 'any number']
|
||||||
|
@ -86,249 +86,234 @@ func create_states():
|
||||||
patterns[Constants.TokenType.Set] = ['set(?!\\w)', '"set"']
|
patterns[Constants.TokenType.Set] = ['set(?!\\w)', '"set"']
|
||||||
patterns[Constants.TokenType.ShortcutOption] = ['\\-\\>\\s*', '"->"']
|
patterns[Constants.TokenType.ShortcutOption] = ['\\-\\>\\s*', '"->"']
|
||||||
|
|
||||||
#compound states
|
var shortcut_option = SHORTCUT + DASH + OPTION
|
||||||
var shortcut_option : String = SHORTCUT + DASH + OPTION
|
var shortcut_option_tag = shortcut_option + DASH + TAG
|
||||||
var shortcut_option_tag : String = shortcut_option + DASH + TAG
|
var command_or_expression = COMMAND + DASH + OR + DASH + EXPRESSION
|
||||||
var command_or_expression : String = COMMAND + DASH + OR + DASH + EXPRESSION
|
var link_destination = LINK + DASH + DESTINATION
|
||||||
var link_destination : String = LINK + DASH + DESTINATION
|
|
||||||
|
|
||||||
_states = {}
|
states = {}
|
||||||
|
|
||||||
_states[BASE] = LexerState.new(patterns)
|
states[BASE] = LexerState.new(patterns)
|
||||||
_states[BASE].add_transition(Constants.TokenType.BeginCommand,COMMAND,true)
|
states[BASE].add_transition(Constants.TokenType.BeginCommand, COMMAND, true)
|
||||||
_states[BASE].add_transition(Constants.TokenType.OptionStart,LINK,true)
|
states[BASE].add_transition(Constants.TokenType.OptionStart, LINK, true)
|
||||||
_states[BASE].add_transition(Constants.TokenType.ShortcutOption, shortcut_option)
|
states[BASE].add_transition(Constants.TokenType.ShortcutOption, shortcut_option)
|
||||||
_states[BASE].add_transition(Constants.TokenType.TagMarker,TAG,true)
|
states[BASE].add_transition(Constants.TokenType.TagMarker, TAG, true)
|
||||||
_states[BASE].add_text_rule(Constants.TokenType.Text)
|
states[BASE].add_text_rule(Constants.TokenType.Text)
|
||||||
|
|
||||||
_states[TAG] = LexerState.new(patterns)
|
states[TAG] = LexerState.new(patterns)
|
||||||
_states[TAG].add_transition(Constants.TokenType.Identifier,BASE)
|
states[TAG].add_transition(Constants.TokenType.Identifier, BASE)
|
||||||
|
|
||||||
_states[shortcut_option] = LexerState.new(patterns)
|
states[shortcut_option] = LexerState.new(patterns)
|
||||||
_states[shortcut_option].track_indent = true
|
states[shortcut_option].track_indent = true
|
||||||
_states[shortcut_option].add_transition(Constants.TokenType.BeginCommand,EXPRESSION,true)
|
states[shortcut_option].add_transition(Constants.TokenType.BeginCommand, EXPRESSION, true)
|
||||||
_states[shortcut_option].add_transition(Constants.TokenType.TagMarker,shortcut_option_tag,true)
|
states[shortcut_option].add_transition(Constants.TokenType.TagMarker, shortcut_option_tag, true)
|
||||||
_states[shortcut_option].add_text_rule(Constants.TokenType.Text,BASE)
|
states[shortcut_option].add_text_rule(Constants.TokenType.Text, BASE)
|
||||||
|
|
||||||
_states[shortcut_option_tag] = LexerState.new(patterns)
|
states[shortcut_option_tag] = LexerState.new(patterns)
|
||||||
_states[shortcut_option_tag].add_transition(Constants.TokenType.Identifier,shortcut_option)
|
states[shortcut_option_tag].add_transition(Constants.TokenType.Identifier, shortcut_option)
|
||||||
|
|
||||||
_states[COMMAND] = LexerState.new(patterns)
|
states[COMMAND] = LexerState.new(patterns)
|
||||||
_states[COMMAND].add_transition(Constants.TokenType.IfToken,EXPRESSION)
|
states[COMMAND].add_transition(Constants.TokenType.IfToken, EXPRESSION)
|
||||||
_states[COMMAND].add_transition(Constants.TokenType.ElseToken)
|
states[COMMAND].add_transition(Constants.TokenType.ElseToken)
|
||||||
_states[COMMAND].add_transition(Constants.TokenType.ElseIf,EXPRESSION)
|
states[COMMAND].add_transition(Constants.TokenType.ElseIf, EXPRESSION)
|
||||||
_states[COMMAND].add_transition(Constants.TokenType.EndIf)
|
states[COMMAND].add_transition(Constants.TokenType.EndIf)
|
||||||
_states[COMMAND].add_transition(Constants.TokenType.Set, ASSIGNMENT)
|
states[COMMAND].add_transition(Constants.TokenType.Set, ASSIGNMENT)
|
||||||
_states[COMMAND].add_transition(Constants.TokenType.EndCommand,BASE,true)
|
states[COMMAND].add_transition(Constants.TokenType.EndCommand, BASE, true)
|
||||||
_states[COMMAND].add_transition(Constants.TokenType.Identifier,command_or_expression)
|
states[COMMAND].add_transition(Constants.TokenType.Identifier, command_or_expression)
|
||||||
_states[COMMAND].add_text_rule(Constants.TokenType.Text)
|
states[COMMAND].add_text_rule(Constants.TokenType.Text)
|
||||||
|
|
||||||
_states[command_or_expression] = LexerState.new(patterns)
|
states[command_or_expression] = LexerState.new(patterns)
|
||||||
_states[command_or_expression].add_transition(Constants.TokenType.LeftParen,EXPRESSION)
|
states[command_or_expression].add_transition(Constants.TokenType.LeftParen, EXPRESSION)
|
||||||
_states[command_or_expression].add_transition(Constants.TokenType.EndCommand,BASE,true)
|
states[command_or_expression].add_transition(Constants.TokenType.EndCommand, BASE, true)
|
||||||
_states[command_or_expression].add_text_rule(Constants.TokenType.Text)
|
states[command_or_expression].add_text_rule(Constants.TokenType.Text)
|
||||||
|
|
||||||
_states[ASSIGNMENT] = LexerState.new(patterns)
|
states[ASSIGNMENT] = LexerState.new(patterns)
|
||||||
_states[ASSIGNMENT].add_transition(Constants.TokenType.Variable)
|
states[ASSIGNMENT].add_transition(Constants.TokenType.Variable)
|
||||||
_states[ASSIGNMENT].add_transition(Constants.TokenType.EqualToOrAssign, EXPRESSION)
|
states[ASSIGNMENT].add_transition(Constants.TokenType.EqualToOrAssign, EXPRESSION)
|
||||||
_states[ASSIGNMENT].add_transition(Constants.TokenType.AddAssign, EXPRESSION)
|
states[ASSIGNMENT].add_transition(Constants.TokenType.AddAssign, EXPRESSION)
|
||||||
_states[ASSIGNMENT].add_transition(Constants.TokenType.MinusAssign, EXPRESSION)
|
states[ASSIGNMENT].add_transition(Constants.TokenType.MinusAssign, EXPRESSION)
|
||||||
_states[ASSIGNMENT].add_transition(Constants.TokenType.MultiplyAssign, EXPRESSION)
|
states[ASSIGNMENT].add_transition(Constants.TokenType.MultiplyAssign, EXPRESSION)
|
||||||
_states[ASSIGNMENT].add_transition(Constants.TokenType.DivideAssign, EXPRESSION)
|
states[ASSIGNMENT].add_transition(Constants.TokenType.DivideAssign, EXPRESSION)
|
||||||
|
|
||||||
_states[EXPRESSION] = LexerState.new(patterns)
|
states[EXPRESSION] = LexerState.new(patterns)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.EndCommand, BASE)
|
states[EXPRESSION].add_transition(Constants.TokenType.EndCommand, BASE)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.Number)
|
states[EXPRESSION].add_transition(Constants.TokenType.Number)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.Str)
|
states[EXPRESSION].add_transition(Constants.TokenType.Str)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.LeftParen)
|
states[EXPRESSION].add_transition(Constants.TokenType.LeftParen)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.RightParen)
|
states[EXPRESSION].add_transition(Constants.TokenType.RightParen)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.EqualTo)
|
states[EXPRESSION].add_transition(Constants.TokenType.EqualTo)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.EqualToOrAssign)
|
states[EXPRESSION].add_transition(Constants.TokenType.EqualToOrAssign)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.NotEqualTo)
|
states[EXPRESSION].add_transition(Constants.TokenType.NotEqualTo)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.GreaterThanOrEqualTo)
|
states[EXPRESSION].add_transition(Constants.TokenType.GreaterThanOrEqualTo)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.GreaterThan)
|
states[EXPRESSION].add_transition(Constants.TokenType.GreaterThan)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.LessThanOrEqualTo)
|
states[EXPRESSION].add_transition(Constants.TokenType.LessThanOrEqualTo)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.LessThan)
|
states[EXPRESSION].add_transition(Constants.TokenType.LessThan)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.Add)
|
states[EXPRESSION].add_transition(Constants.TokenType.Add)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.Minus)
|
states[EXPRESSION].add_transition(Constants.TokenType.Minus)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.Multiply)
|
states[EXPRESSION].add_transition(Constants.TokenType.Multiply)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.Divide)
|
states[EXPRESSION].add_transition(Constants.TokenType.Divide)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.Modulo)
|
states[EXPRESSION].add_transition(Constants.TokenType.Modulo)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.And)
|
states[EXPRESSION].add_transition(Constants.TokenType.And)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.Or)
|
states[EXPRESSION].add_transition(Constants.TokenType.Or)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.Xor)
|
states[EXPRESSION].add_transition(Constants.TokenType.Xor)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.Not)
|
states[EXPRESSION].add_transition(Constants.TokenType.Not)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.Variable)
|
states[EXPRESSION].add_transition(Constants.TokenType.Variable)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.Comma)
|
states[EXPRESSION].add_transition(Constants.TokenType.Comma)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.TrueToken)
|
states[EXPRESSION].add_transition(Constants.TokenType.TrueToken)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.FalseToken)
|
states[EXPRESSION].add_transition(Constants.TokenType.FalseToken)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.NullToken)
|
states[EXPRESSION].add_transition(Constants.TokenType.NullToken)
|
||||||
_states[EXPRESSION].add_transition(Constants.TokenType.Identifier)
|
states[EXPRESSION].add_transition(Constants.TokenType.Identifier)
|
||||||
|
|
||||||
_states[LINK] = LexerState.new(patterns)
|
states[LINK] = LexerState.new(patterns)
|
||||||
_states[LINK].add_transition(Constants.TokenType.OptionEnd, BASE, true)
|
states[LINK].add_transition(Constants.TokenType.OptionEnd, BASE, true)
|
||||||
_states[LINK].add_transition(Constants.TokenType.OptionDelimit, link_destination, true)
|
states[LINK].add_transition(Constants.TokenType.OptionDelimit, link_destination, true)
|
||||||
_states[LINK].add_text_rule(Constants.TokenType.Text)
|
states[LINK].add_text_rule(Constants.TokenType.Text)
|
||||||
|
|
||||||
_states[link_destination] = LexerState.new(patterns)
|
states[link_destination] = LexerState.new(patterns)
|
||||||
_states[link_destination].add_transition(Constants.TokenType.Identifier)
|
states[link_destination].add_transition(Constants.TokenType.Identifier)
|
||||||
_states[link_destination].add_transition(Constants.TokenType.OptionEnd, BASE)
|
states[link_destination].add_transition(Constants.TokenType.OptionEnd, BASE)
|
||||||
|
|
||||||
_defaultState = _states[BASE]
|
default_state = states[BASE]
|
||||||
|
|
||||||
for stateKey in _states.keys():
|
for key in states.keys():
|
||||||
_states[stateKey].stateName = stateKey
|
states[key].name = key
|
||||||
|
|
||||||
# Tokenizes all of `text` and returns the tokens in source order.
#
# `text` is split on LINE_SEPARATOR and every line is handed to
# tokenize_line(); the per-line results are concatenated and a single
# EndOfInput token is appended at the end.
#
# Indentation tracking and the current lexer state are reset on every call.
#
# Returns: Array of Token.
func tokenize():
	var tokens = []

	# Reset per-run state. Each indent_stack entry is [indentation, flag];
	# tokenize_line() only emits a Dedent when the popped entry's flag is true.
	indent_stack.clear()
	indent_stack.push_front([0, false])
	should_track_indent = false
	current_state = default_state

	var lines = text.split(LINE_SEPARATOR)
	# A trailing empty line has indentation 0, so tokenize_line() runs its
	# de-indent branch for any indentation levels still open at end of text.
	lines.append('')

	var line_number = 1
	for line in lines:
		tokens += tokenize_line(line, line_number)
		line_number += 1

	# Token._init expects (type, state, filename, line_number, column, value).
	# `filename` has to be passed explicitly so that line_number and column
	# end up in their own slots instead of being shifted one position left.
	var end_of_input = Token.new(
		Constants.TokenType.EndOfInput,
		current_state,
		filename,
		line_number,
		0
	)
	tokens.append(end_of_input)

	return tokens
|
||||||
|
|
||||||
func tokenize_line(line, line_number):
|
func tokenize_line(line, line_number):
|
||||||
var tokenStack : Array = []
|
var token_stack = []
|
||||||
|
|
||||||
var freshLine = line.replace('\t',' ').replace('\r','')
|
var fresh_line = line.replace('\t',' ').replace('\r','')
|
||||||
|
|
||||||
#record indentation
|
|
||||||
var indentation = line_indentation(line)
|
var indentation = line_indentation(line)
|
||||||
var prevIndentation = _indentStack.front()
|
var previous_indentation = indent_stack.front()[0]
|
||||||
|
|
||||||
if _shouldTrackIndent && indentation > prevIndentation.key:
|
if should_track_indent && indentation > previous_indentation:
|
||||||
#we add an indenation token to record indent level
|
indent_stack.push_front([indentation, true])
|
||||||
_indentStack.push_front(IntBoolPair.new(indentation,true))
|
|
||||||
|
|
||||||
var indent : Token = Token.new(
|
var indent = Token.new(
|
||||||
Constants.TokenType.Indent,
|
Constants.TokenType.Indent,
|
||||||
_currentState,
|
current_state,
|
||||||
filename,
|
filename,
|
||||||
line_number,
|
line_number,
|
||||||
prevIndentation.key
|
previous_indentation
|
||||||
)
|
)
|
||||||
indent.value = '%*s' % [indentation - prevIndentation.key,'']
|
indent.value = '%*s' % [indentation - previous_indentation, '']
|
||||||
|
|
||||||
_shouldTrackIndent = false
|
should_track_indent = false
|
||||||
tokenStack.push_front(indent)
|
token_stack.push_front(indent)
|
||||||
|
|
||||||
elif indentation < prevIndentation.key:
|
elif indentation < previous_indentation:
|
||||||
#de-indent and then emit indentaiton token
|
while indentation < indent_stack.front()[0]:
|
||||||
|
var top = indent_stack.pop_front()[1]
|
||||||
while indentation < _indentStack.front().key:
|
if top:
|
||||||
var top : IntBoolPair = _indentStack.pop_front()
|
var deindent = Token.new(Constants.TokenType.Dedent, current_state, line_number, 0)
|
||||||
if top.value:
|
token_stack.push_front(deindent)
|
||||||
var deIndent : Token = Token.new(Constants.TokenType.Dedent,_currentState,line_number,0)
|
|
||||||
tokenStack.push_front(deIndent)
|
|
||||||
|
|
||||||
|
var column = indentation
|
||||||
var column : int = indentation
|
var whitespace = RegEx.new()
|
||||||
|
whitespace.compile(WHITESPACE)
|
||||||
|
|
||||||
var whitespace : RegEx = RegEx.new()
|
while column < fresh_line.length():
|
||||||
var error = whitespace.compile(WHITESPACE)
|
if fresh_line.substr(column).begins_with(LINE_COMENT):
|
||||||
if error != OK:
|
|
||||||
printerr('unable to compile regex WHITESPACE')
|
|
||||||
return []
|
|
||||||
|
|
||||||
while column < freshLine.length():
|
|
||||||
|
|
||||||
if freshLine.substr(column).begins_with(LINE_COMENT):
|
|
||||||
break
|
break
|
||||||
|
|
||||||
var matched : bool = false
|
var matched = false
|
||||||
|
|
||||||
for rule in _currentState.rules:
|
for rule in current_state.rules:
|
||||||
var found = rule.regex.search(freshLine, column)
|
var found = rule.regex.search(fresh_line, column)
|
||||||
|
|
||||||
if !found:
|
if !found:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
var tokenText : String
|
var token_text = ''
|
||||||
|
|
||||||
|
# NOTE: If this is text then we back up to the most recent delimiting token
|
||||||
|
# and treat everything from there as text.
|
||||||
if rule.token_type == Constants.TokenType.Text:
|
if rule.token_type == Constants.TokenType.Text:
|
||||||
#if this is text then we back up to the most recent
|
|
||||||
#delimiting token and treat everything from there as text.
|
|
||||||
|
|
||||||
var startIndex : int = indentation
|
var start_index = indentation
|
||||||
|
|
||||||
if tokenStack.size() > 0 :
|
if token_stack.size() > 0 :
|
||||||
while tokenStack.front().type == Constants.TokenType.Identifier:
|
while token_stack.front().type == Constants.TokenType.Identifier:
|
||||||
tokenStack.pop_front()
|
token_stack.pop_front()
|
||||||
|
|
||||||
var startDelimitToken : Token = tokenStack.front()
|
var start_delimit_token = token_stack.front()
|
||||||
startIndex = startDelimitToken.column
|
start_index = start_delimit_token.column
|
||||||
|
|
||||||
if startDelimitToken.type == Constants.TokenType.Indent:
|
if start_delimit_token.type == Constants.TokenType.Indent:
|
||||||
startIndex += startDelimitToken.value.length()
|
start_index += start_delimit_token.value.length()
|
||||||
if startDelimitToken.type == Constants.TokenType.Dedent:
|
if start_delimit_token.type == Constants.TokenType.Dedent:
|
||||||
startIndex = indentation
|
start_index = indentation
|
||||||
#
|
|
||||||
|
|
||||||
column = startIndex
|
column = start_index
|
||||||
var end_index = found.get_start() + found.get_string().length()
|
var end_index = found.get_start() + found.get_string().length()
|
||||||
|
|
||||||
tokenText = freshLine.substr(startIndex, end_index - startIndex)
|
token_text = fresh_line.substr(start_index, end_index - start_index)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
tokenText = found.get_string()
|
token_text = found.get_string()
|
||||||
|
|
||||||
column += tokenText.length()
|
column += token_text.length()
|
||||||
|
|
||||||
#pre-proccess string
|
|
||||||
if rule.token_type == Constants.TokenType.Str:
|
if rule.token_type == Constants.TokenType.Str:
|
||||||
tokenText = tokenText.substr(1, tokenText.length() - 2)
|
token_text = token_text.substr(1, token_text.length() - 2)
|
||||||
tokenText = tokenText.replace('\\\\', '\\')
|
token_text = token_text.replace('\\\\', '\\')
|
||||||
tokenText = tokenText.replace('\\\'','\'')
|
token_text = token_text.replace('\\\'','\'')
|
||||||
|
|
||||||
var token = Token.new(
|
var token = Token.new(
|
||||||
rule.token_type,
|
rule.token_type,
|
||||||
_currentState,
|
current_state,
|
||||||
filename,
|
filename,
|
||||||
line_number,
|
line_number,
|
||||||
column,
|
column,
|
||||||
tokenText
|
token_text
|
||||||
)
|
)
|
||||||
token.delimits_text = rule.delimits_text
|
token.delimits_text = rule.delimits_text
|
||||||
|
|
||||||
tokenStack.push_front(token)
|
token_stack.push_front(token)
|
||||||
|
|
||||||
if rule.enter_state != null and rule.enter_state.length() > 0:
|
if rule.enter_state != null and rule.enter_state.length() > 0:
|
||||||
if not _states.has(rule.enter_state):
|
if not states.has(rule.enter_state):
|
||||||
printerr('State[%s] not known - line(%s) col(%s)' % [rule.enter_state, line_number, column])
|
printerr('State[%s] not known - line(%s) col(%s)' % [rule.enter_state, line_number, column])
|
||||||
return []
|
return []
|
||||||
|
|
||||||
enter_state(_states[rule.enter_state])
|
enter_state(states[rule.enter_state])
|
||||||
|
|
||||||
if _shouldTrackIndent:
|
if should_track_indent:
|
||||||
if _indentStack.front().key < indentation:
|
if indent_stack.front()[0] < indentation:
|
||||||
_indentStack.append(IntBoolPair.new(indentation, false))
|
indent_stack.append([indentation, false])
|
||||||
|
|
||||||
matched = true
|
matched = true
|
||||||
break
|
break
|
||||||
|
|
||||||
if not matched:
|
if not matched:
|
||||||
var rules = []
|
var rules = []
|
||||||
for rule in _currentState.rules:
|
for rule in current_state.rules:
|
||||||
rules.append('"%s" (%s)' % [Constants.token_type_name(rule.token_type), rule.human_readable_identifier])
|
rules.append('"%s" (%s)' % [Constants.token_type_name(rule.token_type), rule.human_readable_identifier])
|
||||||
|
|
||||||
var error_data = [
|
var error_data = [
|
||||||
|
@ -340,30 +325,30 @@ func tokenize_line(line, line_number):
|
||||||
]
|
]
|
||||||
assert(false, 'Expected %s in file %s in node "%s" on line #%d (column #%d)' % error_data)
|
assert(false, 'Expected %s in file %s in node "%s" on line #%d (column #%d)' % error_data)
|
||||||
|
|
||||||
var lastWhiteSpace = whitespace.search(line, column)
|
var last_whitespace = whitespace.search(line, column)
|
||||||
if lastWhiteSpace:
|
if last_whitespace:
|
||||||
column += lastWhiteSpace.get_string().length()
|
column += last_whitespace.get_string().length()
|
||||||
|
|
||||||
|
|
||||||
tokenStack.invert()
|
token_stack.invert()
|
||||||
|
|
||||||
return tokenStack
|
return token_stack
|
||||||
|
|
||||||
# Returns the number of leading whitespace characters in `line`
# (0 when the line does not start with whitespace).
func line_indentation(line):
	var leading_ws = RegEx.new()
	leading_ws.compile('^(\\s*)')

	var result = leading_ws.search(line)
	if result == null:
		return 0

	# An empty match has length 0, which is already the "no indentation" answer.
	return result.get_string().length()
|
||||||
|
|
||||||
# Makes `state` the current lexer state.
#
# Entering a state whose `track_indent` is set switches indentation tracking
# on; entering any other state leaves `should_track_indent` as it was.
func enter_state(state):
	current_state = state

	if state.track_indent:
		should_track_indent = true
|
||||||
|
|
||||||
class Token:
|
class Token:
|
||||||
var type = -1
|
var type = -1
|
||||||
|
@ -375,38 +360,36 @@ class Token:
|
||||||
var text = ''
|
var text = ''
|
||||||
|
|
||||||
var delimits_text = false
|
var delimits_text = false
|
||||||
var paramCount = -1
|
var parameter_count = -1
|
||||||
var lexerState = ''
|
var lexer_state = ''
|
||||||
|
|
||||||
# Creates a token.
#
# _type:        token type (callers pass a Constants.TokenType value).
# _state:       lexer state the token was produced in; only its `name` is kept.
# _filename:    stored in `filename`.
# _line_number: stored in `line_number` (default -1).
# _column:      stored in `column` (default -1).
# _value:       stored in `value` (default '').
func _init(_type, _state, _filename, _line_number = -1, _column = -1, _value = ''):
	type = _type
	# Keep the state's name rather than a reference to the state object.
	lexer_state = _state.name

	filename = _filename
	line_number = _line_number
	column = _column
	value = _value
|
||||||
|
|
||||||
# Human-readable form: "<type name> (<value>) at <line>:<column> (state: <state name>)".
func _to_string():
	var type_name = Constants.token_type_name(type)
	return '%s (%s) at %s:%s (state: %s)' % [type_name, value, line_number, column, lexer_state]
|
||||||
|
|
||||||
class LexerState:
|
class LexerState:
|
||||||
|
var name = ''
|
||||||
var stateName : String
|
var patterns = {}
|
||||||
var patterns : Dictionary
|
var rules = []
|
||||||
var rules : Array = []
|
var track_indent = false
|
||||||
var track_indent : bool = false
|
|
||||||
|
|
||||||
# Creates a state that looks up its rule patterns in `_patterns`.
#
# _patterns: Dictionary keyed by token type; add_transition() reads index 0
#            (regex source) and index 1 (human-readable name) of each entry.
#            The dictionary is stored as given, not copied.
func _init(_patterns):
	patterns = _patterns
|
||||||
|
|
||||||
# Appends a rule for `type` to this state and returns it.
#
# type:         token type; must be a key of `patterns`.
# state:        name of the state to enter after the rule matches
#               ('' by default).
# delimit_text: forwarded to the Rule as its last constructor argument.
#
# Returns: the newly created Rule.
func add_transition(type, state = '', delimit_text = false):
	var entry = patterns[type]  # [regex source, human-readable name]

	# Prefix with \G so the pattern is anchored at the search start position.
	var anchored = '\\G%s' % entry[0]

	var rule = Rule.new(type, anchored, entry[1], state, delimit_text)
	rules.append(rule)
	return rule
|
||||||
|
|
||||||
func add_text_rule(type : int, state : String = '')->Rule:
|
func add_text_rule(type, state = ''):
|
||||||
if contains_text_rule() :
|
if contains_text_rule() :
|
||||||
printerr('State already contains Text rule')
|
printerr('State already contains Text rule')
|
||||||
return null
|
return null
|
||||||
|
@ -417,25 +400,25 @@ class LexerState:
|
||||||
delimiters.append('%s' % rule.regex.get_pattern().substr(2))
|
delimiters.append('%s' % rule.regex.get_pattern().substr(2))
|
||||||
|
|
||||||
var pattern = '\\G((?!%s).)*' % [PoolStringArray(delimiters).join('|')]
|
var pattern = '\\G((?!%s).)*' % [PoolStringArray(delimiters).join('|')]
|
||||||
var rule : Rule = add_transition(type,state)
|
var rule = add_transition(type, state)
|
||||||
rule.regex = RegEx.new()
|
rule.regex = RegEx.new()
|
||||||
rule.regex.compile(pattern)
|
rule.regex.compile(pattern)
|
||||||
rule.is_text_rule = true
|
rule.is_text_rule = true
|
||||||
return rule
|
return rule
|
||||||
|
|
||||||
# Returns true if any rule already registered on this state is flagged
# as a text rule, false otherwise.
func contains_text_rule():
	for existing in rules:
		if existing.is_text_rule:
			return true

	return false
|
||||||
|
|
||||||
class Rule:
|
class Rule:
|
||||||
var regex : RegEx
|
var regex
|
||||||
|
|
||||||
var enter_state : String
|
var enter_state = ''
|
||||||
var token_type : int
|
var token_type = -1
|
||||||
var is_text_rule : bool
|
var is_text_rule = false
|
||||||
var delimits_text : bool
|
var delimits_text = false
|
||||||
var human_readable_identifier = ''
|
var human_readable_identifier = ''
|
||||||
|
|
||||||
func _init(_type, _regex, _human_readable_identifier, _enter_state, _delimits_text):
|
func _init(_type, _regex, _human_readable_identifier, _enter_state, _delimits_text):
|
||||||
|
@ -450,12 +433,3 @@ class Rule:
|
||||||
|
|
||||||
# Human-readable form: "[Rule : <type name> (<readable identifier>) - <regex>]".
func _to_string():
	var type_name = Constants.token_type_name(token_type)
	return '[Rule : %s (%s) - %s]' % [type_name, human_readable_identifier, regex]
|
||||||
|
|
||||||
class IntBoolPair:
|
|
||||||
var key = -1
|
|
||||||
var value = false
|
|
||||||
|
|
||||||
func _init(_key, _value):
|
|
||||||
key = _key
|
|
||||||
value = _value
|
|
||||||
|
|
||||||
|
|
Reference in a new issue