cleaned up lexer

parent 83c4808d5a
commit 9f60bbfd23
@@ -21,26 +21,26 @@ const DESTINATION = 'destination'
 var WHITESPACE = '\\s*'
 
-var _states = {}
-var _defaultState
-var _currentState
-
-var _indentStack = []
-var _shouldTrackIndent : bool = false
-
 var filename = ''
 var title = ''
 var text = ''
 
+var states = {}
+var default_state
+var current_state
+
+var indent_stack = []
+var should_track_indent = false
+
 func _init(_filename, _title, _text):
-	create_states()
+	createstates()
 
 	filename = _filename
 	title = _title
 	text = _text
 
-func create_states():
-	var patterns : Dictionary = {}
+func createstates():
+	var patterns = {}
 	patterns[Constants.TokenType.Text] = ['.*', 'any text']
 
 	patterns[Constants.TokenType.Number] = ['\\-?[0-9]+(\\.[0-9+])?', 'any number']
 
@@ -86,249 +86,234 @@ func create_states():
 	patterns[Constants.TokenType.Set] = ['set(?!\\w)', '"set"']
 	patterns[Constants.TokenType.ShortcutOption] = ['\\-\\>\\s*', '"->"']
 
 	#compound states
-	var shortcut_option : String = SHORTCUT + DASH + OPTION
-	var shortcut_option_tag : String = shortcut_option + DASH + TAG
-	var command_or_expression : String = COMMAND + DASH + OR + DASH + EXPRESSION
-	var link_destination : String = LINK + DASH + DESTINATION
+	var shortcut_option = SHORTCUT + DASH + OPTION
+	var shortcut_option_tag = shortcut_option + DASH + TAG
+	var command_or_expression = COMMAND + DASH + OR + DASH + EXPRESSION
+	var link_destination = LINK + DASH + DESTINATION
 
-	_states = {}
+	states = {}
 
-	_states[BASE] = LexerState.new(patterns)
-	_states[BASE].add_transition(Constants.TokenType.BeginCommand,COMMAND,true)
-	_states[BASE].add_transition(Constants.TokenType.OptionStart,LINK,true)
-	_states[BASE].add_transition(Constants.TokenType.ShortcutOption, shortcut_option)
-	_states[BASE].add_transition(Constants.TokenType.TagMarker,TAG,true)
-	_states[BASE].add_text_rule(Constants.TokenType.Text)
+	states[BASE] = LexerState.new(patterns)
+	states[BASE].add_transition(Constants.TokenType.BeginCommand, COMMAND, true)
+	states[BASE].add_transition(Constants.TokenType.OptionStart, LINK, true)
+	states[BASE].add_transition(Constants.TokenType.ShortcutOption, shortcut_option)
+	states[BASE].add_transition(Constants.TokenType.TagMarker, TAG, true)
+	states[BASE].add_text_rule(Constants.TokenType.Text)
 
-	_states[TAG] = LexerState.new(patterns)
-	_states[TAG].add_transition(Constants.TokenType.Identifier,BASE)
+	states[TAG] = LexerState.new(patterns)
+	states[TAG].add_transition(Constants.TokenType.Identifier, BASE)
 
-	_states[shortcut_option] = LexerState.new(patterns)
-	_states[shortcut_option].track_indent = true
-	_states[shortcut_option].add_transition(Constants.TokenType.BeginCommand,EXPRESSION,true)
-	_states[shortcut_option].add_transition(Constants.TokenType.TagMarker,shortcut_option_tag,true)
-	_states[shortcut_option].add_text_rule(Constants.TokenType.Text,BASE)
+	states[shortcut_option] = LexerState.new(patterns)
+	states[shortcut_option].track_indent = true
+	states[shortcut_option].add_transition(Constants.TokenType.BeginCommand, EXPRESSION, true)
+	states[shortcut_option].add_transition(Constants.TokenType.TagMarker, shortcut_option_tag, true)
+	states[shortcut_option].add_text_rule(Constants.TokenType.Text, BASE)
 
-	_states[shortcut_option_tag] = LexerState.new(patterns)
-	_states[shortcut_option_tag].add_transition(Constants.TokenType.Identifier,shortcut_option)
+	states[shortcut_option_tag] = LexerState.new(patterns)
+	states[shortcut_option_tag].add_transition(Constants.TokenType.Identifier, shortcut_option)
 
-	_states[COMMAND] = LexerState.new(patterns)
-	_states[COMMAND].add_transition(Constants.TokenType.IfToken,EXPRESSION)
-	_states[COMMAND].add_transition(Constants.TokenType.ElseToken)
-	_states[COMMAND].add_transition(Constants.TokenType.ElseIf,EXPRESSION)
-	_states[COMMAND].add_transition(Constants.TokenType.EndIf)
-	_states[COMMAND].add_transition(Constants.TokenType.Set, ASSIGNMENT)
-	_states[COMMAND].add_transition(Constants.TokenType.EndCommand,BASE,true)
-	_states[COMMAND].add_transition(Constants.TokenType.Identifier,command_or_expression)
-	_states[COMMAND].add_text_rule(Constants.TokenType.Text)
+	states[COMMAND] = LexerState.new(patterns)
+	states[COMMAND].add_transition(Constants.TokenType.IfToken, EXPRESSION)
+	states[COMMAND].add_transition(Constants.TokenType.ElseToken)
+	states[COMMAND].add_transition(Constants.TokenType.ElseIf, EXPRESSION)
+	states[COMMAND].add_transition(Constants.TokenType.EndIf)
+	states[COMMAND].add_transition(Constants.TokenType.Set, ASSIGNMENT)
+	states[COMMAND].add_transition(Constants.TokenType.EndCommand, BASE, true)
+	states[COMMAND].add_transition(Constants.TokenType.Identifier, command_or_expression)
+	states[COMMAND].add_text_rule(Constants.TokenType.Text)
 
-	_states[command_or_expression] = LexerState.new(patterns)
-	_states[command_or_expression].add_transition(Constants.TokenType.LeftParen,EXPRESSION)
-	_states[command_or_expression].add_transition(Constants.TokenType.EndCommand,BASE,true)
-	_states[command_or_expression].add_text_rule(Constants.TokenType.Text)
+	states[command_or_expression] = LexerState.new(patterns)
+	states[command_or_expression].add_transition(Constants.TokenType.LeftParen, EXPRESSION)
+	states[command_or_expression].add_transition(Constants.TokenType.EndCommand, BASE, true)
+	states[command_or_expression].add_text_rule(Constants.TokenType.Text)
 
-	_states[ASSIGNMENT] = LexerState.new(patterns)
-	_states[ASSIGNMENT].add_transition(Constants.TokenType.Variable)
-	_states[ASSIGNMENT].add_transition(Constants.TokenType.EqualToOrAssign, EXPRESSION)
-	_states[ASSIGNMENT].add_transition(Constants.TokenType.AddAssign, EXPRESSION)
-	_states[ASSIGNMENT].add_transition(Constants.TokenType.MinusAssign, EXPRESSION)
-	_states[ASSIGNMENT].add_transition(Constants.TokenType.MultiplyAssign, EXPRESSION)
-	_states[ASSIGNMENT].add_transition(Constants.TokenType.DivideAssign, EXPRESSION)
+	states[ASSIGNMENT] = LexerState.new(patterns)
+	states[ASSIGNMENT].add_transition(Constants.TokenType.Variable)
+	states[ASSIGNMENT].add_transition(Constants.TokenType.EqualToOrAssign, EXPRESSION)
+	states[ASSIGNMENT].add_transition(Constants.TokenType.AddAssign, EXPRESSION)
+	states[ASSIGNMENT].add_transition(Constants.TokenType.MinusAssign, EXPRESSION)
+	states[ASSIGNMENT].add_transition(Constants.TokenType.MultiplyAssign, EXPRESSION)
+	states[ASSIGNMENT].add_transition(Constants.TokenType.DivideAssign, EXPRESSION)
 
-	_states[EXPRESSION] = LexerState.new(patterns)
-	_states[EXPRESSION].add_transition(Constants.TokenType.EndCommand, BASE)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Number)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Str)
-	_states[EXPRESSION].add_transition(Constants.TokenType.LeftParen)
-	_states[EXPRESSION].add_transition(Constants.TokenType.RightParen)
-	_states[EXPRESSION].add_transition(Constants.TokenType.EqualTo)
-	_states[EXPRESSION].add_transition(Constants.TokenType.EqualToOrAssign)
-	_states[EXPRESSION].add_transition(Constants.TokenType.NotEqualTo)
-	_states[EXPRESSION].add_transition(Constants.TokenType.GreaterThanOrEqualTo)
-	_states[EXPRESSION].add_transition(Constants.TokenType.GreaterThan)
-	_states[EXPRESSION].add_transition(Constants.TokenType.LessThanOrEqualTo)
-	_states[EXPRESSION].add_transition(Constants.TokenType.LessThan)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Add)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Minus)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Multiply)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Divide)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Modulo)
-	_states[EXPRESSION].add_transition(Constants.TokenType.And)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Or)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Xor)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Not)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Variable)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Comma)
-	_states[EXPRESSION].add_transition(Constants.TokenType.TrueToken)
-	_states[EXPRESSION].add_transition(Constants.TokenType.FalseToken)
-	_states[EXPRESSION].add_transition(Constants.TokenType.NullToken)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Identifier)
+	states[EXPRESSION] = LexerState.new(patterns)
+	states[EXPRESSION].add_transition(Constants.TokenType.EndCommand, BASE)
+	states[EXPRESSION].add_transition(Constants.TokenType.Number)
+	states[EXPRESSION].add_transition(Constants.TokenType.Str)
+	states[EXPRESSION].add_transition(Constants.TokenType.LeftParen)
+	states[EXPRESSION].add_transition(Constants.TokenType.RightParen)
+	states[EXPRESSION].add_transition(Constants.TokenType.EqualTo)
+	states[EXPRESSION].add_transition(Constants.TokenType.EqualToOrAssign)
+	states[EXPRESSION].add_transition(Constants.TokenType.NotEqualTo)
+	states[EXPRESSION].add_transition(Constants.TokenType.GreaterThanOrEqualTo)
+	states[EXPRESSION].add_transition(Constants.TokenType.GreaterThan)
+	states[EXPRESSION].add_transition(Constants.TokenType.LessThanOrEqualTo)
+	states[EXPRESSION].add_transition(Constants.TokenType.LessThan)
+	states[EXPRESSION].add_transition(Constants.TokenType.Add)
+	states[EXPRESSION].add_transition(Constants.TokenType.Minus)
+	states[EXPRESSION].add_transition(Constants.TokenType.Multiply)
+	states[EXPRESSION].add_transition(Constants.TokenType.Divide)
+	states[EXPRESSION].add_transition(Constants.TokenType.Modulo)
+	states[EXPRESSION].add_transition(Constants.TokenType.And)
+	states[EXPRESSION].add_transition(Constants.TokenType.Or)
+	states[EXPRESSION].add_transition(Constants.TokenType.Xor)
+	states[EXPRESSION].add_transition(Constants.TokenType.Not)
+	states[EXPRESSION].add_transition(Constants.TokenType.Variable)
+	states[EXPRESSION].add_transition(Constants.TokenType.Comma)
+	states[EXPRESSION].add_transition(Constants.TokenType.TrueToken)
+	states[EXPRESSION].add_transition(Constants.TokenType.FalseToken)
+	states[EXPRESSION].add_transition(Constants.TokenType.NullToken)
+	states[EXPRESSION].add_transition(Constants.TokenType.Identifier)
 
-	_states[LINK] = LexerState.new(patterns)
-	_states[LINK].add_transition(Constants.TokenType.OptionEnd, BASE, true)
-	_states[LINK].add_transition(Constants.TokenType.OptionDelimit, link_destination, true)
-	_states[LINK].add_text_rule(Constants.TokenType.Text)
+	states[LINK] = LexerState.new(patterns)
+	states[LINK].add_transition(Constants.TokenType.OptionEnd, BASE, true)
+	states[LINK].add_transition(Constants.TokenType.OptionDelimit, link_destination, true)
+	states[LINK].add_text_rule(Constants.TokenType.Text)
 
-	_states[link_destination] = LexerState.new(patterns)
-	_states[link_destination].add_transition(Constants.TokenType.Identifier)
-	_states[link_destination].add_transition(Constants.TokenType.OptionEnd, BASE)
+	states[link_destination] = LexerState.new(patterns)
+	states[link_destination].add_transition(Constants.TokenType.Identifier)
+	states[link_destination].add_transition(Constants.TokenType.OptionEnd, BASE)
 
-	_defaultState = _states[BASE]
+	default_state = states[BASE]
 
-	for stateKey in _states.keys():
-		_states[stateKey].stateName = stateKey
+	for key in states.keys():
+		states[key].name = key
 
 func tokenize():
-	_indentStack.clear()
-	_indentStack.push_front(IntBoolPair.new(0, false))
-	_shouldTrackIndent = false
 
 	var tokens = []
 
-	_currentState = _defaultState
+	indent_stack.clear()
+	indent_stack.push_front([0, false])
+	should_track_indent = false
+	current_state = default_state
 
 	var lines = text.split(LINE_SEPARATOR)
+	lines.append('')
+	var line_number = 1
 
-	var line_number : int = 1
-	lines.append('')
-
 	for line in lines:
 		tokens += tokenize_line(line, line_number)
 		line_number += 1
 
-	var endOfInput = Token.new(
+	var end_of_input = Token.new(
 		Constants.TokenType.EndOfInput,
-		_currentState,
+		current_state,
 		line_number,
 		0
 	)
-	tokens.append(endOfInput)
+	tokens.append(end_of_input)
 
 	return tokens
 
 func tokenize_line(line, line_number):
-	var tokenStack : Array = []
+	var token_stack = []
 
-	var freshLine = line.replace('\t',' ').replace('\r','')
+	var fresh_line = line.replace('\t',' ').replace('\r','')
 
 	#record indentation
 	var indentation = line_indentation(line)
-	var prevIndentation = _indentStack.front()
+	var previous_indentation = indent_stack.front()[0]
 
-	if _shouldTrackIndent && indentation > prevIndentation.key:
-		#we add an indenation token to record indent level
-		_indentStack.push_front(IntBoolPair.new(indentation,true))
+	if should_track_indent && indentation > previous_indentation:
+		indent_stack.push_front([indentation, true])
 
-		var indent : Token = Token.new(
+		var indent = Token.new(
 			Constants.TokenType.Indent,
-			_currentState,
+			current_state,
 			filename,
 			line_number,
-			prevIndentation.key
+			previous_indentation
 		)
-		indent.value = '%*s' % [indentation - prevIndentation.key,'']
+		indent.value = '%*s' % [indentation - previous_indentation, '']
 
-		_shouldTrackIndent = false
-		tokenStack.push_front(indent)
+		should_track_indent = false
+		token_stack.push_front(indent)
 
-	elif indentation < prevIndentation.key:
-		#de-indent and then emit indentaiton token
-
-		while indentation < _indentStack.front().key:
-			var top : IntBoolPair = _indentStack.pop_front()
-			if top.value:
-				var deIndent : Token = Token.new(Constants.TokenType.Dedent,_currentState,line_number,0)
-				tokenStack.push_front(deIndent)
+	elif indentation < previous_indentation:
+		while indentation < indent_stack.front()[0]:
+			var top = indent_stack.pop_front()[1]
+			if top:
+				var deindent = Token.new(Constants.TokenType.Dedent, current_state, line_number, 0)
+				token_stack.push_front(deindent)
 
 
-	var column : int = indentation
+	var column = indentation
+	var whitespace = RegEx.new()
+	whitespace.compile(WHITESPACE)
 
-	var whitespace : RegEx = RegEx.new()
-	var error = whitespace.compile(WHITESPACE)
-	if error != OK:
-		printerr('unable to compile regex WHITESPACE')
-		return []
 
-	while column < freshLine.length():
-
-		if freshLine.substr(column).begins_with(LINE_COMENT):
+	while column < fresh_line.length():
+		if fresh_line.substr(column).begins_with(LINE_COMENT):
 			break
 
-		var matched : bool = false
+		var matched = false
 
-		for rule in _currentState.rules:
-			var found = rule.regex.search(freshLine, column)
+		for rule in current_state.rules:
+			var found = rule.regex.search(fresh_line, column)
 
 			if !found:
 				continue
 
-			var tokenText : String
+			var token_text = ''
 
+			# NOTE: If this is text then we back up to the most recent delimiting token
+			# and treat everything from there as text.
 			if rule.token_type == Constants.TokenType.Text:
-				#if this is text then we back up to the most recent
-				#delimiting token and treat everything from there as text.
 
-				var startIndex : int = indentation
+				var start_index = indentation
 
-				if tokenStack.size() > 0 :
-					while tokenStack.front().type == Constants.TokenType.Identifier:
-						tokenStack.pop_front()
+				if token_stack.size() > 0 :
+					while token_stack.front().type == Constants.TokenType.Identifier:
+						token_stack.pop_front()
 
-					var startDelimitToken : Token = tokenStack.front()
-					startIndex = startDelimitToken.column
+					var start_delimit_token = token_stack.front()
+					start_index = start_delimit_token.column
 
-					if startDelimitToken.type == Constants.TokenType.Indent:
-						startIndex += startDelimitToken.value.length()
-					if startDelimitToken.type == Constants.TokenType.Dedent:
-						startIndex = indentation
+					#
+					if start_delimit_token.type == Constants.TokenType.Indent:
+						start_index += start_delimit_token.value.length()
+					if start_delimit_token.type == Constants.TokenType.Dedent:
+						start_index = indentation
 
-				column = startIndex
+				column = start_index
 				var end_index = found.get_start() + found.get_string().length()
 
-				tokenText = freshLine.substr(startIndex, end_index - startIndex)
+				token_text = fresh_line.substr(start_index, end_index - start_index)
 			else:
-				tokenText = found.get_string()
+				token_text = found.get_string()
 
-			column += tokenText.length()
+			column += token_text.length()
 
 			#pre-proccess string
 			if rule.token_type == Constants.TokenType.Str:
-				tokenText = tokenText.substr(1, tokenText.length() - 2)
-				tokenText = tokenText.replace('\\\\', '\\')
-				tokenText = tokenText.replace('\\\'','\'')
+				token_text = token_text.substr(1, token_text.length() - 2)
+				token_text = token_text.replace('\\\\', '\\')
+				token_text = token_text.replace('\\\'','\'')
 
 			var token = Token.new(
 				rule.token_type,
-				_currentState,
+				current_state,
 				filename,
 				line_number,
 				column,
-				tokenText
+				token_text
 			)
 			token.delimits_text = rule.delimits_text
 
-			tokenStack.push_front(token)
+			token_stack.push_front(token)
 
 			if rule.enter_state != null and rule.enter_state.length() > 0:
-				if not _states.has(rule.enter_state):
+				if not states.has(rule.enter_state):
 					printerr('State[%s] not known - line(%s) col(%s)' % [rule.enter_state, line_number, column])
 					return []
 
-				enter_state(_states[rule.enter_state])
+				enter_state(states[rule.enter_state])
 
-				if _shouldTrackIndent:
-					if _indentStack.front().key < indentation:
-						_indentStack.append(IntBoolPair.new(indentation, false))
+				if should_track_indent:
+					if indent_stack.front()[0] < indentation:
+						indent_stack.append([indentation, false])
 
 			matched = true
 			break
 
 		if not matched:
			var rules = []
-			for rule in _currentState.rules:
+			for rule in current_state.rules:
 				rules.append('"%s" (%s)' % [Constants.token_type_name(rule.token_type), rule.human_readable_identifier])
 
 			var error_data = [
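The indent stack changes representation in this hunk: the IntBoolPair helper (deleted in the final hunk below) gives way to plain two-element arrays, so `.key` becomes index `[0]` and `.value` index `[1]`. A minimal sketch of the two forms with illustrative values — per tokenize_line above, the bool marks whether that indent level emitted an Indent token that needs a matching Dedent:

	# Old form: a dedicated key/value object.
	var pair = IntBoolPair.new(4, true)
	print(pair.key)    # 4 -> indentation level
	print(pair.value)  # true -> an Indent token was emitted at this level

	# New form: a bare [int, bool] array, same meaning by position.
	var entry = [4, true]
	print(entry[0])  # 4
	print(entry[1])  # true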
@@ -340,30 +325,30 @@ func tokenize_line(line, line_number):
 			]
 			assert(false, 'Expected %s in file %s in node "%s" on line #%d (column #%d)' % error_data)
 
-		var lastWhiteSpace = whitespace.search(line, column)
-		if lastWhiteSpace:
-			column += lastWhiteSpace.get_string().length()
+		var last_whitespace = whitespace.search(line, column)
+		if last_whitespace:
+			column += last_whitespace.get_string().length()
 
 
-	tokenStack.invert()
+	token_stack.invert()
 
-	return tokenStack
+	return token_stack
 
-func line_indentation(line:String)->int:
-	var indentRegex : RegEx = RegEx.new()
-	indentRegex.compile('^(\\s*)')
+func line_indentation(line):
+	var indent_regex = RegEx.new()
+	indent_regex.compile('^(\\s*)')
 
-	var found : RegExMatch = indentRegex.search(line)
+	var found = indent_regex.search(line)
 
-	if !found || found.get_string().length() <= 0:
+	if !found or found.get_string().length() <= 0:
 		return 0
 
 	return found.get_string().length()
 
-func enter_state(state:LexerState):
-	_currentState = state;
-	if _currentState.track_indent:
-		_shouldTrackIndent = true
+func enter_state(state):
+	current_state = state;
+	if current_state.track_indent:
+		should_track_indent = true
 
 class Token:
 	var type = -1
@@ -375,38 +360,36 @@ class Token:
 	var text = ''
 
 	var delimits_text = false
-	var paramCount = -1
-	var lexerState = ''
+	var parameter_count = -1
+	var lexer_state = ''
 
 	func _init(_type, _state, _filename, _line_number = -1, _column = -1, _value = ''):
 		type = _type
-		lexerState = _state.stateName
+		lexer_state = _state.name
 		filename = _filename
 		line_number = _line_number
 		column = _column
 		value = _value
 
 	func _to_string():
-		return '%s (%s) at %s:%s (state: %s)' % [Constants.token_type_name(type),value,line_number,column,lexerState]
+		return '%s (%s) at %s:%s (state: %s)' % [Constants.token_type_name(type),value, line_number, column, lexer_state]
 
 class LexerState:
 
-	var stateName : String
-	var patterns : Dictionary
-	var rules : Array = []
-	var track_indent : bool = false
+	var name = ''
+	var patterns = {}
+	var rules = []
+	var track_indent = false
 
 	func _init(_patterns):
 		patterns = _patterns
 
-	func add_transition(type : int, state : String = '',delimitText : bool = false)->Rule:
+	func add_transition(type, state = '', delimit_text = false):
 		var pattern = '\\G%s' % patterns[type][0]
 		# print('pattern = %s' % pattern)
-		var rule = Rule.new(type, pattern, patterns[type][1], state, delimitText)
+		var rule = Rule.new(type, pattern, patterns[type][1], state, delimit_text)
 		rules.append(rule)
 		return rule
 
-	func add_text_rule(type : int, state : String = '')->Rule:
+	func add_text_rule(type, state = ''):
 		if contains_text_rule() :
 			printerr('State already contains Text rule')
 			return null
@@ -417,25 +400,25 @@ class LexerState:
 			delimiters.append('%s' % rule.regex.get_pattern().substr(2))
 
 		var pattern = '\\G((?!%s).)*' % [PoolStringArray(delimiters).join('|')]
-		var rule : Rule = add_transition(type,state)
+		var rule = add_transition(type, state)
 		rule.regex = RegEx.new()
 		rule.regex.compile(pattern)
 		rule.is_text_rule = true
 		return rule
 
-	func contains_text_rule()->bool:
+	func contains_text_rule():
 		for rule in rules:
 			if rule.is_text_rule:
 				return true
 		return false
 
 class Rule:
-	var regex : RegEx
+	var regex
 
-	var enter_state : String
-	var token_type : int
-	var is_text_rule : bool
-	var delimits_text : bool
+	var enter_state = ''
+	var token_type = -1
+	var is_text_rule = false
+	var delimits_text = false
 	var human_readable_identifier = ''
 
 	func _init(_type, _regex, _human_readable_identifier, _enter_state, _delimits_text):
@@ -450,12 +433,3 @@ class Rule:
 
 	func _to_string():
 		return '[Rule : %s (%s) - %s]' % [Constants.token_type_name(token_type), human_readable_identifier, regex]
-
-class IntBoolPair:
-	var key = -1
-	var value = false
-
-	func _init(_key, _value):
-		key = _key
-		value = _value
-
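For orientation, a minimal sketch of driving the lexer after this commit — assuming the script above is saved as lexer.gd, LINE_SEPARATOR is a newline, and the Constants singleton with TokenType is autoloaded; the resource path, node title, and Yarn snippet are placeholders:

	# Hypothetical usage — _init(filename, title, text) and tokenize()
	# are the entry points shown in the diff above.
	var Lexer = load('res://lexer.gd')
	var lexer = Lexer.new('example.yarn', 'Start', '-> Option A\nSome reply')
	var tokens = lexer.tokenize()
	for token in tokens:
		print(token)  # Token._to_string(): 'Type (value) at line:column (state: name)'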