cleaned up lexer

2021-11-21 15:03:56 +01:00 · 2021-11-21 15:03:56 +01:00 · 9f60bbfd23
parent 83c4808d5a
commit 9f60bbfd23
1 changed files with 183 additions and 209 deletions
--- a/addons/Wol/core/compiler/lexer.gd
+++ b/addons/Wol/core/compiler/lexer.gd
@ -21,26 +21,26 @@ const DESTINATION = 'destination'
 var WHITESPACE = '\\s*'
 var _states = {}
 var _defaultState
 var _currentState
 var _indentStack = []
 var _shouldTrackIndent : bool = false
 var filename = ''
 var title = ''
 var text = ''
 var states = {}
 var default_state
 var current_state
 var indent_stack = []
 var should_track_indent = false
 func _init(_filename, _title, _text):
-	create_states()
+	createstates()
 	filename = _filename
 	title = _title
 	text = _text
-func create_states():
+func createstates():
-	var patterns : Dictionary = {}
+	var patterns = {}
 	patterns[Constants.TokenType.Text] = ['.*', 'any text']
 	patterns[Constants.TokenType.Number] = ['\\-?[0-9]+(\\.[0-9+])?', 'any number']
@ -86,249 +86,234 @@ func create_states():
 	patterns[Constants.TokenType.Set] = ['set(?!\\w)', '"set"']
 	patterns[Constants.TokenType.ShortcutOption] = ['\\-\\>\\s*', '"->"']
-	#compound states
+	var shortcut_option = SHORTCUT + DASH + OPTION
-	var shortcut_option : String = SHORTCUT + DASH + OPTION
+	var shortcut_option_tag = shortcut_option + DASH + TAG
-	var shortcut_option_tag : String = shortcut_option + DASH + TAG
+	var command_or_expression = COMMAND + DASH + OR + DASH + EXPRESSION
-	var command_or_expression : String = COMMAND + DASH + OR + DASH + EXPRESSION
+	var link_destination = LINK + DASH + DESTINATION
 	var link_destination : String = LINK + DASH + DESTINATION
-	_states = {}
+	states = {}
-	_states[BASE] = LexerState.new(patterns)
+	states[BASE] = LexerState.new(patterns)
-	_states[BASE].add_transition(Constants.TokenType.BeginCommand,COMMAND,true)
+	states[BASE].add_transition(Constants.TokenType.BeginCommand, COMMAND, true)
-	_states[BASE].add_transition(Constants.TokenType.OptionStart,LINK,true)
+	states[BASE].add_transition(Constants.TokenType.OptionStart, LINK, true)
-	_states[BASE].add_transition(Constants.TokenType.ShortcutOption, shortcut_option)
+	states[BASE].add_transition(Constants.TokenType.ShortcutOption, shortcut_option)
-	_states[BASE].add_transition(Constants.TokenType.TagMarker,TAG,true)
+	states[BASE].add_transition(Constants.TokenType.TagMarker, TAG, true)
-	_states[BASE].add_text_rule(Constants.TokenType.Text)
+	states[BASE].add_text_rule(Constants.TokenType.Text)
-	_states[TAG] = LexerState.new(patterns)
+	states[TAG] = LexerState.new(patterns)
-	_states[TAG].add_transition(Constants.TokenType.Identifier,BASE)
+	states[TAG].add_transition(Constants.TokenType.Identifier, BASE)
-	_states[shortcut_option] = LexerState.new(patterns)
+	states[shortcut_option] = LexerState.new(patterns)
-	_states[shortcut_option].track_indent = true
+	states[shortcut_option].track_indent = true
-	_states[shortcut_option].add_transition(Constants.TokenType.BeginCommand,EXPRESSION,true)
+	states[shortcut_option].add_transition(Constants.TokenType.BeginCommand, EXPRESSION, true)
-	_states[shortcut_option].add_transition(Constants.TokenType.TagMarker,shortcut_option_tag,true)
+	states[shortcut_option].add_transition(Constants.TokenType.TagMarker, shortcut_option_tag, true)
-	_states[shortcut_option].add_text_rule(Constants.TokenType.Text,BASE)
+	states[shortcut_option].add_text_rule(Constants.TokenType.Text, BASE)
-	_states[shortcut_option_tag] = LexerState.new(patterns)
+	states[shortcut_option_tag] = LexerState.new(patterns)
-	_states[shortcut_option_tag].add_transition(Constants.TokenType.Identifier,shortcut_option)
+	states[shortcut_option_tag].add_transition(Constants.TokenType.Identifier, shortcut_option)
-	_states[COMMAND] = LexerState.new(patterns)
+	states[COMMAND] = LexerState.new(patterns)
-	_states[COMMAND].add_transition(Constants.TokenType.IfToken,EXPRESSION)
+	states[COMMAND].add_transition(Constants.TokenType.IfToken, EXPRESSION)
-	_states[COMMAND].add_transition(Constants.TokenType.ElseToken)
+	states[COMMAND].add_transition(Constants.TokenType.ElseToken)
-	_states[COMMAND].add_transition(Constants.TokenType.ElseIf,EXPRESSION)
+	states[COMMAND].add_transition(Constants.TokenType.ElseIf, EXPRESSION)
-	_states[COMMAND].add_transition(Constants.TokenType.EndIf)
+	states[COMMAND].add_transition(Constants.TokenType.EndIf)
-	_states[COMMAND].add_transition(Constants.TokenType.Set, ASSIGNMENT)
+	states[COMMAND].add_transition(Constants.TokenType.Set, ASSIGNMENT)
-	_states[COMMAND].add_transition(Constants.TokenType.EndCommand,BASE,true)
+	states[COMMAND].add_transition(Constants.TokenType.EndCommand, BASE, true)
-	_states[COMMAND].add_transition(Constants.TokenType.Identifier,command_or_expression)
+	states[COMMAND].add_transition(Constants.TokenType.Identifier, command_or_expression)
-	_states[COMMAND].add_text_rule(Constants.TokenType.Text)
+	states[COMMAND].add_text_rule(Constants.TokenType.Text)
-	_states[command_or_expression] = LexerState.new(patterns)
+	states[command_or_expression] = LexerState.new(patterns)
-	_states[command_or_expression].add_transition(Constants.TokenType.LeftParen,EXPRESSION)
+	states[command_or_expression].add_transition(Constants.TokenType.LeftParen, EXPRESSION)
-	_states[command_or_expression].add_transition(Constants.TokenType.EndCommand,BASE,true)
+	states[command_or_expression].add_transition(Constants.TokenType.EndCommand, BASE, true)
-	_states[command_or_expression].add_text_rule(Constants.TokenType.Text)
+	states[command_or_expression].add_text_rule(Constants.TokenType.Text)
-	_states[ASSIGNMENT] = LexerState.new(patterns)
+	states[ASSIGNMENT] = LexerState.new(patterns)
-	_states[ASSIGNMENT].add_transition(Constants.TokenType.Variable)
+	states[ASSIGNMENT].add_transition(Constants.TokenType.Variable)
-	_states[ASSIGNMENT].add_transition(Constants.TokenType.EqualToOrAssign, EXPRESSION)
+	states[ASSIGNMENT].add_transition(Constants.TokenType.EqualToOrAssign, EXPRESSION)
-	_states[ASSIGNMENT].add_transition(Constants.TokenType.AddAssign, EXPRESSION)
+	states[ASSIGNMENT].add_transition(Constants.TokenType.AddAssign, EXPRESSION)
-	_states[ASSIGNMENT].add_transition(Constants.TokenType.MinusAssign, EXPRESSION)
+	states[ASSIGNMENT].add_transition(Constants.TokenType.MinusAssign, EXPRESSION)
-	_states[ASSIGNMENT].add_transition(Constants.TokenType.MultiplyAssign, EXPRESSION)
+	states[ASSIGNMENT].add_transition(Constants.TokenType.MultiplyAssign, EXPRESSION)
-	_states[ASSIGNMENT].add_transition(Constants.TokenType.DivideAssign, EXPRESSION)
+	states[ASSIGNMENT].add_transition(Constants.TokenType.DivideAssign, EXPRESSION)
-	_states[EXPRESSION] = LexerState.new(patterns)
+	states[EXPRESSION] = LexerState.new(patterns)
-	_states[EXPRESSION].add_transition(Constants.TokenType.EndCommand, BASE)
+	states[EXPRESSION].add_transition(Constants.TokenType.EndCommand, BASE)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Number)
+	states[EXPRESSION].add_transition(Constants.TokenType.Number)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Str)
+	states[EXPRESSION].add_transition(Constants.TokenType.Str)
-	_states[EXPRESSION].add_transition(Constants.TokenType.LeftParen)
+	states[EXPRESSION].add_transition(Constants.TokenType.LeftParen)
-	_states[EXPRESSION].add_transition(Constants.TokenType.RightParen)
+	states[EXPRESSION].add_transition(Constants.TokenType.RightParen)
-	_states[EXPRESSION].add_transition(Constants.TokenType.EqualTo)
+	states[EXPRESSION].add_transition(Constants.TokenType.EqualTo)
-	_states[EXPRESSION].add_transition(Constants.TokenType.EqualToOrAssign)
+	states[EXPRESSION].add_transition(Constants.TokenType.EqualToOrAssign)
-	_states[EXPRESSION].add_transition(Constants.TokenType.NotEqualTo)
+	states[EXPRESSION].add_transition(Constants.TokenType.NotEqualTo)
-	_states[EXPRESSION].add_transition(Constants.TokenType.GreaterThanOrEqualTo)
+	states[EXPRESSION].add_transition(Constants.TokenType.GreaterThanOrEqualTo)
-	_states[EXPRESSION].add_transition(Constants.TokenType.GreaterThan)
+	states[EXPRESSION].add_transition(Constants.TokenType.GreaterThan)
-	_states[EXPRESSION].add_transition(Constants.TokenType.LessThanOrEqualTo)
+	states[EXPRESSION].add_transition(Constants.TokenType.LessThanOrEqualTo)
-	_states[EXPRESSION].add_transition(Constants.TokenType.LessThan)
+	states[EXPRESSION].add_transition(Constants.TokenType.LessThan)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Add)
+	states[EXPRESSION].add_transition(Constants.TokenType.Add)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Minus)
+	states[EXPRESSION].add_transition(Constants.TokenType.Minus)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Multiply)
+	states[EXPRESSION].add_transition(Constants.TokenType.Multiply)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Divide)
+	states[EXPRESSION].add_transition(Constants.TokenType.Divide)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Modulo)
+	states[EXPRESSION].add_transition(Constants.TokenType.Modulo)
-	_states[EXPRESSION].add_transition(Constants.TokenType.And)
+	states[EXPRESSION].add_transition(Constants.TokenType.And)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Or)
+	states[EXPRESSION].add_transition(Constants.TokenType.Or)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Xor)
+	states[EXPRESSION].add_transition(Constants.TokenType.Xor)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Not)
+	states[EXPRESSION].add_transition(Constants.TokenType.Not)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Variable)
+	states[EXPRESSION].add_transition(Constants.TokenType.Variable)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Comma)
+	states[EXPRESSION].add_transition(Constants.TokenType.Comma)
-	_states[EXPRESSION].add_transition(Constants.TokenType.TrueToken)
+	states[EXPRESSION].add_transition(Constants.TokenType.TrueToken)
-	_states[EXPRESSION].add_transition(Constants.TokenType.FalseToken)
+	states[EXPRESSION].add_transition(Constants.TokenType.FalseToken)
-	_states[EXPRESSION].add_transition(Constants.TokenType.NullToken)
+	states[EXPRESSION].add_transition(Constants.TokenType.NullToken)
-	_states[EXPRESSION].add_transition(Constants.TokenType.Identifier)
+	states[EXPRESSION].add_transition(Constants.TokenType.Identifier)
-	_states[LINK] = LexerState.new(patterns)
+	states[LINK] = LexerState.new(patterns)
-	_states[LINK].add_transition(Constants.TokenType.OptionEnd, BASE, true)
+	states[LINK].add_transition(Constants.TokenType.OptionEnd, BASE, true)
-	_states[LINK].add_transition(Constants.TokenType.OptionDelimit, link_destination, true)
+	states[LINK].add_transition(Constants.TokenType.OptionDelimit, link_destination, true)
-	_states[LINK].add_text_rule(Constants.TokenType.Text)
+	states[LINK].add_text_rule(Constants.TokenType.Text)
-	_states[link_destination] = LexerState.new(patterns)
+	states[link_destination] = LexerState.new(patterns)
-	_states[link_destination].add_transition(Constants.TokenType.Identifier)
+	states[link_destination].add_transition(Constants.TokenType.Identifier)
-	_states[link_destination].add_transition(Constants.TokenType.OptionEnd, BASE)
+	states[link_destination].add_transition(Constants.TokenType.OptionEnd, BASE)
-	_defaultState = _states[BASE]
+	default_state = states[BASE]
-	for stateKey in _states.keys():
+	for key in states.keys():
-		_states[stateKey].stateName = stateKey
+		states[key].name = key
 func tokenize():
 	_indentStack.clear()
 	_indentStack.push_front(IntBoolPair.new(0, false))
 	_shouldTrackIndent = false
 	var tokens = []
-	_currentState = _defaultState
+	indent_stack.clear()
 	indent_stack.push_front([0, false])
 	should_track_indent = false
 	current_state = default_state
 	var lines = text.split(LINE_SEPARATOR)
-	lines.append('')
+	var line_number = 1
-	var line_number : int = 1
+	lines.append('')
 	for line in lines:
 		tokens += tokenize_line(line, line_number)
 		line_number += 1
-	var endOfInput = Token.new(
+	var end_of_input = Token.new(
 		Constants.TokenType.EndOfInput,
-		_currentState,
+		current_state,
 		line_number,
 		0
 	)
-	tokens.append(endOfInput)
+	tokens.append(end_of_input)
 	return tokens
 func tokenize_line(line, line_number):
-	var tokenStack : Array = []
+	var token_stack = []
-	var freshLine = line.replace('\t','    ').replace('\r','')
+	var fresh_line = line.replace('\t','    ').replace('\r','')
 	#record indentation
 	var indentation = line_indentation(line)
-	var prevIndentation = _indentStack.front()
+	var previous_indentation = indent_stack.front()[0]
-	if _shouldTrackIndent && indentation > prevIndentation.key:
+	if should_track_indent && indentation > previous_indentation:
-		#we add an indenation token to record indent level
+		indent_stack.push_front([indentation, true])
 		_indentStack.push_front(IntBoolPair.new(indentation,true))
-		var indent : Token = Token.new(
+		var indent = Token.new(
 			Constants.TokenType.Indent,
-			_currentState,
+			current_state,
 			filename,
 			line_number,
-			prevIndentation.key
+			previous_indentation
 		)
-		indent.value = '%*s' % [indentation - prevIndentation.key,'']
+		indent.value = '%*s' % [indentation - previous_indentation, '']
-		_shouldTrackIndent = false
+		should_track_indent = false
-		tokenStack.push_front(indent)
+		token_stack.push_front(indent)
-	elif indentation < prevIndentation.key:
+	elif indentation < previous_indentation:
-		#de-indent and then emit indentaiton token
+		while indentation < indent_stack.front()[0]:
-
+			var top = indent_stack.pop_front()[1]
-		while indentation < _indentStack.front().key:
+			if top:
-			var top : IntBoolPair = _indentStack.pop_front()
+				var deindent = Token.new(Constants.TokenType.Dedent, current_state, line_number, 0)
-			if top.value:
+				token_stack.push_front(deindent)
 				var deIndent : Token = Token.new(Constants.TokenType.Dedent,_currentState,line_number,0)
 				tokenStack.push_front(deIndent)
-	
+	var column = indentation
-	var column : int = indentation
+	var whitespace = RegEx.new()
 	whitespace.compile(WHITESPACE)
-	var whitespace : RegEx = RegEx.new()
+	while column < fresh_line.length():
-	var error = whitespace.compile(WHITESPACE)
+		if fresh_line.substr(column).begins_with(LINE_COMENT):
 	if error != OK:
 		printerr('unable to compile regex WHITESPACE')
 		return []
 	while column < freshLine.length():
 		if freshLine.substr(column).begins_with(LINE_COMENT):
 			break
-		var matched : bool = false
+		var matched = false
-		for rule in _currentState.rules:
+		for rule in current_state.rules:
-			var found = rule.regex.search(freshLine, column)
+			var found = rule.regex.search(fresh_line, column)
 			if !found:
 				continue
-			var tokenText : String
+			var token_text = ''
 			# NOTE: If this is text then we back up to the most recent delimiting token
 			#		and treat everything from there as text.
 			if rule.token_type == Constants.TokenType.Text:
 				#if this is text then we back up to the most recent
 				#delimiting token and treat everything from there as text.
-				var startIndex : int = indentation
+				var start_index = indentation
-				if tokenStack.size() > 0 :
+				if token_stack.size() > 0 :
-					while tokenStack.front().type == Constants.TokenType.Identifier:
+					while token_stack.front().type == Constants.TokenType.Identifier:
-						tokenStack.pop_front()
+						token_stack.pop_front()
-					var startDelimitToken : Token = tokenStack.front()
+					var start_delimit_token = token_stack.front()
-					startIndex =  startDelimitToken.column
+					start_index =  start_delimit_token.column
-					if startDelimitToken.type == Constants.TokenType.Indent:
+					if start_delimit_token.type == Constants.TokenType.Indent:
-						startIndex += startDelimitToken.value.length()
+						start_index += start_delimit_token.value.length()
-					if startDelimitToken.type == Constants.TokenType.Dedent:
+					if start_delimit_token.type == Constants.TokenType.Dedent:
-						startIndex = indentation
+						start_index = indentation
 				#
-				column = startIndex
+				column = start_index
 				var end_index = found.get_start() + found.get_string().length()
-				tokenText = freshLine.substr(startIndex, end_index - startIndex)
+				token_text = fresh_line.substr(start_index, end_index - start_index)
 			else:
-				tokenText = found.get_string()
+				token_text = found.get_string()
-			column += tokenText.length()
+			column += token_text.length()
 			#pre-proccess string
 			if rule.token_type == Constants.TokenType.Str:
-				tokenText = tokenText.substr(1, tokenText.length() - 2)
+				token_text = token_text.substr(1, token_text.length() - 2)
-				tokenText = tokenText.replace('\\\\', '\\')
+				token_text = token_text.replace('\\\\', '\\')
-				tokenText = tokenText.replace('\\\'','\'')
+				token_text = token_text.replace('\\\'','\'')
 			var token = Token.new(
 				rule.token_type,
-				_currentState,
+				current_state,
 				filename,
 				line_number,
 				column,
-				tokenText
+				token_text
 			)
 			token.delimits_text = rule.delimits_text
-			tokenStack.push_front(token)
+			token_stack.push_front(token)
 			if rule.enter_state != null and rule.enter_state.length() > 0:
-				if not _states.has(rule.enter_state):
+				if not states.has(rule.enter_state):
 					printerr('State[%s] not known - line(%s) col(%s)' % [rule.enter_state, line_number, column])
 					return []
-				enter_state(_states[rule.enter_state])
+				enter_state(states[rule.enter_state])
-				if _shouldTrackIndent:
+				if should_track_indent:
-					if _indentStack.front().key < indentation:
+					if indent_stack.front()[0] < indentation:
-						_indentStack.append(IntBoolPair.new(indentation, false))
+						indent_stack.append([indentation, false])
 			matched = true
 			break
 		if not matched:
 			var rules = []
-			for rule in _currentState.rules:
+			for rule in current_state.rules:
 				rules.append('"%s" (%s)' % [Constants.token_type_name(rule.token_type), rule.human_readable_identifier])
 			var error_data = [
@ -340,30 +325,30 @@ func tokenize_line(line, line_number):
 			]
 			assert(false, 'Expected %s in file %s in node "%s" on line #%d (column #%d)' % error_data)
-		var lastWhiteSpace = whitespace.search(line, column)
+		var last_whitespace = whitespace.search(line, column)
-		if lastWhiteSpace:
+		if last_whitespace:
-			column += lastWhiteSpace.get_string().length()
+			column += last_whitespace.get_string().length()
-	tokenStack.invert()
+	token_stack.invert()
-	return tokenStack
+	return token_stack
-func line_indentation(line:String)->int:
+func line_indentation(line):
-	var indentRegex : RegEx = RegEx.new()
+	var indent_regex = RegEx.new()
-	indentRegex.compile('^(\\s*)')
+	indent_regex.compile('^(\\s*)')
-	var found : RegExMatch = indentRegex.search(line)
+	var found = indent_regex.search(line)
-	if !found || found.get_string().length() <= 0:
+	if !found or found.get_string().length() <= 0:
 		return 0
 	return found.get_string().length()
-func enter_state(state:LexerState):
+func enter_state(state):
-	_currentState = state;
+	current_state = state;
-	if _currentState.track_indent:
+	if current_state.track_indent:
-		_shouldTrackIndent = true
+		should_track_indent = true
 class Token:
 	var type = -1
@ -375,38 +360,36 @@ class Token:
 	var text = ''
 	var delimits_text = false
-	var paramCount = -1
+	var parameter_count = -1
-	var lexerState = ''
+	var lexer_state = ''
 	func _init(_type, _state, _filename, _line_number = -1, _column = -1, _value = ''):
 		type = _type
-		lexerState = _state.stateName
+		lexer_state = _state.name
 		filename = _filename
 		line_number = _line_number
 		column = _column
 		value = _value
 	func _to_string():
-		return '%s (%s) at %s:%s (state: %s)' % [Constants.token_type_name(type),value,line_number,column,lexerState]
+		return '%s (%s) at %s:%s (state: %s)' % [Constants.token_type_name(type),value, line_number, column, lexer_state]
 class LexerState:
-
+	var name = ''
-	var stateName : String
+	var patterns = {}
-	var patterns : Dictionary
+	var rules = []
-	var rules : Array = []
+	var track_indent = false
 	var track_indent : bool = false
 	func _init(_patterns):
 		patterns = _patterns
-	func add_transition(type : int, state : String = '',delimitText : bool = false)->Rule:
+	func add_transition(type, state = '', delimit_text = false):
 		var pattern = '\\G%s' % patterns[type][0]
-		# print('pattern = %s' % pattern)
+		var rule = Rule.new(type, pattern, patterns[type][1], state, delimit_text)
 		var rule = Rule.new(type, pattern, patterns[type][1], state, delimitText)
 		rules.append(rule)
 		return rule
-	func add_text_rule(type : int, state : String = '')->Rule:
+	func add_text_rule(type, state = ''):
 		if contains_text_rule() :
 			printerr('State already contains Text rule')
 			return null
@ -417,25 +400,25 @@ class LexerState:
 				delimiters.append('%s' % rule.regex.get_pattern().substr(2))
 		var pattern = '\\G((?!%s).)*' % [PoolStringArray(delimiters).join('|')]
-		var rule : Rule = add_transition(type,state)
+		var rule = add_transition(type, state)
 		rule.regex = RegEx.new()
 		rule.regex.compile(pattern)
 		rule.is_text_rule = true
 		return rule
-	func contains_text_rule()->bool:
+	func contains_text_rule():
 		for rule in rules:
 			if rule.is_text_rule:
 				return true
 		return false
 class Rule:
-	var regex : RegEx
+	var regex
-	var enter_state : String
+	var enter_state = ''
-	var token_type : int
+	var token_type = -1
-	var is_text_rule : bool
+	var is_text_rule = false
-	var delimits_text : bool
+	var delimits_text = false
 	var human_readable_identifier = ''
 	func _init(_type, _regex, _human_readable_identifier, _enter_state, _delimits_text):
@ -450,12 +433,3 @@ class Rule:
 	func _to_string():
 		return '[Rule : %s (%s) - %s]' % [Constants.token_type_name(token_type), human_readable_identifier, regex]
 class IntBoolPair:
 	var key = -1
 	var value = false
 	func _init(_key, _value):
 		key = _key
 		value = _value