tokenizer: Remove redundant token types

James Westman 2022-02-09 15:24:56 -06:00
parent bac008296a
commit 3bdcc83c4e
4 changed files with 17 additions and 38 deletions

@@ -25,9 +25,9 @@ from .lsp_utils import Completion
 new_statement_patterns = [
-    [(TokenType.OPEN_BLOCK, None)],
-    [(TokenType.CLOSE_BLOCK, None)],
-    [(TokenType.STMT_END, None)],
+    [(TokenType.PUNCTUATION, "{")],
+    [(TokenType.PUNCTUATION, "}")],
+    [(TokenType.PUNCTUATION, ";")],
 ]
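With the dedicated block/statement token types gone, a pattern entry like (TokenType.PUNCTUATION, "{") has to be told apart from (TokenType.PUNCTUATION, ";") by its string value. A minimal sketch of how such a (type, value) pattern can match, where None acts as a wildcard; the matches() function and its arguments are hypothetical, not blueprint-compiler's actual API:

from enum import Enum

class TokenType(Enum):
    PUNCTUATION = 7

# Hypothetical matcher: a pattern entry is (token_type, expected_str);
# an expected_str of None matches any token of that type.
def matches(pattern, token_type, token_str):
    expected_type, expected_str = pattern
    if token_type != expected_type:
        return False
    return expected_str is None or token_str == expected_str

# With a single PUNCTUATION type, the string is what tells "{" apart from ";".
assert matches((TokenType.PUNCTUATION, "{"), TokenType.PUNCTUATION, "{")
assert not matches((TokenType.PUNCTUATION, "{"), TokenType.PUNCTUATION, ";")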

@@ -313,7 +313,7 @@ class Statement(ParseNode):
             return True
         token = ctx.peek_token()
-        if token.type != TokenType.STMT_END:
+        if str(token) != ";":
             ctx.errors.append(CompileError("Expected `;`", token.start, token.end))
         else:
             ctx.next_token()
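Why comparing str(token) against ";" is sufficient here: as the tokenizer hunk below shows, Token.__str__ returns the slice of the source text between the token's start and end offsets, so a punctuation token stringifies to exactly its lexeme. A tiny self-contained illustration (this Token is a stripped-down stand-in, not the real class):

class Token:
    # Stand-in for the real Token: only the fields __str__ needs.
    def __init__(self, string, start, end):
        self.string, self.start, self.end = string, start, end

    def __str__(self):
        # Same body as in the diff below: slice the source text.
        return self.string[self.start:self.end]

tok = Token("prop: 1;", start=7, end=8)
assert str(tok) == ";"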

@@ -27,42 +27,26 @@ from .errors import CompileError
 class TokenType(Enum):
     EOF = 0
-    DIRECTIVE = 1
-    IDENT = 2
-    QUOTED = 3
-    NUMBER = 4
-    OPEN_PAREN = 5
-    CLOSE_PAREN = 6
-    OPEN_BLOCK = 7
-    CLOSE_BLOCK = 8
-    STMT_END = 9
-    OP = 10
-    WHITESPACE = 11
-    COMMENT = 12
-    OPEN_BRACKET = 13
-    CLOSE_BRACKET = 14
-    COMMA = 15
+    IDENT = 1
+    QUOTED = 2
+    NUMBER = 3
+    OP = 4
+    WHITESPACE = 5
+    COMMENT = 6
+    PUNCTUATION = 7
 
 _tokens = [
-    (TokenType.DIRECTIVE, r"@[\d\w\-_]+"),
     (TokenType.IDENT, r"[A-Za-z_][\d\w\-_]*"),
     (TokenType.QUOTED, r'"(\\"|[^"\n])*"'),
     (TokenType.QUOTED, r"'(\\'|[^'\n])*'"),
     (TokenType.NUMBER, r"[-+]?[\d_]+(\.[\d_]+)?"),
     (TokenType.NUMBER, r"0x[A-Fa-f0-9]+"),
-    (TokenType.OPEN_PAREN, r"\("),
-    (TokenType.CLOSE_PAREN, r"\)"),
-    (TokenType.OPEN_BLOCK, r"\{"),
-    (TokenType.CLOSE_BLOCK, r"\}"),
-    (TokenType.STMT_END, r";"),
     (TokenType.WHITESPACE, r"\s+"),
     (TokenType.COMMENT, r"\/\*[\s\S]*?\*\/"),
     (TokenType.COMMENT, r"\/\/[^\n]*"),
-    (TokenType.OPEN_BRACKET, r"\["),
-    (TokenType.CLOSE_BRACKET, r"\]"),
     (TokenType.OP, r"[:=\.=\|<>\+\-/\*]+"),
-    (TokenType.COMMA, r"\,"),
+    (TokenType.PUNCTUATION, r"\(|\)|\{|\}|;|\[|\]|\,"),
 ]
 _TOKENS = [(type, re.compile(regex)) for (type, regex) in _tokens]
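The merged table relies on first-match-wins tokenization: each compiled regex is tried in order at the current position, so the single PUNCTUATION alternation can replace seven one-character token types without changing what gets matched. A minimal sketch of such a loop, using an abbreviated table in the same shape as _tokens above (an illustration, not the project's actual tokenizer):

import re
from enum import Enum

class TokenType(Enum):
    IDENT = 1
    WHITESPACE = 5
    PUNCTUATION = 7

_tokens = [
    (TokenType.IDENT, r"[A-Za-z_][\d\w\-_]*"),
    (TokenType.WHITESPACE, r"\s+"),
    (TokenType.PUNCTUATION, r"\(|\)|\{|\}|;|\[|\]|\,"),
]
_TOKENS = [(t, re.compile(r)) for (t, r) in _tokens]

def tokenize(src):
    # Try each pattern at the current offset; the first match wins.
    i = 0
    while i < len(src):
        for token_type, regex in _TOKENS:
            m = regex.match(src, i)
            if m:
                yield token_type, m.group()
                i = m.end()
                break
        else:
            raise ValueError(f"no token matches at offset {i}")

print(list(tokenize("obj { prop; }")))
# -> IDENT 'obj', WHITESPACE ' ', PUNCTUATION '{', ..., PUNCTUATION '}'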
@@ -77,11 +61,6 @@ class Token:
     def __str__(self):
         return self.string[self.start:self.end]
 
-    def is_directive(self, directive) -> bool:
-        if self.type != TokenType.DIRECTIVE:
-            return False
-        return str(self) == "@" + directive
-
     def get_number(self):
         if self.type != TokenType.NUMBER:
             return None

@@ -40,11 +40,11 @@ class TestTokenizer(unittest.TestCase):
     def test_basic(self):
         self.assert_tokenize("ident(){}; \n <<+>>*/=", [
             (TokenType.IDENT, "ident"),
-            (TokenType.OPEN_PAREN, "("),
-            (TokenType.CLOSE_PAREN, ")"),
-            (TokenType.OPEN_BLOCK, "{"),
-            (TokenType.CLOSE_BLOCK, "}"),
-            (TokenType.STMT_END, ";"),
+            (TokenType.PUNCTUATION, "("),
+            (TokenType.PUNCTUATION, ")"),
+            (TokenType.PUNCTUATION, "{"),
+            (TokenType.PUNCTUATION, "}"),
+            (TokenType.PUNCTUATION, ";"),
             (TokenType.WHITESPACE, " \n "),
             (TokenType.OP, "<<+>>*/="),
             (TokenType.EOF, ""),
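The test compares (type, string) pairs, so it needed no structural changes beyond renaming the expected types. A plausible shape for the assert_tokenize helper it calls, reusing the tokenize() sketch above (a hypothetical reconstruction; the real helper is not shown in this diff):

import unittest

class TestTokenizerSketch(unittest.TestCase):
    # Assumed helper in the spirit of assert_tokenize: tokenize() here is the
    # earlier sketch, which yields (type, string) pairs directly.
    def assert_tokenize(self, source, expected):
        self.assertEqual(list(tokenize(source)), expected)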