From 3bdcc83c4e2eff3d67498802c68c05548a03dee1 Mon Sep 17 00:00:00 2001 From: James Westman Date: Wed, 9 Feb 2022 15:24:56 -0600 Subject: [PATCH] tokenizer: Remove redundant token types --- blueprintcompiler/completions_utils.py | 6 ++--- blueprintcompiler/parse_tree.py | 2 +- blueprintcompiler/tokenizer.py | 37 ++++++-------------------- tests/test_tokenizer.py | 10 +++---- 4 files changed, 17 insertions(+), 38 deletions(-) diff --git a/blueprintcompiler/completions_utils.py b/blueprintcompiler/completions_utils.py index b3c787a..85f5159 100644 --- a/blueprintcompiler/completions_utils.py +++ b/blueprintcompiler/completions_utils.py @@ -25,9 +25,9 @@ from .lsp_utils import Completion new_statement_patterns = [ - [(TokenType.OPEN_BLOCK, None)], - [(TokenType.CLOSE_BLOCK, None)], - [(TokenType.STMT_END, None)], + [(TokenType.PUNCTUATION, "{")], + [(TokenType.PUNCTUATION, "}")], + [(TokenType.PUNCTUATION, ";")], ] diff --git a/blueprintcompiler/parse_tree.py b/blueprintcompiler/parse_tree.py index 9096f65..afd1031 100644 --- a/blueprintcompiler/parse_tree.py +++ b/blueprintcompiler/parse_tree.py @@ -313,7 +313,7 @@ class Statement(ParseNode): return True token = ctx.peek_token() - if token.type != TokenType.STMT_END: + if str(token) != ";": ctx.errors.append(CompileError("Expected `;`", token.start, token.end)) else: ctx.next_token() diff --git a/blueprintcompiler/tokenizer.py b/blueprintcompiler/tokenizer.py index 76ae2ca..31d75d5 100644 --- a/blueprintcompiler/tokenizer.py +++ b/blueprintcompiler/tokenizer.py @@ -27,42 +27,26 @@ from .errors import CompileError class TokenType(Enum): EOF = 0 - DIRECTIVE = 1 - IDENT = 2 - QUOTED = 3 - NUMBER = 4 - OPEN_PAREN = 5 - CLOSE_PAREN = 6 - OPEN_BLOCK = 7 - CLOSE_BLOCK = 8 - STMT_END = 9 - OP = 10 - WHITESPACE = 11 - COMMENT = 12 - OPEN_BRACKET = 13 - CLOSE_BRACKET = 14 - COMMA = 15 + IDENT = 1 + QUOTED = 2 + NUMBER = 3 + OP = 4 + WHITESPACE = 5 + COMMENT = 6 + PUNCTUATION = 7 _tokens = [ - (TokenType.DIRECTIVE, r"@[\d\w\-_]+"), (TokenType.IDENT, r"[A-Za-z_][\d\w\-_]*"), (TokenType.QUOTED, r'"(\\"|[^"\n])*"'), (TokenType.QUOTED, r"'(\\'|[^'\n])*'"), (TokenType.NUMBER, r"[-+]?[\d_]+(\.[\d_]+)?"), (TokenType.NUMBER, r"0x[A-Fa-f0-9]+"), - (TokenType.OPEN_PAREN, r"\("), - (TokenType.CLOSE_PAREN, r"\)"), - (TokenType.OPEN_BLOCK, r"\{"), - (TokenType.CLOSE_BLOCK, r"\}"), - (TokenType.STMT_END, r";"), (TokenType.WHITESPACE, r"\s+"), (TokenType.COMMENT, r"\/\*[\s\S]*?\*\/"), (TokenType.COMMENT, r"\/\/[^\n]*"), - (TokenType.OPEN_BRACKET, r"\["), - (TokenType.CLOSE_BRACKET, r"\]"), (TokenType.OP, r"[:=\.=\|<>\+\-/\*]+"), - (TokenType.COMMA, r"\,"), + (TokenType.PUNCTUATION, r"\(|\)|\{|\}|;|\[|\]|\,"), ] _TOKENS = [(type, re.compile(regex)) for (type, regex) in _tokens] @@ -77,11 +61,6 @@ class Token: def __str__(self): return self.string[self.start:self.end] - def is_directive(self, directive) -> bool: - if self.type != TokenType.DIRECTIVE: - return False - return str(self) == "@" + directive - def get_number(self): if self.type != TokenType.NUMBER: return None diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py index 1dcb87b..ec39069 100644 --- a/tests/test_tokenizer.py +++ b/tests/test_tokenizer.py @@ -40,11 +40,11 @@ class TestTokenizer(unittest.TestCase): def test_basic(self): self.assert_tokenize("ident(){}; \n <<+>>*/=", [ (TokenType.IDENT, "ident"), - (TokenType.OPEN_PAREN, "("), - (TokenType.CLOSE_PAREN, ")"), - (TokenType.OPEN_BLOCK, "{"), - (TokenType.CLOSE_BLOCK, "}"), - (TokenType.STMT_END, ";"), + (TokenType.PUNCTUATION, "("), + (TokenType.PUNCTUATION, ")"), + (TokenType.PUNCTUATION, "{"), + (TokenType.PUNCTUATION, "}"), + (TokenType.PUNCTUATION, ";"), (TokenType.WHITESPACE, " \n "), (TokenType.OP, "<<+>>*/="), (TokenType.EOF, ""),