Fix bugs in the tokenizer

- Fix bug where empty quoted strings ("" and '') were not recognized as QUOTED tokens
- Add an end index to the tokenizer's compile error
This commit is contained in:
James Westman 2021-11-12 09:42:48 -06:00
parent f8478adf3a
commit b4d4877e07
No known key found for this signature in database
GPG key ID: CE2DBA0ADB654EA6

View file

@@ -47,8 +47,8 @@ class TokenType(Enum):
_tokens = [ _tokens = [
(TokenType.DIRECTIVE, r"@[\d\w\-_]+"), (TokenType.DIRECTIVE, r"@[\d\w\-_]+"),
(TokenType.IDENT, r"[A-Za-z_][\d\w\-_]*"), (TokenType.IDENT, r"[A-Za-z_][\d\w\-_]*"),
(TokenType.QUOTED, r'"(\\"|[^"\n])+"'), (TokenType.QUOTED, r'"(\\"|[^"\n])*"'),
(TokenType.QUOTED, r"'(\\'|[^'\n])+'"), (TokenType.QUOTED, r"'(\\'|[^'\n])*'"),
(TokenType.NUMBER, r"[-+]?[\d_]+(\.[\d_]+)?"), (TokenType.NUMBER, r"[-+]?[\d_]+(\.[\d_]+)?"),
(TokenType.NUMBER, r"0x[A-Fa-f0-9]+"), (TokenType.NUMBER, r"0x[A-Fa-f0-9]+"),
(TokenType.OPEN_PAREN, r"\("), (TokenType.OPEN_PAREN, r"\("),
@@ -107,7 +107,7 @@ def _tokenize(ui_ml: str):
break break
if not matched: if not matched:
raise CompileError("Could not determine what kind of syntax is meant here", i) raise CompileError("Could not determine what kind of syntax is meant here", i, i)
yield Token(TokenType.EOF, i, i, ui_ml) yield Token(TokenType.EOF, i, i, ui_ml)