Add tokenizer tests

James Westman 2021-10-18 12:37:05 -05:00
parent cf2f5215c8
commit d98b7d5891
4 changed files with 79 additions and 3 deletions

@@ -54,7 +54,7 @@ class CompileError(PrintableError):
         print(f"""{_colors.RED}{_colors.BOLD}{self.category}: {self.message}{_colors.CLEAR}
 at {filename} line {line_num} column {col_num}:
-{_colors.FAINT}{line_num :>4} |{_colors.CLEAR} {line} {_colors.FAINT}|{" "*(col_num)}^{_colors.CLEAR}
+{_colors.FAINT}{line_num :>4} |{_colors.CLEAR}{line.rstrip()}\n     {_colors.FAINT}|{" "*(col_num-1)}^{_colors.CLEAR}
 """)


@@ -0,0 +1,76 @@
# tokenizer.py
#
# Copyright 2021 James Westman <james@jwestman.net>
#
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation; either version 3 of the
# License, or (at your option) any later version.
#
# This file is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# SPDX-License-Identifier: LGPL-3.0-or-later


import unittest

from ..errors import PrintableError
from ..tokenizer import Token, TokenType, tokenize


class TestTokenizer(unittest.TestCase):
    def assert_tokenize(self, string: str, expect: [Token]):
        try:
            tokens = tokenize(string)
            self.assertEqual(len(tokens), len(expect))

            for token, (type, token_str) in zip(tokens, expect):
                self.assertEqual(token.type, type)
                self.assertEqual(str(token), token_str)
        except PrintableError as e:
            e.pretty_print("<test input>", string)
            raise e

    def test_basic(self):
        self.assert_tokenize("ident(){}; \n <<+>>*/=", [
            (TokenType.IDENT, "ident"),
            (TokenType.OPEN_PAREN, "("),
            (TokenType.CLOSE_PAREN, ")"),
            (TokenType.OPEN_BLOCK, "{"),
            (TokenType.CLOSE_BLOCK, "}"),
            (TokenType.STMT_END, ";"),
            (TokenType.WHITESPACE, " \n "),
            (TokenType.OP, "<<+>>*/="),
            (TokenType.EOF, ""),
        ])

    def test_quotes(self):
        self.assert_tokenize(r'"this is a \n string""this is \\another \"string\""', [
            (TokenType.QUOTED, r'"this is a \n string"'),
            (TokenType.QUOTED, r'"this is \\another \"string\""'),
            (TokenType.EOF, ""),
        ])

    def test_comments(self):
        self.assert_tokenize('/* \n \\n COMMENT /* */', [
            (TokenType.COMMENT, '/* \n \\n COMMENT /* */'),
            (TokenType.EOF, ""),
        ])
        self.assert_tokenize('line // comment\nline', [
            (TokenType.IDENT, 'line'),
            (TokenType.WHITESPACE, ' '),
            (TokenType.COMMENT, '// comment'),
            (TokenType.WHITESPACE, '\n'),
            (TokenType.IDENT, 'line'),
            (TokenType.EOF, ""),
        ])


if __name__ == "__main__":
    unittest.main()
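For a sense of the contract `assert_tokenize` exercises, here is a rough standalone sketch of driving the tokenizer the same way; the flat import is an assumption for illustration only, since the tests themselves use the package-relative `..tokenizer`:

# Hypothetical standalone driver; adjust the import to the real package layout.
from tokenizer import tokenize  # assumption: flat module layout

for token in tokenize("ident();"):
    print(token.type, repr(str(token)))

# Per test_basic above, this should print IDENT 'ident', OPEN_PAREN '(',
# CLOSE_PAREN ')', STMT_END ';', then a final EOF token whose text is ''.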


@@ -55,12 +55,12 @@ _TOKENS = [
     (TokenType.OPEN_BLOCK, r"\{"),
     (TokenType.CLOSE_BLOCK, r"\}"),
     (TokenType.STMT_END, r";"),
-    (TokenType.OP, r"[:=\.=\|<>]+"),
     (TokenType.WHITESPACE, r"\s+"),
-    (TokenType.COMMENT, r"\/\*.*?\*\/"),
+    (TokenType.COMMENT, r"/\*[\s\S]*\*/"),
     (TokenType.COMMENT, r"\/\/[^\n]*"),
     (TokenType.OPEN_BRACKET, r"\["),
     (TokenType.CLOSE_BRACKET, r"\]"),
+    (TokenType.OP, r"[:=\.=\|<>\+\-/\*]+"),
     (TokenType.COMMA, r"\,"),
 ]
 _TOKENS = [(type, re.compile(regex)) for (type, regex) in _TOKENS]
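Two things changed in the rule table: the OP character class gained `+`, `-`, `/`, and `*`, and the OP rule moved below the COMMENT rules. The order matters because the table is tried top to bottom and the first matching rule wins, so once `/` and `*` count as operator characters, `/* ... */` has to reach the COMMENT rule first. A minimal first-match-wins sketch in the same spirit (the three-rule table below is a local stand-in, not the module's `_TOKENS`):

import re

# Abbreviated stand-in for the rule table; list order encodes priority.
RULES = [
    ("COMMENT", re.compile(r"/\*[\s\S]*\*/")),  # must precede OP
    ("OP", re.compile(r"[:=\.\|<>\+\-/\*]+")),
    ("WHITESPACE", re.compile(r"\s+")),
]

def first_match(text, pos=0):
    # Return the first rule that matches at pos, like a single tokenizer step.
    for name, regex in RULES:
        match = regex.match(text, pos)
        if match:
            return name, match.group()
    return None

print(first_match("/* comment */"))  # ('COMMENT', '/* comment */')
# With OP listed first, the same input would lex as the operator run '/*'.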