From d98b7d5891ad3e1a5e18024ca82831955ac31ef7 Mon Sep 17 00:00:00 2001
From: James Westman <james@jwestman.net>
Date: Mon, 18 Oct 2021 12:37:05 -0500
Subject: [PATCH] Add tokenizer tests

---
 gtkblueprinttool/errors.py          |  2 +-
 gtkblueprinttool/tests/__init__.py  |  0
 gtkblueprinttool/tests/tokenizer.py | 76 +++++++++++++++++++++++++++++
 gtkblueprinttool/tokenizer.py       |  4 +-
 4 files changed, 79 insertions(+), 3 deletions(-)
 create mode 100644 gtkblueprinttool/tests/__init__.py
 create mode 100644 gtkblueprinttool/tests/tokenizer.py

diff --git a/gtkblueprinttool/errors.py b/gtkblueprinttool/errors.py
index 916dd20..524ef9f 100644
--- a/gtkblueprinttool/errors.py
+++ b/gtkblueprinttool/errors.py
@@ -54,7 +54,7 @@ class CompileError(PrintableError):
         print(f"""{_colors.RED}{_colors.BOLD}{self.category}: {self.message}{_colors.CLEAR}
 at {filename} line {line_num} column {col_num}:
-{_colors.FAINT}{line_num :>4} |{_colors.CLEAR} {line} {_colors.FAINT}|{" "*(col_num)}^{_colors.CLEAR}
+{_colors.FAINT}{line_num :>4} |{_colors.CLEAR}{line.rstrip()}\n{_colors.FAINT}     |{" "*(col_num-1)}^{_colors.CLEAR}
 """)
diff --git a/gtkblueprinttool/tests/__init__.py b/gtkblueprinttool/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/gtkblueprinttool/tests/tokenizer.py b/gtkblueprinttool/tests/tokenizer.py
new file mode 100644
index 0000000..b47ee90
--- /dev/null
+++ b/gtkblueprinttool/tests/tokenizer.py
@@ -0,0 +1,76 @@
+# tokenizer.py
+#
+# Copyright 2021 James Westman <james@jwestman.net>
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as
+# published by the Free Software Foundation; either version 3 of the
+# License, or (at your option) any later version.
+#
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# SPDX-License-Identifier: LGPL-3.0-or-later
+
+
+import unittest
+
+from ..errors import PrintableError
+from ..tokenizer import Token, TokenType, tokenize
+
+
+class TestTokenizer(unittest.TestCase):
+    def assert_tokenize(self, string: str, expect: list[tuple[TokenType, str]]):
+        try:
+            tokens = tokenize(string)
+            self.assertEqual(len(tokens), len(expect))
+            for token, (type, token_str) in zip(tokens, expect):
+                self.assertEqual(token.type, type)
+                self.assertEqual(str(token), token_str)
+        except PrintableError as e:
+            e.pretty_print("", string)
+            raise e
+
+
+    def test_basic(self):
+        self.assert_tokenize("ident(){}; \n <<+>>*/=", [
+            (TokenType.IDENT, "ident"),
+            (TokenType.OPEN_PAREN, "("),
+            (TokenType.CLOSE_PAREN, ")"),
+            (TokenType.OPEN_BLOCK, "{"),
+            (TokenType.CLOSE_BLOCK, "}"),
+            (TokenType.STMT_END, ";"),
+            (TokenType.WHITESPACE, " \n "),
+            (TokenType.OP, "<<+>>*/="),
+            (TokenType.EOF, ""),
+        ])
+
+    def test_quotes(self):
+        self.assert_tokenize(r'"this is a \n string""this is \\another \"string\""', [
+            (TokenType.QUOTED, r'"this is a \n string"'),
+            (TokenType.QUOTED, r'"this is \\another \"string\""'),
+            (TokenType.EOF, ""),
+        ])
+
+    def test_comments(self):
+        self.assert_tokenize('/* \n \\n COMMENT /* */', [
+            (TokenType.COMMENT, '/* \n \\n COMMENT /* */'),
+            (TokenType.EOF, ""),
+        ])
+        self.assert_tokenize('line // comment\nline', [
+            (TokenType.IDENT, 'line'),
+            (TokenType.WHITESPACE, ' '),
+            (TokenType.COMMENT, '// comment'),
+            (TokenType.WHITESPACE, '\n'),
+            (TokenType.IDENT, 'line'),
+            (TokenType.EOF, ""),
+        ])
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/gtkblueprinttool/tokenizer.py b/gtkblueprinttool/tokenizer.py
index e3fd32f..91bc2aa 100644
--- a/gtkblueprinttool/tokenizer.py
+++ b/gtkblueprinttool/tokenizer.py
@@ -55,12 +55,12 @@ _TOKENS = [
     (TokenType.OPEN_BLOCK, r"\{"),
     (TokenType.CLOSE_BLOCK, r"\}"),
     (TokenType.STMT_END, r";"),
-    (TokenType.OP, r"[:=\.=\|<>]+"),
     (TokenType.WHITESPACE, r"\s+"),
-    (TokenType.COMMENT, r"\/\*.*?\*\/"),
+    (TokenType.COMMENT, r"/\*[\s\S]*?\*/"),
     (TokenType.COMMENT, r"\/\/[^\n]*"),
     (TokenType.OPEN_BRACKET, r"\["),
     (TokenType.CLOSE_BRACKET, r"\]"),
+    (TokenType.OP, r"[:=\.=\|<>\+\-/\*]+"),
     (TokenType.COMMA, r"\,"),
 ]
 _TOKENS = [(type, re.compile(regex)) for (type, regex) in _TOKENS]
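
A note on the COMMENT pattern change above: "." never matches a newline unless
re.DOTALL is set, so the old r"\/\*.*?\*\/" could not tokenize a block comment
that spans lines, which is exactly what test_comments exercises. "[\s\S]"
matches any character including newlines, and the lazy "*?" keeps the match
from running past the first "*/". A standalone illustration (not part of the
patch) of why both pieces matter:

    import re

    old = re.compile(r"\/\*.*?\*\/")       # "." stops at newlines
    new = re.compile(r"/\*[\s\S]*?\*/")    # "[\s\S]" crosses them; "*?" stays lazy
    greedy = re.compile(r"/\*[\s\S]*\*/")  # a greedy "*" would overshoot

    src = "/* first\nline */ code /* second */"
    print(old.match(src))             # None: no "*/" before the newline
    print(new.match(src).group())     # '/* first\nline */'
    print(greedy.match(src).group())  # '/* first\nline */ code /* second */'

This is also why the OP rule moves below the COMMENT rules: OP now includes
"/" and "*", so the comment patterns must get the first chance to match.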
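Because tests/__init__.py makes the new directory a package, the suite runs
under the stock unittest runner with no extra tooling. Assuming the patch is
applied and the commands run from the repository root, either of the following
should work (the second via the __main__ guard at the end of the test file):

    python -m unittest gtkblueprinttool.tests.tokenizer
    python -m gtkblueprinttool.tests.tokenizer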