From d98b7d5891ad3e1a5e18024ca82831955ac31ef7 Mon Sep 17 00:00:00 2001
From: James Westman <james@jwestman.net>
Date: Mon, 18 Oct 2021 12:37:05 -0500
Subject: [PATCH] Add tokenizer tests

---
 gtkblueprinttool/errors.py          |  2 +-
 gtkblueprinttool/tests/__init__.py  |  0
 gtkblueprinttool/tests/tokenizer.py | 76 +++++++++++++++++++++++++++++
 gtkblueprinttool/tokenizer.py       |  4 +-
 4 files changed, 79 insertions(+), 3 deletions(-)
 create mode 100644 gtkblueprinttool/tests/__init__.py
 create mode 100644 gtkblueprinttool/tests/tokenizer.py

diff --git a/gtkblueprinttool/errors.py b/gtkblueprinttool/errors.py
index 916dd20..524ef9f 100644
--- a/gtkblueprinttool/errors.py
+++ b/gtkblueprinttool/errors.py
@@ -54,7 +54,7 @@ class CompileError(PrintableError):
         print(f"""{_colors.RED}{_colors.BOLD}{self.category}: {self.message}{_colors.CLEAR}
 at {filename} line {line_num} column {col_num}:
-{_colors.FAINT}{line_num :>4} |{_colors.CLEAR} {line} {_colors.FAINT}|{" "*(col_num)}^{_colors.CLEAR}
+{_colors.FAINT}{line_num :>4} |{_colors.CLEAR}{line.rstrip()}\n{_colors.FAINT}     |{" "*(col_num-1)}^{_colors.CLEAR}
 """)
diff --git a/gtkblueprinttool/tests/__init__.py b/gtkblueprinttool/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/gtkblueprinttool/tests/tokenizer.py b/gtkblueprinttool/tests/tokenizer.py
new file mode 100644
index 0000000..b47ee90
--- /dev/null
+++ b/gtkblueprinttool/tests/tokenizer.py
@@ -0,0 +1,76 @@
+# tokenizer.py
+#
+# Copyright 2021 James Westman <james@jwestman.net>
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as
+# published by the Free Software Foundation; either version 3 of the
+# License, or (at your option) any later version.
+#
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# SPDX-License-Identifier: LGPL-3.0-or-later
+
+
+import unittest
+
+from ..errors import PrintableError
+from ..tokenizer import Token, TokenType, tokenize
+
+
+class TestTokenizer(unittest.TestCase):
+    def assert_tokenize(self, string: str, expect: list[tuple[TokenType, str]]):
+        try:
+            tokens = tokenize(string)
+            self.assertEqual(len(tokens), len(expect))
+            for token, (type, token_str) in zip(tokens, expect):
+                self.assertEqual(token.type, type)
+                self.assertEqual(str(token), token_str)
+        except PrintableError as e:
+            e.pretty_print("", string)
+            raise e
+
+
+    def test_basic(self):
+        self.assert_tokenize("ident(){}; \n <<+>>*/=", [
+            (TokenType.IDENT, "ident"),
+            (TokenType.OPEN_PAREN, "("),
+            (TokenType.CLOSE_PAREN, ")"),
+            (TokenType.OPEN_BLOCK, "{"),
+            (TokenType.CLOSE_BLOCK, "}"),
+            (TokenType.STMT_END, ";"),
+            (TokenType.WHITESPACE, " \n "),
+            (TokenType.OP, "<<+>>*/="),
+            (TokenType.EOF, ""),
+        ])
+
+    def test_quotes(self):
+        self.assert_tokenize(r'"this is a \n string""this is \\another \"string\""', [
+            (TokenType.QUOTED, r'"this is a \n string"'),
+            (TokenType.QUOTED, r'"this is \\another \"string\""'),
+            (TokenType.EOF, ""),
+        ])
+
+    def test_comments(self):
+        self.assert_tokenize('/* \n \\n COMMENT /* */', [
+            (TokenType.COMMENT, '/* \n \\n COMMENT /* */'),
+            (TokenType.EOF, ""),
+        ])
+        self.assert_tokenize('line // comment\nline', [
+            (TokenType.IDENT, 'line'),
+            (TokenType.WHITESPACE, ' '),
+            (TokenType.COMMENT, '// comment'),
+            (TokenType.WHITESPACE, '\n'),
+            (TokenType.IDENT, 'line'),
+            (TokenType.EOF, ""),
+        ])
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/gtkblueprinttool/tokenizer.py b/gtkblueprinttool/tokenizer.py
index e3fd32f..91bc2aa 100644
--- a/gtkblueprinttool/tokenizer.py
+++ b/gtkblueprinttool/tokenizer.py
@@ -55,12 +55,12 @@ _TOKENS = [
     (TokenType.OPEN_BLOCK, r"\{"),
     (TokenType.CLOSE_BLOCK, r"\}"),
     (TokenType.STMT_END, r";"),
-    (TokenType.OP, r"[:=\.=\|<>]+"),
     (TokenType.WHITESPACE, r"\s+"),
-    (TokenType.COMMENT, r"\/\*.*?\*\/"),
+    (TokenType.COMMENT, r"/\*[\s\S]*?\*/"),
     (TokenType.COMMENT, r"\/\/[^\n]*"),
     (TokenType.OPEN_BRACKET, r"\["),
     (TokenType.CLOSE_BRACKET, r"\]"),
+    (TokenType.OP, r"[:=\.=\|<>\+\-/\*]+"),
     (TokenType.COMMA, r"\,"),
 ]
 _TOKENS = [(type, re.compile(regex)) for (type, regex) in _TOKENS]
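
A note on the COMMENT pattern change above: "." never matches a newline unless
re.DOTALL is set, so the old r"\/\*.*?\*\/" could not tokenize a block comment
that spans lines, which is exactly what test_comments exercises. "[\s\S]"
matches any character including newlines, and the lazy "*?" keeps the match
from running past the first "*/". A standalone illustration (not part of the
patch) of why both pieces matter:

    import re

    old = re.compile(r"\/\*.*?\*\/")       # "." stops at newlines
    new = re.compile(r"/\*[\s\S]*?\*/")    # "[\s\S]" crosses them; "*?" stays lazy
    greedy = re.compile(r"/\*[\s\S]*\*/")  # a greedy "*" would overshoot

    src = "/* first\nline */ code /* second */"
    print(old.match(src))             # None: no "*/" before the newline
    print(new.match(src).group())     # '/* first\nline */'
    print(greedy.match(src).group())  # '/* first\nline */ code /* second */'

This is also why the OP rule moves below the COMMENT rules: OP now includes
"/" and "*", so the comment patterns must get the first chance to match.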
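Because tests/__init__.py makes the new directory a package, the suite runs
under the stock unittest runner with no extra tooling. Assuming the patch is
applied and the commands run from the repository root, either of the following
should work (the second via the __main__ guard at the end of the test file):

    python -m unittest gtkblueprinttool.tests.tokenizer
    python -m gtkblueprinttool.tests.tokenizer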