From 8f3ae9a626fbc8fcaa37debe150032d61f6a3264 Mon Sep 17 00:00:00 2001
From: James Westman <james@jwestman.net>
Date: Sat, 3 May 2025 10:10:06 -0500
Subject: [PATCH] parser: Tweak parsing during error conditions

When an explicit parsing error is encountered and a CompileError is
raised, apply the child context's changes to the parent context state.
This way, the rule that catches the exception (e.g. Statement or Until)
knows where the error occurred.

Also, report "Expected" errors at the end of the previous
non-whitespace token.
---
 blueprintcompiler/errors.py                   | 47 +++++++++++--------
 blueprintcompiler/language/common.py          |  1 +
 blueprintcompiler/language/contexts.py        | 12 +++--
 blueprintcompiler/parse_tree.py               | 30 ++++++++++--
 tests/sample_errors/empty.err                 |  2 +-
 tests/sample_errors/expected_semicolon.err    |  2 +-
 tests/sample_errors/incomplete_signal.err     |  3 +-
 .../sample_errors/menu_toplevel_attribute.err |  3 +-
 tests/sample_errors/no_import_version.err     |  2 +-
 tests/test_samples.py                         |  4 +-
 10 files changed, 69 insertions(+), 37 deletions(-)

diff --git a/blueprintcompiler/errors.py b/blueprintcompiler/errors.py
index df1c2e1..f5d2e06 100644
--- a/blueprintcompiler/errors.py
+++ b/blueprintcompiler/errors.py
@@ -92,29 +92,38 @@ class CompileError(PrintableError):
     def pretty_print(self, filename: str, code: str, stream=sys.stdout) -> None:
         assert self.range is not None
 
-        line_num, col_num = utils.idx_to_pos(self.range.start + 1, code)
-        end_line_num, end_col_num = utils.idx_to_pos(self.range.end + 1, code)
-        line = code.splitlines(True)[line_num] if code != "" else ""
+        def format_line(range: Range):
+            line_num, col_num = utils.idx_to_pos(range.start, code)
+            end_line_num, end_col_num = utils.idx_to_pos(range.end, code)
+            line = code.splitlines(True)[line_num] if code != "" else ""
 
-        # Display 1-based line numbers
-        line_num += 1
-        end_line_num += 1
+            # Display 1-based line numbers
+            line_num += 1
+            end_line_num += 1
+            col_num += 1
+            end_col_num += 1
 
-        n_spaces = col_num - 1
-        n_carets = (
-            (end_col_num - col_num)
-            if line_num == end_line_num
-            else (len(line) - n_spaces - 1)
-        )
+            n_spaces = col_num - 1
+            n_carets = (
+                (end_col_num - col_num)
+                if line_num == end_line_num
+                else (len(line) - n_spaces - 1)
+            )
 
-        n_spaces += line.count("\t", 0, col_num)
-        n_carets += line.count("\t", col_num, col_num + n_carets)
-        line = line.replace("\t", "  ")
+            n_spaces += line.count("\t", 0, col_num)
+            n_carets += line.count("\t", col_num, col_num + n_carets)
+            line = line.replace("\t", "  ")
+
+            n_carets = max(n_carets, 1)
+
+            return line_num, col_num, line.rstrip(), (" " * n_spaces) + ("^" * n_carets)
+
+        line_num, col_num, line, carets = format_line(self.range)
 
         stream.write(
             f"""{self.color}{Colors.BOLD}{self.category}: {self.message}{Colors.CLEAR}
 at {filename} line {line_num} column {col_num}:
-{Colors.FAINT}{line_num :>4} |{Colors.CLEAR}{line.rstrip()}\n     {Colors.FAINT}|{" "*n_spaces}{"^"*n_carets}{Colors.CLEAR}\n"""
+{Colors.FAINT}{line_num :>4} |{Colors.CLEAR}{line}\n     {Colors.FAINT}|{carets}{Colors.CLEAR}\n"""
         )
 
         for hint in self.hints:
@@ -139,14 +148,12 @@ at {filename} line {line_num} column {col_num}:
                 )
 
         for ref in self.references:
-            line_num, col_num = utils.idx_to_pos(ref.range.start + 1, code)
-            line = code.splitlines(True)[line_num]
-            line_num += 1
+            line_num, col_num, line, carets = format_line(ref.range)
 
             stream.write(
                 f"""{Colors.FAINT}note: {ref.message}:
 at {filename} line {line_num} column {col_num}:
-{Colors.FAINT}{line_num :>4} |{line.rstrip()}\n     {Colors.FAINT}|{" "*(col_num-1)}^{Colors.CLEAR}\n"""
+{Colors.FAINT}{line_num :>4} |{line}\n     {Colors.FAINT}|{carets}{Colors.CLEAR}\n"""
             )
 
         stream.write("\n")
diff --git a/blueprintcompiler/language/common.py b/blueprintcompiler/language/common.py
index 1cc1b3b..9bd04a5 100644
--- a/blueprintcompiler/language/common.py
+++ b/blueprintcompiler/language/common.py
@@ -34,6 +34,7 @@ from ..errors import (
     CompileError,
     CompileWarning,
     DeprecatedWarning,
+    ErrorReference,
     MultipleErrors,
     UnusedWarning,
     UpgradeWarning,
diff --git a/blueprintcompiler/language/contexts.py b/blueprintcompiler/language/contexts.py
index 6e26048..38d84f4 100644
--- a/blueprintcompiler/language/contexts.py
+++ b/blueprintcompiler/language/contexts.py
@@ -48,7 +48,7 @@ class ScopeCtx:
             return self.node
 
     @cached_property
-    def objects(self) -> T.Dict[str, Object]:
+    def objects(self) -> T.Dict[str, AstNode]:
         return {
             obj.tokens["id"]: obj
             for obj in self._iter_recursive(self.node)
@@ -58,7 +58,7 @@ class ScopeCtx:
     def validate_unique_ids(self) -> None:
         from .gtk_list_item_factory import ExtListItemFactory
 
-        passed = {}
+        passed: T.Dict[str, AstNode] = {}
         for obj in self._iter_recursive(self.node):
             if obj.tokens["id"] is None:
                 continue
@@ -71,10 +71,16 @@ class ScopeCtx:
                     raise CompileError(
                         f"Duplicate object ID '{obj.tokens['id']}'",
                         token.range,
+                        references=[
+                            ErrorReference(
+                                passed[obj.tokens["id"]].group.tokens["id"].range,
+                                "previous declaration was here",
+                            )
+                        ],
                     )
             passed[obj.tokens["id"]] = obj
 
-    def _iter_recursive(self, node: AstNode):
+    def _iter_recursive(self, node: AstNode) -> T.Generator[AstNode, T.Any, None]:
         yield node
         for child in node.children:
             if child.context[ScopeCtx] is self:
diff --git a/blueprintcompiler/parse_tree.py b/blueprintcompiler/parse_tree.py
index a215f19..3924ee5 100644
--- a/blueprintcompiler/parse_tree.py
+++ b/blueprintcompiler/parse_tree.py
@@ -235,7 +235,15 @@ class ParseNode:
         start_idx = ctx.index
         inner_ctx = ctx.create_child()
 
-        if self._parse(inner_ctx):
+        try:
+            result = self._parse(inner_ctx)
+        except Exception as e:
+            # If an exception occurs, there's an explicit error, not just a rule that didn't match. Apply the context
+            # state so that whichever rule handles the exception (e.g. a Statement) knows where the error occurred.
+            ctx.apply_child(inner_ctx)
+            raise e
+
+        if result:
             ctx.apply_child(inner_ctx)
             if ctx.index == start_idx:
                 return ParseResult.EMPTY
@@ -269,11 +277,11 @@ class Err(ParseNode):
         if self.child.parse(ctx).failed():
             start_idx = ctx.start
             while ctx.tokens[start_idx].type in SKIP_TOKENS:
-                start_idx += 1
+                start_idx -= 1
             start_token = ctx.tokens[start_idx]
 
             raise CompileError(
-                self.message, Range(start_token.start, start_token.start, ctx.text)
+                self.message, Range(start_token.end, start_token.end, ctx.text)
             )
         return True
 
@@ -350,7 +358,20 @@ class Statement(ParseNode):
 
         token = ctx.peek_token()
         if str(token) != self.end:
-            ctx.errors.append(CompileError(f"Expected `{self.end}`", token.range))
+            start_idx = ctx.index - 1
+            while ctx.tokens[start_idx].type in SKIP_TOKENS:
+                start_idx -= 1
+            start_token = ctx.tokens[start_idx]
+
+            position = (
+                start_token.start if ctx.index - 1 == start_idx else start_token.end
+            )
+
+            ctx.errors.append(
+                CompileError(
+                    f"Expected `{self.end}`", Range(position, position, ctx.text)
+                )
+            )
         else:
             ctx.next_token()
         return True
@@ -411,7 +432,6 @@ class Until(ParseNode):
                         ctx.skip_unexpected_token()
             except CompileError as e:
                 ctx.errors.append(e)
-                ctx.next_token()
 
         return True
 
diff --git a/tests/sample_errors/empty.err b/tests/sample_errors/empty.err
index 854962f..b30f437 100644
--- a/tests/sample_errors/empty.err
+++ b/tests/sample_errors/empty.err
@@ -1 +1 @@
-1,0,0,File must start with a "using Gtk" directive (e.g. `using Gtk 4.0;`)
\ No newline at end of file
+1,1,0,File must start with a "using Gtk" directive (e.g. `using Gtk 4.0;`)
\ No newline at end of file
diff --git a/tests/sample_errors/expected_semicolon.err b/tests/sample_errors/expected_semicolon.err
index bfabc9a..a1b2a36 100644
--- a/tests/sample_errors/expected_semicolon.err
+++ b/tests/sample_errors/expected_semicolon.err
@@ -1 +1 @@
-6,1,1,Expected `;`
\ No newline at end of file
+5,4,0,Expected `;`
\ No newline at end of file
diff --git a/tests/sample_errors/incomplete_signal.err b/tests/sample_errors/incomplete_signal.err
index 901ef3b..c61ef28 100644
--- a/tests/sample_errors/incomplete_signal.err
+++ b/tests/sample_errors/incomplete_signal.err
@@ -1,2 +1 @@
-5,1,0,Expected a signal detail name
-4,9,3,Unexpected tokens
\ No newline at end of file
+4,11,0,Expected a signal detail name
\ No newline at end of file
diff --git a/tests/sample_errors/menu_toplevel_attribute.err b/tests/sample_errors/menu_toplevel_attribute.err
index 8f3ef26..ee588d0 100644
--- a/tests/sample_errors/menu_toplevel_attribute.err
+++ b/tests/sample_errors/menu_toplevel_attribute.err
@@ -1,2 +1 @@
-4,5,21,Attributes are not permitted at the top level of a menu
-4,16,10,Unexpected tokens
\ No newline at end of file
+4,5,21,Attributes are not permitted at the top level of a menu
\ No newline at end of file
diff --git a/tests/sample_errors/no_import_version.err b/tests/sample_errors/no_import_version.err
index db830e0..4ee792f 100644
--- a/tests/sample_errors/no_import_version.err
+++ b/tests/sample_errors/no_import_version.err
@@ -1 +1 @@
-1,11,0,Expected a version number for GTK
+1,10,0,Expected a version number for GTK
diff --git a/tests/test_samples.py b/tests/test_samples.py
index 9cd5baf..7d32ecb 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -143,9 +143,9 @@ class TestSamples(unittest.TestCase):
             ]
 
             def error_str(error: CompileError):
-                line, col = utils.idx_to_pos(error.range.start + 1, blueprint)
+                line, col = utils.idx_to_pos(error.range.start, blueprint)
                 len = error.range.length
-                return ",".join([str(line + 1), str(col), str(len), error.message])
+                return ",".join([str(line + 1), str(col + 1), str(len), error.message])
 
             actual = "\n".join([error_str(error) for error in errors])