From ea92838cf318a5a6e8f9334ccdef063470cc4d96 Mon Sep 17 00:00:00 2001 From: James Westman Date: Thu, 28 Sep 2023 14:41:12 -0500 Subject: [PATCH] Parse escape sequences instead of using replace That way we can warn about invalid sequences. Also, the previous code had at least one subtle bug (`\\\\'`). --- blueprintcompiler/decompiler.py | 11 +---- blueprintcompiler/parse_tree.py | 22 +++++---- blueprintcompiler/utils.py | 52 +++++++++++++++++++++ tests/sample_errors/bad_escape_sequence.blp | 5 ++ tests/sample_errors/bad_escape_sequence.err | 1 + tests/samples/strings.blp | 2 +- tests/samples/strings.ui | 4 +- 7 files changed, 76 insertions(+), 21 deletions(-) create mode 100644 tests/sample_errors/bad_escape_sequence.blp create mode 100644 tests/sample_errors/bad_escape_sequence.err diff --git a/blueprintcompiler/decompiler.py b/blueprintcompiler/decompiler.py index 98e00af..550f60b 100644 --- a/blueprintcompiler/decompiler.py +++ b/blueprintcompiler/decompiler.py @@ -23,7 +23,7 @@ from dataclasses import dataclass from enum import Enum from .gir import * -from .utils import Colors +from .utils import Colors, escape_quote from .xml_reader import Element, parse, parse_string __all__ = ["decompile"] @@ -253,15 +253,6 @@ def decompiler(tag, cdata=False): return decorator -def escape_quote(string: str) -> str: - return ( - string.replace("\\", "\\\\") - .replace("'", "\\'") - .replace('"', '\\"') - .replace("\n", "\\n") - ) - - @decompiler("interface") def decompile_interface(ctx, gir): return gir diff --git a/blueprintcompiler/parse_tree.py b/blueprintcompiler/parse_tree.py index 8f3ef31..fff6e4a 100644 --- a/blueprintcompiler/parse_tree.py +++ b/blueprintcompiler/parse_tree.py @@ -22,6 +22,7 @@ import typing as T from enum import Enum +from . import utils from .ast_utils import AstNode from .errors import ( CompileError, @@ -573,14 +574,19 @@ class UseQuoted(ParseNode): if token.type != TokenType.QUOTED: return False - string = ( - str(token)[1:-1] - .replace("\\n", "\n") - .replace('\\"', '"') - .replace("\\\\", "\\") - .replace("\\'", "'") - ) - ctx.set_group_val(self.key, string, token) + unescaped = None + + try: + unescaped = utils.unescape_quote(str(token)) + except utils.UnescapeError as e: + start = ctx.tokens[ctx.index - 1].start + range = Range(start + e.start, start + e.end, ctx.text) + ctx.errors.append( + CompileError(f"Invalid escape sequence '{range.text}'", range) + ) + + ctx.set_group_val(self.key, unescaped, token) + return True diff --git a/blueprintcompiler/utils.py b/blueprintcompiler/utils.py index 4c4b44a..6d03a9e 100644 --- a/blueprintcompiler/utils.py +++ b/blueprintcompiler/utils.py @@ -18,6 +18,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import typing as T +from dataclasses import dataclass class Colors: @@ -98,3 +99,54 @@ def idxs_to_range(start: int, end: int, text: str): "character": end_c, }, } + + +@dataclass +class UnescapeError(Exception): + start: int + end: int + + +def escape_quote(string: str) -> str: + return ( + string.replace("\\", "\\\\") + .replace("'", "\\'") + .replace('"', '\\"') + .replace("\n", "\\n") + .replace("\t", "\\t") + ) + + +def unescape_quote(string: str) -> str: + string = string[1:-1] + + REPLACEMENTS = { + "\\": "\\", + "n": "\n", + "t": "\t", + '"': '"', + "'": "'", + } + + result = "" + i = 0 + while i < len(string): + c = string[i] + if c == "\\": + i += 1 + + if i >= len(string): + from .errors import CompilerBugError + + raise CompilerBugError() + + if r := REPLACEMENTS.get(string[i]): + result += r + else: + raise UnescapeError(i, i + 2) + else: + result += c + + i += 1 + + return result diff --git a/tests/sample_errors/bad_escape_sequence.blp b/tests/sample_errors/bad_escape_sequence.blp new file mode 100644 index 0000000..4b109c6 --- /dev/null +++ b/tests/sample_errors/bad_escape_sequence.blp @@ -0,0 +1,5 @@ +using Gtk 4.0; + +Label { + label: '***** \f *****'; +} diff --git a/tests/sample_errors/bad_escape_sequence.err b/tests/sample_errors/bad_escape_sequence.err new file mode 100644 index 0000000..e4ec183 --- /dev/null +++ b/tests/sample_errors/bad_escape_sequence.err @@ -0,0 +1 @@ +4,17,2,Invalid escape sequence '\f' \ No newline at end of file diff --git a/tests/samples/strings.blp b/tests/samples/strings.blp index ef237ae..08bb418 100644 --- a/tests/samples/strings.blp +++ b/tests/samples/strings.blp @@ -1,5 +1,5 @@ using Gtk 4.0; Label { - label: "Test 1 2 3\n & 4 \"5\' 6"; + label: "\\\\'Test 1 2 3\n & 4 \"5\' 6 \t"; } diff --git a/tests/samples/strings.ui b/tests/samples/strings.ui index 1dea963..e7fef2e 100644 --- a/tests/samples/strings.ui +++ b/tests/samples/strings.ui @@ -7,7 +7,7 @@ corresponding .blp file and regenerate this file with blueprint-compiler. - Test 1 2 3 - & 4 "5' 6 + \\'Test 1 2 3 + & 4 "5' 6