Parse escape sequences instead of using replace

That way we can warn about invalid sequences. Also, the previous code had at least one subtle bug (`\\\\'`).
This commit is contained in:
James Westman 2023-09-28 14:41:12 -05:00
parent bc798c544c
commit ea92838cf3
7 changed files with 76 additions and 21 deletions

View file

@ -23,7 +23,7 @@ from dataclasses import dataclass
from enum import Enum
from .gir import *
from .utils import Colors
from .utils import Colors, escape_quote
from .xml_reader import Element, parse, parse_string
__all__ = ["decompile"]
@ -253,15 +253,6 @@ def decompiler(tag, cdata=False):
return decorator
def escape_quote(string: str) -> str:
    """Backslash-escape *string* so it can be placed inside a quoted literal.

    Backslashes, single quotes and double quotes each gain a leading
    backslash, and a newline becomes the two characters ``\\n``. Every other
    character (tabs included) is passed through unchanged.
    """
    substitutions = {
        "\\": "\\\\",
        "'": "\\'",
        '"': '\\"',
        "\n": "\\n",
    }
    # A single left-to-right pass gives the same result as chained replace()
    # calls that double the backslash first: no later step ever re-reads a
    # backslash that an earlier step inserted.
    return "".join(substitutions.get(ch, ch) for ch in string)
@decompiler("interface")
def decompile_interface(ctx, gir):
return gir

View file

@ -22,6 +22,7 @@
import typing as T
from enum import Enum
from . import utils
from .ast_utils import AstNode
from .errors import (
CompileError,
@ -573,14 +574,19 @@ class UseQuoted(ParseNode):
if token.type != TokenType.QUOTED:
return False
string = (
str(token)[1:-1]
.replace("\\n", "\n")
.replace('\\"', '"')
.replace("\\\\", "\\")
.replace("\\'", "'")
)
ctx.set_group_val(self.key, string, token)
unescaped = None
try:
unescaped = utils.unescape_quote(str(token))
except utils.UnescapeError as e:
start = ctx.tokens[ctx.index - 1].start
range = Range(start + e.start, start + e.end, ctx.text)
ctx.errors.append(
CompileError(f"Invalid escape sequence '{range.text}'", range)
)
ctx.set_group_val(self.key, unescaped, token)
return True

View file

@ -18,6 +18,7 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
import typing as T
from dataclasses import dataclass
class Colors:
@ -98,3 +99,54 @@ def idxs_to_range(start: int, end: int, text: str):
"character": end_c,
},
}
@dataclass
class UnescapeError(Exception):
    """Raised by unescape_quote when it meets an unrecognized escape sequence."""

    # Character offsets delimiting the offending escape sequence. The caller
    # visible in this change adds the token's start position to both values
    # before building the Range attached to the reported error.
    start: int
    end: int
def escape_quote(string: str) -> str:
    """Return *string* with the characters special to quoted literals escaped.

    Backslash, single quote and double quote are each prefixed with a
    backslash; newline and tab are rewritten as the two-character sequences
    ``\\n`` and ``\\t``. All other characters are left as they are.
    """
    table = str.maketrans(
        {
            "\\": "\\\\",
            "'": "\\'",
            '"': '\\"',
            "\n": "\\n",
            "\t": "\\t",
        }
    )
    # translate() maps each input character exactly once, which matches the
    # original replace() chain where the backslash was doubled before any
    # other replacement could introduce new backslashes.
    return string.translate(table)
def unescape_quote(string: str) -> str:
    """Strip the surrounding quotes from *string* and resolve its escapes.

    The first and last characters (the quote marks) are dropped, then each
    backslash sequence in the remainder is replaced by the character it
    stands for. Recognized sequences are ``\\\\``, ``\\n``, ``\\t``, ``\\"``
    and ``\\'``.

    Raises:
        UnescapeError: on a backslash followed by any other character. The
            offsets span the two-character sequence measured from the start
            of the *quoted* input: because the opening quote was stripped,
            the escaped character's index in the body equals the backslash's
            index in the original token.
        CompilerBugError: if the body ends with a lone backslash.
            NOTE(review): presumably the tokenizer never emits such a token,
            which is why this is reported as an internal error -- confirm.
    """
    body = string[1:-1]

    replacements = {
        "\\": "\\",
        "n": "\n",
        "t": "\t",
        '"': '"',
        "'": "'",
    }

    # Collect literal runs and decoded escapes, then join once at the end
    # instead of growing a string one character at a time.
    pieces = []
    pos = 0
    size = len(body)

    while pos < size:
        slash = body.find("\\", pos)
        if slash == -1:
            # No further escapes: the rest of the body is literal text.
            pieces.append(body[pos:])
            break

        pieces.append(body[pos:slash])

        code_at = slash + 1
        if code_at >= size:
            # Imported here rather than at module level, as in the original.
            from .errors import CompilerBugError

            raise CompilerBugError()

        decoded = replacements.get(body[code_at])
        if decoded is None:
            raise UnescapeError(code_at, code_at + 2)

        pieces.append(decoded)
        pos = code_at + 1

    return "".join(pieces)

View file

@ -0,0 +1,5 @@
using Gtk 4.0;
Label {
label: '***** \f *****';
}

View file

@ -0,0 +1 @@
4,17,2,Invalid escape sequence '\f'

View file

@ -1,5 +1,5 @@
using Gtk 4.0;
Label {
label: "Test 1 2 3\n & 4 \"5\' 6";
label: "\\\\'Test 1 2 3\n & 4 \"5\' 6 \t";
}

View file

@ -7,7 +7,7 @@ corresponding .blp file and regenerate this file with blueprint-compiler.
<interface>
<requires lib="gtk" version="4.0"/>
<object class="GtkLabel">
<property name="label">Test 1 2 3
&amp; 4 "5' 6</property>
<property name="label">\\'Test 1 2 3
&amp; 4 "5' 6 </property>
</object>
</interface>