From 8d587b62a02b62a0a3d24c1b2ebf292035ce86d2 Mon Sep 17 00:00:00 2001 From: James Westman Date: Tue, 4 Jan 2022 16:59:19 -0600 Subject: [PATCH] parser: Simplify parser construction - Replace several different parse nodes with Match, which matches the exact text of a token but not the token type - Allow arrays to be used in place of Sequence --- blueprintcompiler/extensions/gtk_a11y.py | 12 +-- .../extensions/gtk_combo_box_text.py | 29 +++--- .../extensions/gtk_file_filter.py | 16 ++-- blueprintcompiler/extensions/gtk_layout.py | 8 +- blueprintcompiler/extensions/gtk_menu.py | 72 +++++++------- .../extensions/gtk_size_group.py | 12 +-- .../extensions/gtk_string_list.py | 15 ++- blueprintcompiler/extensions/gtk_styles.py | 12 +-- blueprintcompiler/parse_tree.py | 95 ++++++++----------- blueprintcompiler/parser.py | 78 ++++++++------- blueprintcompiler/parser_utils.py | 44 ++++----- 11 files changed, 182 insertions(+), 211 deletions(-) diff --git a/blueprintcompiler/extensions/gtk_a11y.py b/blueprintcompiler/extensions/gtk_a11y.py index 3ffbc2e..01e5a83 100644 --- a/blueprintcompiler/extensions/gtk_a11y.py +++ b/blueprintcompiler/extensions/gtk_a11y.py @@ -155,18 +155,18 @@ a11y_prop = Group( A11yProperty, Statement( UseIdent("name"), - Op(":"), + ":", value.expected("a value"), ) ) a11y = Group( A11y, - Sequence( - Keyword("accessibility", True), - OpenBlock(), - Until(a11y_prop, CloseBlock()), - ) + [ + Keyword("accessibility"), + "{", + Until(a11y_prop, "}"), + ] ) diff --git a/blueprintcompiler/extensions/gtk_combo_box_text.py b/blueprintcompiler/extensions/gtk_combo_box_text.py index b42f02b..71dec36 100644 --- a/blueprintcompiler/extensions/gtk_combo_box_text.py +++ b/blueprintcompiler/extensions/gtk_combo_box_text.py @@ -52,28 +52,23 @@ class Item(BaseTypedAttribute): item = Group( Item, - Sequence( - Optional( - Sequence( - UseIdent("name"), - Op(":"), - ) - ), + [ + Optional([ + UseIdent("name"), + ":", + ]), value, - ) + ] ) items = Group( Items, - Sequence( - Keyword("items", True), - OpenBracket(), - Delimited( - item, - Comma() - ), - CloseBracket(), - ) + [ + Keyword("items"), + "[", + Delimited(item, ","), + "]", + ] ) diff --git a/blueprintcompiler/extensions/gtk_file_filter.py b/blueprintcompiler/extensions/gtk_file_filter.py index 8b45961..116431d 100644 --- a/blueprintcompiler/extensions/gtk_file_filter.py +++ b/blueprintcompiler/extensions/gtk_file_filter.py @@ -49,22 +49,22 @@ class FilterString(AstNode): def create_node(tag_name: str, singular: str): return Group( Filters, - Sequence( - Keyword(tag_name, True), + [ + Keyword(tag_name), UseLiteral("tag_name", tag_name), - OpenBracket(), + "[", Delimited( Group( FilterString, - Sequence( + [ UseQuoted("name"), UseLiteral("tag_name", singular), - ) + ] ), - Comma(), + ",", ), - CloseBracket(), - ) + "]", + ] ) diff --git a/blueprintcompiler/extensions/gtk_layout.py b/blueprintcompiler/extensions/gtk_layout.py index 65bc544..833bc0b 100644 --- a/blueprintcompiler/extensions/gtk_layout.py +++ b/blueprintcompiler/extensions/gtk_layout.py @@ -53,7 +53,7 @@ layout_prop = Group( LayoutProperty, Statement( UseIdent("name"), - Op(":"), + ":", value.expected("a value"), ) ) @@ -61,9 +61,9 @@ layout_prop = Group( layout = Group( Layout, Sequence( - Keyword("layout", True), - OpenBlock(), - Until(layout_prop, CloseBlock()), + Keyword("layout"), + "{", + Until(layout_prop, "}"), ) ) diff --git a/blueprintcompiler/extensions/gtk_menu.py b/blueprintcompiler/extensions/gtk_menu.py index a89bd5e..bbb8361 100644 --- a/blueprintcompiler/extensions/gtk_menu.py +++ b/blueprintcompiler/extensions/gtk_menu.py @@ -51,94 +51,94 @@ menu_contents = Sequence() menu_section = Group( Menu, - Sequence( - Keyword("section"), + [ + "section", UseLiteral("tag", "section"), Optional(UseIdent("id")), menu_contents - ) + ] ) menu_submenu = Group( Menu, - Sequence( - Keyword("submenu"), + [ + "submenu", UseLiteral("tag", "submenu"), Optional(UseIdent("id")), menu_contents - ) + ] ) menu_attribute = Group( MenuAttribute, - Sequence( + [ UseIdent("name"), - Op(":"), + ":", value.expected("a value"), - StmtEnd().expected("`;`"), - ) + Match(";").expected(), + ] ) menu_item = Group( Menu, - Sequence( - Keyword("item"), + [ + "item", UseLiteral("tag", "item"), Optional(UseIdent("id")), - OpenBlock().expected("`{`"), - Until(menu_attribute, CloseBlock()), - ) + Match("{").expected(), + Until(menu_attribute, "}"), + ] ) menu_item_shorthand = Group( Menu, - Sequence( - Keyword("item"), + [ + "item", UseLiteral("tag", "item"), - OpenParen(), + "(", Group( MenuAttribute, - Sequence(UseLiteral("name", "label"), value), + [UseLiteral("name", "label"), value], ), - Optional(Sequence( - Comma(), - Optional(Sequence( + Optional([ + ",", + Optional([ Group( MenuAttribute, - Sequence(UseLiteral("name", "action"), value), + [UseLiteral("name", "action"), value], ), - Optional(Sequence( - Comma(), + Optional([ + ",", Group( MenuAttribute, - Sequence(UseLiteral("name", "icon"), value), + [UseLiteral("name", "icon"), value], ), - )) - )) - )), - CloseParen().expected("')'"), - ) + ]) + ]) + ]), + Match(")").expected(), + ] ) menu_contents.children = [ - OpenBlock(), + Match("{"), Until(AnyOf( menu_section, menu_submenu, menu_item_shorthand, menu_item, menu_attribute, - ), CloseBlock()), + ), "}"), ] menu = Group( Menu, - Sequence( - Keyword("menu"), + [ + "menu", UseLiteral("tag", "menu"), Optional(UseIdent("id")), menu_contents - ), + ], ) diff --git a/blueprintcompiler/extensions/gtk_size_group.py b/blueprintcompiler/extensions/gtk_size_group.py index b2a74c7..3f9a547 100644 --- a/blueprintcompiler/extensions/gtk_size_group.py +++ b/blueprintcompiler/extensions/gtk_size_group.py @@ -60,18 +60,18 @@ class Widget(AstNode): widgets = Group( Widgets, - Sequence( - Keyword("widgets", True), - OpenBracket(), + [ + Keyword("widgets"), + "[", Delimited( Group( Widget, UseIdent("name"), ), - Comma(), + ",", ), - CloseBracket(), - ) + "]", + ] ) diff --git a/blueprintcompiler/extensions/gtk_string_list.py b/blueprintcompiler/extensions/gtk_string_list.py index 08bbc3c..9647080 100644 --- a/blueprintcompiler/extensions/gtk_string_list.py +++ b/blueprintcompiler/extensions/gtk_string_list.py @@ -61,15 +61,12 @@ item = Group( strings = Group( Items, - Sequence( - Keyword("strings", True), - OpenBracket(), - Delimited( - item, - Comma() - ), - CloseBracket(), - ) + [ + Keyword("strings"), + "[", + Delimited(item, ","), + "]", + ] ) diff --git a/blueprintcompiler/extensions/gtk_styles.py b/blueprintcompiler/extensions/gtk_styles.py index cfabbf3..48e1fd6 100644 --- a/blueprintcompiler/extensions/gtk_styles.py +++ b/blueprintcompiler/extensions/gtk_styles.py @@ -46,18 +46,18 @@ class StyleClass(AstNode): styles = Group( Styles, - Sequence( - Keyword("styles", True), - OpenBracket(), + [ + Keyword("styles"), + "[", Delimited( Group( StyleClass, UseQuoted("name") ), - Comma(), + ",", ), - CloseBracket(), - ) + "]", + ] ) diff --git a/blueprintcompiler/parse_tree.py b/blueprintcompiler/parse_tree.py index 523acb4..dcb2012 100644 --- a/blueprintcompiler/parse_tree.py +++ b/blueprintcompiler/parse_tree.py @@ -239,7 +239,7 @@ class Err(ParseNode): """ ParseNode that emits a compile error if it fails to parse. """ def __init__(self, child, message): - self.child = child + self.child = to_parse_node(child) self.message = message def _parse(self, ctx): @@ -258,7 +258,7 @@ class Fail(ParseNode): """ ParseNode that emits a compile error if it parses successfully. """ def __init__(self, child, message): - self.child = child + self.child = to_parse_node(child) self.message = message def _parse(self, ctx): @@ -277,7 +277,7 @@ class Group(ParseNode): """ ParseNode that creates a match group. """ def __init__(self, ast_type, child): self.ast_type = ast_type - self.child = child + self.child = to_parse_node(child) def _parse(self, ctx: ParseContext) -> bool: ctx.skip() @@ -288,7 +288,7 @@ class Group(ParseNode): class Sequence(ParseNode): """ ParseNode that attempts to match all of its children in sequence. """ def __init__(self, *children): - self.children = children + self.children = [to_parse_node(child) for child in children] def _parse(self, ctx) -> bool: for child in self.children: @@ -301,7 +301,7 @@ class Statement(ParseNode): """ ParseNode that attempts to match all of its children in sequence. If any child raises an error, the error will be logged but parsing will continue. """ def __init__(self, *children): - self.children = children + self.children = [to_parse_node(child) for child in children] def _parse(self, ctx) -> bool: for child in self.children: @@ -325,7 +325,7 @@ class AnyOf(ParseNode): """ ParseNode that attempts to match exactly one of its children. Child nodes are attempted in order. """ def __init__(self, *children): - self.children = children + self.children = [to_parse_node(child) for child in children] def _parse(self, ctx): for child in self.children: @@ -339,8 +339,8 @@ class Until(ParseNode): the child does not match, one token is skipped and the match is attempted again. """ def __init__(self, child, delimiter): - self.child = child - self.delimiter = delimiter + self.child = to_parse_node(child) + self.delimiter = to_parse_node(delimiter) def _parse(self, ctx): while not self.delimiter.parse(ctx).succeeded(): @@ -362,7 +362,7 @@ class ZeroOrMore(ParseNode): times). It cannot fail to parse. If its child raises an exception, one token will be skipped and parsing will continue. """ def __init__(self, child): - self.child = child + self.child = to_parse_node(child) def _parse(self, ctx): @@ -379,8 +379,8 @@ class Delimited(ParseNode): """ ParseNode that matches its first child any number of times (including zero times) with its second child in between and optionally at the end. """ def __init__(self, child, delimiter): - self.child = child - self.delimiter = delimiter + self.child = to_parse_node(child) + self.delimiter = to_parse_node(delimiter) def _parse(self, ctx): while self.child.parse(ctx).matched() and self.delimiter.parse(ctx).matched(): @@ -392,60 +392,36 @@ class Optional(ParseNode): """ ParseNode that matches its child zero or one times. It cannot fail to parse. """ def __init__(self, child): - self.child = child + self.child = to_parse_node(child) def _parse(self, ctx): self.child.parse(ctx) return True -class StaticToken(ParseNode): - """ Base class for ParseNodes that match a token type without inspecting - the token's contents. """ - token_type: T.Optional[TokenType] = None - +class Eof(ParseNode): + """ ParseNode that matches an EOF token. """ def _parse(self, ctx: ParseContext) -> bool: - return ctx.next_token().type == self.token_type - -class StmtEnd(StaticToken): - token_type = TokenType.STMT_END - -class Eof(StaticToken): - token_type = TokenType.EOF - -class OpenBracket(StaticToken): - token_type = TokenType.OPEN_BRACKET - -class CloseBracket(StaticToken): - token_type = TokenType.CLOSE_BRACKET - -class OpenBlock(StaticToken): - token_type = TokenType.OPEN_BLOCK - -class CloseBlock(StaticToken): - token_type = TokenType.CLOSE_BLOCK - -class OpenParen(StaticToken): - token_type = TokenType.OPEN_PAREN - -class CloseParen(StaticToken): - token_type = TokenType.CLOSE_PAREN - -class Comma(StaticToken): - token_type = TokenType.COMMA + token = ctx.next_token() + return token.type == TokenType.EOF -class Op(ParseNode): - """ ParseNode that matches the given operator. """ +class Match(ParseNode): + """ ParseNode that matches the given literal token. """ def __init__(self, op): self.op = op def _parse(self, ctx: ParseContext) -> bool: token = ctx.next_token() - if token.type != TokenType.OP: - return False return str(token) == self.op + def expected(self, expect: str = None): + """ Convenience method for err(). """ + if expect is None: + return self.err(f"Expected '{self.op}'") + else: + return self.err("Expected " + expect) + class UseIdent(ParseNode): """ ParseNode that matches any identifier and sets it in a key=value pair on @@ -529,17 +505,22 @@ class UseLiteral(ParseNode): class Keyword(ParseNode): - """ Matches the given identifier. """ - def __init__(self, kw, set_token=False): + """ Matches the given identifier and sets it as a named token, with the name + being the identifier itself. """ + def __init__(self, kw): self.kw = kw self.set_token = True def _parse(self, ctx: ParseContext): token = ctx.next_token() - if token.type != TokenType.IDENT: - return False - - if self.set_token: - ctx.set_group_val(self.kw, True, token) - + ctx.set_group_val(self.kw, True, token) return str(token) == self.kw + + +def to_parse_node(value) -> ParseNode: + if isinstance(value, str): + return Match(value) + elif isinstance(value, list): + return Sequence(*value) + else: + return value diff --git a/blueprintcompiler/parser.py b/blueprintcompiler/parser.py index d2c8f36..53738b3 100644 --- a/blueprintcompiler/parser.py +++ b/blueprintcompiler/parser.py @@ -32,8 +32,8 @@ def parse(tokens) -> T.Tuple[ast.UI, T.Optional[MultipleErrors]]: gtk_directive = Group( ast.GtkDirective, Statement( - Keyword("using").err("File must start with a \"using Gtk\" directive (e.g. `using Gtk 4.0;`)"), - Keyword("Gtk").err("File must start with a \"using Gtk\" directive (e.g. `using Gtk 4.0;`)"), + Match("using").err("File must start with a \"using Gtk\" directive (e.g. `using Gtk 4.0;`)"), + Match("Gtk").err("File must start with a \"using Gtk\" directive (e.g. `using Gtk 4.0;`)"), UseNumberText("version").expected("a version number for GTK"), ) ) @@ -41,7 +41,7 @@ def parse(tokens) -> T.Tuple[ast.UI, T.Optional[MultipleErrors]]: import_statement = Group( ast.Import, Statement( - Keyword("using"), + "using", UseIdent("namespace").expected("a GIR namespace"), UseNumberText("version").expected("a version number"), ) @@ -56,7 +56,7 @@ def parse(tokens) -> T.Tuple[ast.UI, T.Optional[MultipleErrors]]: ast.Property, Statement( UseIdent("name"), - Op(":"), + ":", AnyOf( *OBJECT_HOOKS, object, @@ -69,15 +69,15 @@ def parse(tokens) -> T.Tuple[ast.UI, T.Optional[MultipleErrors]]: ast.Property, Statement( UseIdent("name"), - Op(":"), - Keyword("bind"), + ":", + "bind", UseIdent("bind_source").expected("the ID of a source object to bind from"), - Op("."), + ".", UseIdent("bind_property").expected("a property name to bind from"), ZeroOrMore(AnyOf( - Sequence(Keyword("sync-create"), UseLiteral("sync_create", True)), - Sequence(Keyword("inverted"), UseLiteral("invert-boolean", True)), - Sequence(Keyword("bidirectional"), UseLiteral("bidirectional", True)), + ["sync-create", UseLiteral("sync_create", True)], + ["inverted", UseLiteral("inverted", True)], + ["bidirectional", UseLiteral("bidirectional", True)], )), ) ) @@ -86,47 +86,47 @@ def parse(tokens) -> T.Tuple[ast.UI, T.Optional[MultipleErrors]]: ast.Signal, Statement( UseIdent("name"), - Optional(Sequence( - Op("::"), + Optional([ + "::", UseIdent("detail_name").expected("a signal detail name"), - )), - Op("=>"), + ]), + "=>", UseIdent("handler").expected("the name of a function to handle the signal"), - OpenParen().expected("argument list"), + Match("(").expected("argument list"), Optional(UseIdent("object")).expected("object identifier"), - CloseParen().expected("`)`"), + Match(")").expected(), ZeroOrMore(AnyOf( - Sequence(Keyword("swapped"), UseLiteral("swapped", True)), - Sequence(Keyword("after"), UseLiteral("after", True)), + [Keyword("swapped"), UseLiteral("swapped", True)], + [Keyword("after"), UseLiteral("after", True)], )), ) ) child = Group( ast.Child, - Sequence( - Optional(Sequence( - OpenBracket(), - Optional(Sequence(Keyword("internal-child"), UseLiteral("internal_child", True))), + [ + Optional([ + "[", + Optional(["internal-child", UseLiteral("internal_child", True)]), UseIdent("child_type").expected("a child type"), - CloseBracket(), - )), + "]", + ]), object, - ) + ] ) object_content = Group( ast.ObjectContent, - Sequence( - OpenBlock(), + [ + "{", Until(AnyOf( *OBJECT_CONTENT_HOOKS, binding, property, signal, child, - ), CloseBlock()), - ) + ), "}"), + ] ) # work around the recursive reference @@ -138,22 +138,20 @@ def parse(tokens) -> T.Tuple[ast.UI, T.Optional[MultipleErrors]]: template = Group( ast.Template, - Sequence( - Keyword("template"), + [ + "template", UseIdent("name").expected("template class name"), - Optional( - Sequence( - Op(":"), - class_name.expected("parent class"), - ) - ), + Optional([ + Match(":"), + class_name.expected("parent class"), + ]), object_content.expected("block"), - ) + ] ) ui = Group( ast.UI, - Sequence( + [ gtk_directive, ZeroOrMore(import_statement), Until(AnyOf( @@ -161,7 +159,7 @@ def parse(tokens) -> T.Tuple[ast.UI, T.Optional[MultipleErrors]]: template, object, ), Eof()), - ) + ] ) ctx = ParseContext(tokens) diff --git a/blueprintcompiler/parser_utils.py b/blueprintcompiler/parser_utils.py index ec10ebd..13086a6 100644 --- a/blueprintcompiler/parser_utils.py +++ b/blueprintcompiler/parser_utils.py @@ -23,16 +23,16 @@ from .parse_tree import * class_name = AnyOf( - Sequence( + [ UseIdent("namespace"), - Op("."), + ".", UseIdent("class_name"), - ), - Sequence( - Op("."), + ], + [ + ".", UseIdent("class_name"), UseLiteral("ignore_gir", True), - ), + ], UseIdent("class_name"), ) @@ -51,31 +51,31 @@ ident_value = Group( flags_value = Group( ast.FlagsValue, - Sequence( + [ Group(ast.Flag, UseIdent("value")), - Op("|"), - Delimited(Group(ast.Flag, UseIdent("value")), Op("|")), - ), + "|", + Delimited(Group(ast.Flag, UseIdent("value")), "|"), + ], ) translated_string = Group( ast.TranslatedStringValue, AnyOf( - Sequence( - Keyword("_"), - OpenParen(), + [ + "_", + "(", UseQuoted("value").expected("a quoted string"), - CloseParen().expected("`)`"), - ), - Sequence( - Keyword("C_"), - OpenParen(), + Match(")").expected(), + ], + [ + "C_", + "(", UseQuoted("context").expected("a quoted string"), - Comma(), + ",", UseQuoted("value").expected("a quoted string"), - Optional(Comma()), - CloseParen().expected("`)`"), - ), + Optional(","), + Match(")").expected(), + ], ), )