commit 3a4b7c6dc34f38ec7361f3ae8ea9ec86f5890d2f Author: James Westman Date: Sun Oct 17 15:26:34 2021 -0500 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bee8a64 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__ diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..0a04128 --- /dev/null +++ b/COPYING @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. 
+ + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. 
+ + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. 
(If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. 
+ + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/gtk-blueprint-tool.py b/gtk-blueprint-tool.py new file mode 100755 index 0000000..e7aed77 --- /dev/null +++ b/gtk-blueprint-tool.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 + +# gtk-blueprint-tool.py +# +# Copyright 2021 James Westman +# +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 3 of the +# License, or (at your option) any later version. +# +# This file is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# SPDX-License-Identifier: LGPL-3.0-or-later + +from gtkblueprinttool.main import BlueprintApp + +if __name__ == "__main__": + BlueprintApp().main() diff --git a/gtkblueprinttool/ast.py b/gtkblueprinttool/ast.py new file mode 100644 index 0000000..86b8598 --- /dev/null +++ b/gtkblueprinttool/ast.py @@ -0,0 +1,184 @@ +# ast.py +# +# Copyright 2021 James Westman +# +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 3 of the +# License, or (at your option) any later version. +# +# This file is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# SPDX-License-Identifier: LGPL-3.0-or-later + + +from .errors import assert_true +from .xml_emitter import XmlEmitter + + +class AstNode: + """ Base class for nodes in the abstract syntax tree. """ + + def generate(self) -> str: + """ Generates an XML string from the node. """ + xml = XmlEmitter() + self.emit_xml(xml) + return xml.result + + def emit_xml(self, xml: XmlEmitter): + """ Emits the XML representation of this AST node to the XmlEmitter. """ + raise NotImplementedError() + + +class UI(AstNode): + """ The AST node for the entire file """ + + def __init__(self, gtk_directives=[], imports=[], objects=[], templates=[]): + assert_true(len(gtk_directives) == 1) + + self.gtk_directive = gtk_directives[0] + self.imports = imports + self.objects = objects + self.templates = templates + + def emit_xml(self, xml: XmlEmitter): + xml.start_tag("interface") + self.gtk_directive.emit_xml(xml) + for object in self.objects: + object.emit_xml(xml) + for template in self.templates: + template.emit_xml(xml) + xml.end_tag() + + +class GtkDirective(AstNode): + child_type = "gtk_directives" + def __init__(self, version): + self.version = version + + def emit_xml(self, xml: XmlEmitter): + xml.put_self_closing("requires", lib="gtk", version=self.version) + + +class Import(AstNode): + child_type = "imports" + def __init__(self, namespace, version): + self.namespace = namespace + self.version = version + + def emit_xml(self, xml: XmlEmitter): + pass + + +class Template(AstNode): + child_type = "templates" + def __init__(self, name, class_name, object_content, namespace=None): + assert_true(len(object_content) == 1) + + self.name = name + self.parent_namespace = namespace + self.parent_class = class_name + self.object_content = object_content[0] + + def emit_xml(self, xml: XmlEmitter): +
xml.start_tag("template", **{ + "class": self.name, + "parent": self.parent_namespace + self.parent_class, + }) + self.object_content.emit_xml(xml) + xml.end_tag() + + +class Object(AstNode): + child_type = "objects" + def __init__(self, class_name, object_content, namespace=None, id=None): + assert_true(len(object_content) == 1) + + self.namespace = namespace + self.class_name = class_name + self.id = id + self.object_content = object_content[0] + + def emit_xml(self, xml: XmlEmitter): + xml.start_tag("object", **{ + "class": self.namespace + self.class_name, + "id": self.id, + }) + self.object_content.emit_xml(xml) + xml.end_tag() + + +class Child(AstNode): + child_type = "children" + def __init__(self, objects, child_type=None): + assert_true(len(objects) == 1) + self.object = objects[0] + self.child_type = child_type + + def emit_xml(self, xml: XmlEmitter): + xml.start_tag("child", type=self.child_type) + self.object.emit_xml(xml) + xml.end_tag() + + +class ObjectContent(AstNode): + child_type = "object_content" + def __init__(self, properties=[], signals=[], children=[]): + self.properties = properties + self.signals = signals + self.children = children + + def emit_xml(self, xml: XmlEmitter): + for prop in self.properties: + prop.emit_xml(xml) + for signal in self.signals: + signal.emit_xml(xml) + for child in self.children: + child.emit_xml(xml) + + +class Property(AstNode): + child_type = "properties" + def __init__(self, name, value=None, translatable=False, bind_source=None, bind_property=None): + self.name = name + self.value = value + self.translatable = translatable + self.bind_source = bind_source + self.bind_property = bind_property + + def emit_xml(self, xml: XmlEmitter): + props = { + "name": self.name, + "translatable": "yes" if self.translatable else None, + "bind-source": self.bind_source, + "bind-property": self.bind_property, + } + if self.value is None: + xml.put_self_closing("property", **props) + else: + xml.start_tag("property", **props) + 
xml.put_text(str(self.value)) + xml.end_tag() + + +class Signal(AstNode): + child_type = "signals" + def __init__(self, name, handler, swapped=False, after=False, object=False, detail_name=None): + self.name = name + self.handler = handler + self.swapped = swapped + self.after = after + self.object = object + self.detail_name = detail_name + + def emit_xml(self, xml: XmlEmitter): + name = self.name + if self.detail_name: + name += "::" + self.detail_name + xml.put_self_closing("signal", name=name, handler=self.handler, swapped="true" if self.swapped else None) diff --git a/gtkblueprinttool/errors.py b/gtkblueprinttool/errors.py new file mode 100644 index 0000000..916dd20 --- /dev/null +++ b/gtkblueprinttool/errors.py @@ -0,0 +1,103 @@ +# errors.py +# +# Copyright 2021 James Westman +# +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 3 of the +# License, or (at your option) any later version. +# +# This file is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# SPDX-License-Identifier: LGPL-3.0-or-later + + +import sys, traceback + + +class _colors: + RED = '\033[91m' + YELLOW = '\033[33m' + FAINT = '\033[2m' + BOLD = '\033[1m' + CLEAR = '\033[0m' + +class PrintableError(Exception): + """ Parent class for errors that can be pretty-printed for the user, e.g. + compilation warnings and errors.
""" + + def pretty_print(self, filename, code): + raise NotImplementedError() + + +class CompileError(PrintableError): + category = "error" + + def __init__(self, message, start, end=None): + super().__init__(message) + + self.message = message + self.start = start + self.end = end + + def pretty_print(self, filename, code): + sp = code[:self.start+1].splitlines(keepends=True) + line_num = len(sp) + col_num = len(sp[-1]) + line = code.splitlines(True)[line_num-1] + + print(f"""{_colors.RED}{_colors.BOLD}{self.category}: {self.message}{_colors.CLEAR} +at {filename} line {line_num} column {col_num}: +{_colors.FAINT}{line_num :>4} |{_colors.CLEAR} {line} {_colors.FAINT}|{" "*(col_num)}^{_colors.CLEAR} +""") + + +class TokenizeError(CompileError): + def __init__(self, start): + super().__init__("Could not determine what kind of syntax is meant here", start) + + +class ParseError(CompileError): + pass + + +class MultipleErrors(PrintableError): + """ If multiple errors occur during compilation, they can be collected into + a list and re-thrown using the MultipleErrors exception. It will + pretty-print all of the errors and a count of how many errors there are. """ + + def __init__(self, errors: [CompileError]): + super().__init__() + self.errors = errors + + def pretty_print(self, filename, code) -> str: + for error in self.errors: + error.pretty_print(filename, code) + if len(self.errors) != 1: + print(f"{len(self.errors)} errors") + + +class CompilerBugError(Exception): + """ Emitted on assertion errors """ + + +def assert_true(truth: bool, message:str=None): + if not truth: + raise CompilerBugError(message) + + +def report_compile_error(): + """ Report an error and ask people to report it. """ + + print(traceback.format_exc()) + print(f"Arguments: {sys.argv}\n") + print(f"""{_colors.BOLD}{_colors.RED}***** COMPILER BUG ***** +The gtk-blueprint-tool program has crashed. 
Please report the above stacktrace +to the maintainers, along with the input file(s) if possible.{_colors.CLEAR}""") + diff --git a/gtkblueprinttool/main.py b/gtkblueprinttool/main.py new file mode 100644 index 0000000..66eb857 --- /dev/null +++ b/gtkblueprinttool/main.py @@ -0,0 +1,68 @@ +# main.py +# +# Copyright 2021 James Westman +# +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 3 of the +# License, or (at your option) any later version. +# +# This file is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# SPDX-License-Identifier: LGPL-3.0-or-later + + +import argparse, sys + +from .errors import PrintableError, report_compile_error +from .pipeline import Pipeline +from .
import parser, tokenizer + + +class BlueprintApp: + def main(self): + self.parser = argparse.ArgumentParser() + self.subparsers = self.parser.add_subparsers(metavar="command") + self.parser.set_defaults(func=self.cmd_help) + + compile = self.add_subcommand("compile", "Compile blueprint files", self.cmd_compile) + compile.add_argument("--output", dest="output", default="-") + compile.add_argument("input", metavar="filename", default=sys.stdin, type=argparse.FileType('r')) + + self.add_subcommand("help", "Show this message", self.cmd_help) + + try: + opts = self.parser.parse_args() + opts.func(opts) + except: + report_compile_error() + + + def add_subcommand(self, name, help, func): + parser = self.subparsers.add_parser(name, help=help) + parser.set_defaults(func=func) + return parser + + def cmd_help(self, opts): + self.parser.print_help() + + def cmd_compile(self, opts): + data = opts.input.read() + try: + tokens = tokenizer.tokenize(data) + ast = parser.parse(tokens) + xml = ast.generate() + if opts.output == "-": + print(xml) + else: + with open(opts.output, "w") as file: + file.write(xml) + except PrintableError as e: + e.pretty_print(opts.input.name, data) + sys.exit(1) diff --git a/gtkblueprinttool/parse_tree.py b/gtkblueprinttool/parse_tree.py new file mode 100644 index 0000000..74a4858 --- /dev/null +++ b/gtkblueprinttool/parse_tree.py @@ -0,0 +1,435 @@ +# parse_tree.py +# +# Copyright 2021 James Westman +# +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 3 of the +# License, or (at your option) any later version. +# +# This file is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# SPDX-License-Identifier: LGPL-3.0-or-later + +""" Utilities for parsing an AST from a token stream. """ + +from enum import Enum + +from .ast import AstNode +from .errors import assert_true, CompilerBugError, CompileError, ParseError +from .tokenizer import Token, TokenType + + +_SKIP_TOKENS = [TokenType.COMMENT, TokenType.WHITESPACE] +_RECOVER_TOKENS = [TokenType.COMMENT, TokenType.STMT_END, TokenType.CLOSE_BLOCK, TokenType.EOF] + + +class ParseResult(Enum): + """ Represents the result of parsing. The extra EMPTY result is necessary + to avoid freezing the parser: imagine a ZeroOrMore node containing a node + that can match empty. It will repeatedly match empty and never advance + the parser. So, ZeroOrMore stops when a failed *or empty* match is + made. """ + + SUCCESS = 0 + FAILURE = 1 + EMPTY = 2 + + def matched(self): + return self == ParseResult.SUCCESS + + def succeeded(self): + return self != ParseResult.FAILURE + + def failed(self): + return self == ParseResult.FAILURE + + +class ParseGroup: + """ A matching group. Match groups have an AST type, children grouped by + type, and key=value pairs. At the end of parsing, the match groups will + be converted to AST nodes by passing the children and key=value pairs to + the AST node constructor. """ + + def __init__(self, ast_type): + self.ast_type = ast_type + self.children = {} + self.keys = {} + + def add_child(self, child): + child_type = child.ast_type.child_type + if child_type not in self.children: + self.children[child_type] = [] + self.children[child_type].append(child) + + def set_val(self, key, val): + assert_true(key not in self.keys) + + self.keys[key] = val + + def to_ast(self) -> AstNode: + """ Creates an AST node from the match group.
""" + children = { + child_type: [child.to_ast() for child in children] + for child_type, children in self.children.items() + } + try: + return self.ast_type(**children, **self.keys) + except TypeError as e: + raise CompilerBugError(f"Failed to construct ast.{self.ast_type.__name__} from ParseGroup. See the previous stacktrace.") + + def __str__(self): + result = str(self.ast_type.__name__) + result += "".join([f"\n{key}: {val}" for key, val in self.keys.items()]) + "\n" + result += "\n".join([str(child) for children in self.children.values() for child in children]) + return result.replace("\n", "\n ") + + +class ParseContext: + """ Contains the state of the parser. """ + + def __init__(self, tokens, index=0): + self.tokens = list(tokens) + + self.index = index + self.start = index + self.group = None + self.group_keys = {} + self.group_children = [] + self.last_group = None + + self.errors = [] + self.warnings = [] + + + def create_child(self): + """ Creates a new ParseContext at this context's position. The new + context will be used to parse one node. If parsing is successful, the + new context will be applied to "self". If parsing fails, the new + context will be discarded. """ + return ParseContext(self.tokens, self.index) + + def apply_child(self, other): + """ Applies a child context to this context. """ + + if other.group is not None: + # If the other context had a match group, collect all the matched + # values into it and then add it to our own match group. 
+ for key, val in other.group_keys.items(): + other.group.set_val(key, val) + for child in other.group_children: + other.group.add_child(child) + self.group_children.append(other.group) + else: + # If the other context had no match group of its own, collect all + # its matched values + self.group_keys = {**self.group_keys, **other.group_keys} + self.group_children += other.group_children + + self.index = other.index + # Propagate the last parsed group down the stack so it can be easily + # retrieved at the end of the process + if other.group: + self.last_group = other.group + elif other.last_group: + self.last_group = other.last_group + + + def start_group(self, ast_type): + """ Sets this context to have its own match group. """ + assert_true(self.group is None) + self.group = ParseGroup(ast_type) + + def set_group_val(self, key, value): + """ Sets a matched key=value pair on the current match group. """ + assert_true(key not in self.group_keys) + self.group_keys[key] = value + + + def create_parse_error(self, message): + """ Creates a ParseError identifying the current token index. """ + start_idx = self.start + while self.tokens[start_idx].type in _SKIP_TOKENS: + start_idx += 1 + + start_token = self.tokens[start_idx] + end_token = self.tokens[self.index] + return ParseError(message, start_token.start, end_token.end) + + + def skip(self): + """ Skips whitespace and comments. """ + while self.index < len(self.tokens) and self.tokens[self.index].type in _SKIP_TOKENS: + self.index += 1 + + def next_token(self) -> Token: + """ Advances the token iterator and returns the next token. """ + self.skip() + token = self.tokens[self.index] + self.index += 1 + return token + + +class ParseNode: + """ Base class for the nodes in the parser tree. """ + + def parse(self, ctx: ParseContext) -> ParseResult: + """ Attempts to match the ParseNode at the context's current location. 
""" + start_idx = ctx.index + inner_ctx = ctx.create_child() + + if self._parse(inner_ctx): + ctx.apply_child(inner_ctx) + if ctx.index == start_idx: + return ParseResult.EMPTY + else: + return ParseResult.SUCCESS + else: + return ParseResult.FAILURE + + def err(self, message): + """ Causes this ParseNode to raise an exception if it fails to parse. + This prevents the parser from backtracking, so you should understand + what it does and how the parser works before using it. """ + return Err(self, message) + + def expected(self, expect): + """ Convenience method for err(). """ + return self.err("Expected " + expect) + + def recover(self): + """ Causes the parser to try to recover, even if the ParseNode raises + an error. Recovery will log the error so it's still displayed, but + skip ahead to the next token in _RECOVERY_TOKENS to try to recover + parsing. + + This is important because it allows us to report multiple errors at + once in most cases, rather than making the user recompile after + fixing each issue. """ + return Recover(self) + + +class Err(ParseNode): + """ ParseNode that emits a compile error if it fails to parse. """ + + def __init__(self, child, message): + self.child = child + self.message = message + + def _parse(self, ctx): + if self.child.parse(ctx).failed(): + raise ctx.create_parse_error(self.message) + return True + + +class Recover(ParseNode): + """ ParseNode that attempts to recover parsing if an error is emitted. """ + def __init__(self, child): + self.child = child + + def _parse(self, ctx: ParseContext) -> bool: + try: + return self.child.parse(ctx).succeeded() + except CompileError as e: + ctx.errors.append(e) + while ctx.next_token().type not in _RECOVER_TOKENS: + pass + return True + + +class Group(ParseNode): + """ ParseNode that creates a match group. 
""" + def __init__(self, ast_type, child): + self.ast_type = ast_type + self.child = child + + def _parse(self, ctx: ParseContext) -> bool: + ctx.start_group(self.ast_type) + return self.child.parse(ctx).succeeded() + + +class Sequence(ParseNode): + """ ParseNode that attempts to match all of its children in sequence. """ + def __init__(self, *children): + self.children = children + + def _parse(self, ctx) -> bool: + for child in self.children: + if child.parse(ctx).failed(): + return False + return True + + +class AnyOf(ParseNode): + """ ParseNode that attempts to match exactly one of its children. Child + nodes are attempted in order. """ + def __init__(self, *children): + self.children = children + + def _parse(self, ctx): + for child in self.children: + if child.parse(ctx).succeeded(): + return True + return False + + +class ZeroOrMore(ParseNode): + """ ParseNode that matches its child any number of times (including zero + times). It cannot fail to parse. """ + def __init__(self, child): + self.child = child + + def _parse(self, ctx): + while self.child.parse(ctx).matched(): + pass + return True + + +class Optional(ParseNode): + """ ParseNode that matches its child zero or one times. It cannot fail to + parse. """ + def __init__(self, child): + self.child = child + + def _parse(self, ctx): + self.child.parse(ctx) + return True + + +class Directive(ParseNode): + """ ParseNode that matches a directive with the given name. """ + def __init__(self, name): + self.name = name + + def _parse(self, ctx: ParseContext): + return ctx.next_token().is_directive(self.name) + + +class StaticToken(ParseNode): + """ Base class for ParseNodes that match a token type without inspecting + the token's contents. 
class StmtEnd(StaticToken):
    """ Matches a statement terminator token (`;`). """
    token_type = TokenType.STMT_END


class Eof(StaticToken):
    """ Matches the end-of-file token. """
    token_type = TokenType.EOF


class OpenBracket(StaticToken):
    """ Matches an opening bracket token (`[`). """
    token_type = TokenType.OPEN_BRACKET


class CloseBracket(StaticToken):
    """ Matches a closing bracket token (`]`). """
    token_type = TokenType.CLOSE_BRACKET


class OpenBlock(StaticToken):
    """ Matches a block opening token (`{`). """
    token_type = TokenType.OPEN_BLOCK


class CloseBlock(StaticToken):
    """ Matches a block closing token (`}`). """
    token_type = TokenType.CLOSE_BLOCK


class OpenParen(StaticToken):
    """ Matches an opening parenthesis token (`(`). """
    token_type = TokenType.OPEN_PAREN


class CloseParen(StaticToken):
    """ Matches a closing parenthesis token (`)`). """
    token_type = TokenType.CLOSE_PAREN


class Op(ParseNode):
    """ ParseNode that matches the given operator. """
    def __init__(self, op):
        self.op = op

    def _parse(self, ctx: ParseContext) -> bool:
        token = ctx.next_token()
        return token.type == TokenType.OP and str(token) == self.op


class UseIdent(ParseNode):
    """ ParseNode that matches any identifier and sets it in a key=value pair on
    the containing match group. """
    def __init__(self, key):
        self.key = key

    def _parse(self, ctx: ParseContext) -> bool:
        token = ctx.next_token()
        if token.type != TokenType.IDENT:
            return False

        ctx.set_group_val(self.key, str(token))
        return True


class UseNumber(ParseNode):
    """ ParseNode that matches a number and sets it in a key=value pair on
    the containing match group.

    Unless `keep_trailing_decimal` is true, a number with no fractional part
    is stored as an `int` (so `4.0` is stored as `4`). """
    def __init__(self, key, keep_trailing_decimal=False):
        self.key = key
        self.keep_trailing_decimal = keep_trailing_decimal

    def _parse(self, ctx: ParseContext) -> bool:
        token = ctx.next_token()
        if token.type != TokenType.NUMBER:
            return False

        number = token.get_number()
        if not self.keep_trailing_decimal and number % 1.0 == 0:
            number = int(number)
        ctx.set_group_val(self.key, number)
        return True


class UseQuoted(ParseNode):
    """ ParseNode that matches a quoted string and sets it in a key=value pair
    on the containing match group. The surrounding quotes are removed and
    escape sequences are resolved. """

    # Character following a backslash -> character it stands for. Any other
    # backslash sequence is left in the string untouched.
    _ESCAPES = {"n": "\n", "\"": "\"", "'": "'", "\\": "\\"}

    def __init__(self, key):
        self.key = key

    @classmethod
    def _unescape(cls, text: str) -> str:
        """ Resolves backslash escapes in a single left-to-right pass.

        Chaining `str.replace()` calls is order-dependent: an escaped
        backslash followed by `n` would be mistaken for a newline escape.
        Scanning once and consuming each escape as a pair avoids that. """
        pieces = []
        chars = iter(text)
        for char in chars:
            if char != "\\":
                pieces.append(char)
                continue

            escaped = next(chars, None)
            if escaped is None:
                # lone trailing backslash: keep it as-is
                pieces.append(char)
            else:
                pieces.append(cls._ESCAPES.get(escaped, char + escaped))
        return "".join(pieces)

    def _parse(self, ctx: ParseContext) -> bool:
        token = ctx.next_token()
        if token.type != TokenType.QUOTED:
            return False

        # [1:-1] strips the opening and closing quote characters
        ctx.set_group_val(self.key, self._unescape(str(token)[1:-1]))
        return True


class UseLiteral(ParseNode):
    """ ParseNode that doesn't match anything, but rather sets a static key=value
    pair on the containing group. Useful for, e.g., property and signal flags:
    `Sequence(Keyword("swapped"), UseLiteral("swapped", True))` """
    def __init__(self, key, literal):
        self.key = key
        self.literal = literal

    def _parse(self, ctx: ParseContext) -> bool:
        ctx.set_group_val(self.key, self.literal)
        return True


class Keyword(ParseNode):
    """ Matches the given identifier. """
    def __init__(self, kw):
        self.kw = kw

    def _parse(self, ctx: ParseContext) -> bool:
        token = ctx.next_token()
        return token.type == TokenType.IDENT and str(token) == self.kw
""" + + gtk_directive = Group( + ast.GtkDirective, + Sequence( + Directive("gtk"), + UseNumber("version", True).expected("a version number for GTK"), + StmtEnd().expected("`;`"), + ) + ) + + import_statement = Group( + ast.Import, + Sequence( + Directive("import"), + UseIdent("namespace").expected("a GIR namespace"), + UseNumber("version", True).expected("a version number"), + StmtEnd().expected("`;`"), + ) + ).recover() + + class_name = AnyOf( + Sequence( + UseIdent("namespace"), + Op("."), + UseIdent("class_name"), + ), + UseIdent("class_name"), + ) + + value = AnyOf( + Sequence( + Keyword("_"), + OpenParen(), + UseQuoted("value").expected("a quoted string"), + CloseParen().expected("`)`"), + UseLiteral("translatable", True), + ), + Sequence(Keyword("True"), UseLiteral("value", True)), + Sequence(Keyword("true"), UseLiteral("value", True)), + Sequence(Keyword("Yes"), UseLiteral("value", True)), + Sequence(Keyword("yes"), UseLiteral("value", True)), + Sequence(Keyword("False"), UseLiteral("value", False)), + Sequence(Keyword("false"), UseLiteral("value", False)), + Sequence(Keyword("No"), UseLiteral("value", False)), + Sequence(Keyword("no"), UseLiteral("value", False)), + UseIdent("value"), + UseNumber("value"), + UseQuoted("value"), + ) + + property = Group( + ast.Property, + Sequence( + UseIdent("name"), + Op(":"), + value.expected("a value"), + StmtEnd().expected("`;`"), + ) + ).recover() + + binding = Group( + ast.Property, + Sequence( + UseIdent("name"), + Op(":="), + UseIdent("bind_source").expected("the ID of a source object to bind from"), + Op("."), + UseIdent("bind_property").expected("a property name to bind from"), + StmtEnd().expected("`;`"), + ) + ).recover() + + signal = Group( + ast.Signal, + Sequence( + UseIdent("name"), + Optional(Sequence( + Op("::"), + UseIdent("detail_name").expected("a signal detail name"), + )), + Op("=>"), + UseIdent("handler").expected("the name of a function to handle the signal"), + OpenParen().expected("argument 
list"), + CloseParen().expected("`)`"), + ZeroOrMore(AnyOf( + Sequence(Keyword("swapped"), UseLiteral("swapped", True)), + Sequence(Keyword("after"), UseLiteral("after", True)), + Sequence(Keyword("object"), UseLiteral("object", True)), + )), + StmtEnd().expected("`;`"), + ) + ).recover() + + object = Group( + ast.Object, + None + ) + + child = Group( + ast.Child, + Sequence( + Optional(Sequence( + OpenBracket(), + UseIdent("child_type").expected("a child type"), + CloseBracket(), + )), + object, + ) + ) + + object_content = Group( + ast.ObjectContent, + Sequence( + OpenBlock(), + ZeroOrMore(AnyOf( + property, + binding, + signal, + child, + )), + CloseBlock().err("Could not understand statement"), + ) + ) + + # work around the recursive reference + object.child = Sequence( + class_name, + Optional(UseIdent("id")), + object_content.expected("block"), + ) + + template = Group( + ast.Template, + Sequence( + Directive("template"), + UseIdent("name").expected("template class name"), + Op(":").expected("`:`"), + class_name.expected("parent class"), + object_content.expected("block"), + ) + ) + + ui = Group( + ast.UI, + Sequence( + gtk_directive.err("File must start with a @gtk directive (e.g. 
class Pipeline:
    """ Represents the pipeline from blueprint code to XML, through the
    tokenizer and abstract syntax tree steps. Setting any step
    automatically updates the later steps.

    Each step is computed lazily from the previous one the first time it is
    read, then cached. Assigning to any step discards every cached step, so
    later steps are recomputed from the new value on their next read. """

    def __init__(self, string=None):
        self._string = string
        self._tokens = None
        self._ast = None
        self._xml = None

    @property
    def string(self) -> str:
        """ Blueprint code """
        return self._string

    @string.setter
    def string(self, new_val):
        self._reset()
        self._string = new_val

    @property
    def tokens(self) -> "list[tokenizer.Token]":
        """ List of tokens """
        if self._tokens is None and self.string is not None:
            self._tokens = tokenizer.tokenize(self._string)
        return self._tokens

    @tokens.setter
    def tokens(self, new_val):
        self._reset()
        self._tokens = new_val

    @property
    def ast(self) -> "ast.UI":
        """ Abstract syntax tree """
        if self._ast is None and self.tokens is not None:
            # the parser module's entry point is `parse`
            self._ast = parser.parse(self.tokens)
        return self._ast

    @ast.setter
    def ast(self, new_val):
        self._reset()
        self._ast = new_val

    @property
    def xml(self) -> str:
        """ GtkBuilder XML string """
        if self._xml is None and self.ast is not None:
            emitter = xml_emitter.XmlEmitter()
            self.ast.generate(emitter)
            self._xml = emitter.result
        return self._xml

    @xml.setter
    def xml(self, new_val):
        self._reset()
        self._xml = new_val

    def _reset(self):
        """ Drops every cached step. Called by each setter before it stores
        its new value, so no stale result from another step survives. """
        self._string = None
        self._tokens = None
        self._ast = None
        self._xml = None
class TokenType(Enum):
    """ Kinds of token produced by the tokenizer. """
    EOF = 0
    DIRECTIVE = 1
    IDENT = 2
    QUOTED = 3
    NUMBER = 4
    OPEN_PAREN = 5
    CLOSE_PAREN = 6
    OPEN_BLOCK = 7
    CLOSE_BLOCK = 8
    STMT_END = 9
    OP = 10
    WHITESPACE = 11
    COMMENT = 12
    OPEN_BRACKET = 13
    CLOSE_BRACKET = 14
    COMMA = 15


# Patterns are tried in order at each position and the first match wins, so
# the order of this list matters.
_TOKENS = [
    (TokenType.DIRECTIVE, r"@[\d\w\-_]+"),
    (TokenType.IDENT, r"[A-Za-z_][\d\w\-_]*"),
    # A backslash always consumes the character after it, so a string that
    # ends in an escaped backslash closes where it should. `*` rather than
    # `+` so that empty strings are accepted.
    (TokenType.QUOTED, r'"(\\.|[^"\\\n])*"'),
    (TokenType.QUOTED, r"'(\\.|[^'\\\n])*'"),
    # Hex must come before decimal, otherwise the decimal pattern matches
    # the leading `0` and the rest is read as an identifier.
    (TokenType.NUMBER, r"0x[A-Fa-f0-9]+"),
    (TokenType.NUMBER, r"[-+]?[\d_]+(\.[\d_]+)?"),
    (TokenType.OPEN_PAREN, r"\("),
    (TokenType.CLOSE_PAREN, r"\)"),
    (TokenType.OPEN_BLOCK, r"\{"),
    (TokenType.CLOSE_BLOCK, r"\}"),
    (TokenType.STMT_END, r";"),
    (TokenType.OP, r"[:=\.=\|<>]+"),
    (TokenType.WHITESPACE, r"\s+"),
    # [\s\S] instead of `.` so that block comments may span several lines
    (TokenType.COMMENT, r"\/\*[\s\S]*?\*\/"),
    (TokenType.COMMENT, r"\/\/[^\n]*"),
    (TokenType.OPEN_BRACKET, r"\["),
    (TokenType.CLOSE_BRACKET, r"\]"),
    (TokenType.COMMA, r"\,"),
]
_TOKENS = [(token_type, re.compile(regex)) for (token_type, regex) in _TOKENS]


class Token:
    """ A typed span of the source text. `start` and `end` are offsets into
    `string`, which is the whole text that was tokenized. """
    def __init__(self, type, start, end, string):
        self.type = type
        self.start = start
        self.end = end
        self.string = string

    def __str__(self):
        return self.string[self.start:self.end]

    def __repr__(self):
        return f"Token({self.type}, {str(self)!r})"

    def is_directive(self, directive) -> bool:
        """ Whether this token is the directive `@<directive>`. """
        if self.type != TokenType.DIRECTIVE:
            return False
        return str(self) == "@" + directive

    def get_number(self):
        """ Returns the numeric value of a NUMBER token: an `int` for
        hexadecimal literals, a `float` otherwise. Returns None for any
        other token type. """
        if self.type != TokenType.NUMBER:
            return None

        # underscores are only digit separators
        string = str(self).replace("_", "")
        if string.startswith("0x"):
            return int(string, 16)
        return float(string)


def _tokenize(ui_ml: str):
    """ Yields the tokens of `ui_ml` in order, followed by one EOF token.
    Raises TokenizeError with the offending offset when no pattern matches. """
    i = 0
    while i < len(ui_ml):
        for (token_type, regex) in _TOKENS:
            match = regex.match(ui_ml, i)
            if match is not None:
                yield Token(token_type, match.start(), match.end(), ui_ml)
                i = match.end()
                break
        else:
            # no pattern matched at this position
            raise TokenizeError(i)

    yield Token(TokenType.EOF, i, i, ui_ml)


def tokenize(data: str) -> "list[Token]":
    """ Tokenizes `data` and returns all of its tokens as a list. """
    return list(_tokenize(data))
class XmlEmitter:
    """ Builds an XML document as a string, one tag at a time.

    Open tags are tracked on a stack so that `end_tag()` knows which tag to
    close and how deep to indent. The text built so far is in `result`.
    With `indent=None`, no newlines or indentation are written. """

    def __init__(self, indent=2):
        self.indent = indent
        self.result = ''
        self._tag_stack = []
        self._needs_newline = False

    def start_tag(self, tag, **attrs):
        """ Opens `tag` with the given attributes. Attributes whose value is
        None are omitted. """
        self._open_tag(tag, attrs, ">")
        self._tag_stack.append(tag)
        self._needs_newline = False

    def put_self_closing(self, tag, **attrs):
        """ Writes `tag` as a self-closing element. Attributes whose value is
        None are omitted. """
        self._open_tag(tag, attrs, "/>")
        self._needs_newline = True

    def end_tag(self):
        """ Closes the most recently opened tag. """
        tag = self._tag_stack.pop()
        # Only put the closing tag on its own line if a child element was
        # written; after plain text it stays on the same line.
        if self._needs_newline:
            self._indent()
        self.result += f"</{tag}>"
        self._needs_newline = True

    def put_text(self, text):
        """ Writes escaped character data inside the current tag. """
        self.result += saxutils.escape(str(text))
        self._needs_newline = False

    def _open_tag(self, tag, attrs, closer):
        """ Writes `<tag attr="value"...` followed by `closer`. """
        self._indent()
        self.result += f"<{tag}"
        for key, val in attrs.items():
            if val is not None:
                # Values sit inside double quotes, so `"` must be escaped
                # along with the usual &, < and >.
                escaped = saxutils.escape(str(val), {'"': "&quot;"})
                self.result += f' {key}="{escaped}"'
        self.result += closer

    def _indent(self):
        if self.indent is not None:
            self.result += "\n" + " " * (self.indent * len(self._tag_stack))