Add a formatter

2025-07-14 02:19:10 -04:00 · 2023-11-03 06:48:28 -05:00 · 2023-11-03 06:48:28 -05:00 · 4fa64cdf33
commit 4fa64cdf33
parent 2faa9207de
12 changed files with 613 additions and 11 deletions
--- a/blueprintcompiler/formatter.py
+++ b/blueprintcompiler/formatter.py
@ -0,0 +1,225 @@
+# formatter.py
+#
+# Copyright 2023 Gregor Niehl <gregorniehl@web.de>
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as
+# published by the Free Software Foundation; either version 3 of the
+# License, or (at your option) any later version.
+#
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# SPDX-License-Identifier: LGPL-3.0-or-later
+
+import re
+from enum import Enum
+
+from . import tokenizer, utils
+from .tokenizer import TokenType
+
+# Bracket-like tokens that open / close a block or a list.
+OPENING_TOKENS = ("{", "[")
+CLOSING_TOKENS = ("}", "]")
+
+# Tokens that trigger the line-break handling in Formatter.format.
+NEWLINE_AFTER = (";", *OPENING_TOKENS, *CLOSING_TOKENS)
+
+# Tokens that suppress a space on their left / on their right.
+NO_WHITESPACE_BEFORE = (
+    ",",
+    ":",
+    "::",
+    ";",
+    ")",
+    ".",
+    ">",
+    "]",
+    "=",
+)
+NO_WHITESPACE_AFTER = (
+    "C_",
+    "_",
+    "(",
+    ".",
+    "$",
+    "<",
+    "::",
+    "[",
+    "=",
+)
+
+# Tokens that request a space on their right / on their left.
+# NO_WHITESPACE_BEFORE takes precedence over WHITESPACE_AFTER
+WHITESPACE_AFTER = (
+    ":",
+    ",",
+    ">",
+    ")",
+    "|",
+    "=>",
+)
+WHITESPACE_BEFORE = ("{", "|")
+
+
+class LineType(Enum):
+    """Kind of line most recently committed by Formatter.format.
+
+    The formatter stores one of these after each committed line and consults
+    it when deciding how many newlines to put before the next line.
+    """
+
+    # A line terminated by ";".
+    STATEMENT = 0
+    # A line ending in an opening token ("{" or a non-child-type "[").
+    BLOCK_OPEN = 1
+    # A line ending in a closing token; also used for a lone ";" line.
+    BLOCK_CLOSE = 2
+    # A bracketed line that started with "[" (e.g. "[child]").
+    CHILD_TYPE = 3
+    # A comment token.
+    COMMENT = 4
+
+
+class Formatter:
+    """Re-formats blueprint source by re-emitting its token stream.
+
+    Whitespace tokens from the input are discarded; spacing, line breaks and
+    indentation are rebuilt from the module-level token tables.
+    """
+
+    @staticmethod
+    def format(data, tab_size=2, insert_space=True):
+        """Return ``data`` with normalized spacing, line breaks and indentation.
+
+        :param data: blueprint source text, passed to ``tokenizer.tokenize``.
+        :param tab_size: number of spaces per indent level; only used when
+            ``insert_space`` is true.
+        :param insert_space: indent with spaces if true, otherwise with one
+            tab character per level.
+        :returns: the formatted text ("" when there are no tokens).
+        """
+        indent_levels = 0
+        tokens = tokenizer.tokenize(data)
+        if not tokens:
+            # Nothing to emit; tokens[0] below would raise on an empty list.
+            return ""
+
+        end_str = ""
+        last_not_whitespace = tokens[0]
+        current_line = ""
+        prev_line_type = None
+        is_child_type = False
+        indent_item = " " * tab_size if insert_space else "\t"
+        watch_parentheses = False
+        parentheses_balance = 0
+        # The leading None is a sentinel for "not inside any bracket".
+        bracket_tracker = [None]
+
+        # Raw strings: the pattern contains "\-", "\s" and "\(", which are
+        # invalid escape sequences in a normal string literal.
+        one_word_re = re.compile(r"^([A-Za-z_\-])+$")
+        word_then_paren_re = re.compile(r"^([A-Za-z_\-])+\s*\(")
+
+        def commit_current_line(
+            line_type=None, redo_whitespace=False, newlines_before=1
+        ):
+            """Append ``current_line`` to the output and start a new line.
+
+            ``line_type`` is stored as the new ``prev_line_type``; calling
+            without it resets ``prev_line_type`` to None.  With
+            ``redo_whitespace`` or ``newlines_before != 1`` the whitespace
+            around the output so far is stripped and replaced by
+            ``newlines_before`` newlines (plus indentation if > 0).
+            """
+            nonlocal end_str, current_line, prev_line_type
+
+            indent_whitespace = indent_levels * indent_item
+            whitespace_to_add = "\n" + indent_whitespace
+
+            if redo_whitespace or newlines_before != 1:
+                end_str = end_str.strip() + ("\n" * newlines_before)
+                if newlines_before > 0:
+                    end_str += indent_whitespace
+
+            end_str += current_line + whitespace_to_add
+
+            current_line = ""
+            prev_line_type = line_type
+
+        for item in tokens:
+            if item.type == TokenType.WHITESPACE:
+                continue
+
+            str_item = str(item)
+            last_str = str(last_not_whitespace)
+
+            whitespace_required = (
+                str_item in WHITESPACE_BEFORE or last_str in WHITESPACE_AFTER
+            )
+            whitespace_blockers = (
+                str_item in NO_WHITESPACE_BEFORE or last_str in NO_WHITESPACE_AFTER
+            )
+
+            this_or_last_is_ident = (
+                item.type == TokenType.IDENT
+                or last_not_whitespace.type == TokenType.IDENT
+            )
+            current_line_is_empty = len(current_line) == 0
+            just_one_word_on_line = one_word_re.match(current_line)
+            is_short_object_def = str_item == "(" and not just_one_word_on_line
+
+            any_blockers = (
+                whitespace_blockers or current_line_is_empty or is_short_object_def
+            )
+
+            if (whitespace_required or this_or_last_is_ident) and not any_blockers:
+                current_line += " "
+
+            current_line += str_item
+
+            if str_item in ["[", "("]:
+                bracket_tracker.append(str_item)
+            elif str_item in ["]", ")"]:
+                bracket_tracker.pop()
+
+            needs_newline_treatment = (
+                str_item in NEWLINE_AFTER or item.type == TokenType.COMMENT
+            )
+            if needs_newline_treatment:
+                if str_item in OPENING_TOKENS:
+                    list_or_child_type = str_item == "["
+                    if list_or_child_type:
+                        # A "[" at the very start of a line opens a child
+                        # type such as "[child]" rather than a list.
+                        is_child_type = current_line.startswith("[")
+
+                        if is_child_type:
+                            if last_str not in OPENING_TOKENS:
+                                end_str = (
+                                    end_str.strip()
+                                    + "\n\n"
+                                    + (indent_item * indent_levels)
+                                )
+                            last_not_whitespace = item
+                            continue
+
+                    indent_levels += 1
+                    keep_same_indent = prev_line_type not in [
+                        LineType.CHILD_TYPE,
+                        LineType.COMMENT,
+                        LineType.BLOCK_OPEN,
+                    ]
+                    if keep_same_indent:
+                        # Separate the block from what precedes it with a
+                        # blank line, indented at the outer level.
+                        end_str = (
+                            end_str.strip()
+                            + "\n\n"
+                            + (indent_item * (indent_levels - 1))
+                        )
+                    commit_current_line(LineType.BLOCK_OPEN)
+
+                elif str_item == "]" and is_child_type:
+                    commit_current_line(LineType.CHILD_TYPE, False)
+                    is_child_type = False
+
+                elif str_item in CLOSING_TOKENS:
+                    # Compare the token's text, not the token object, with ",".
+                    if str_item == "]" and last_str != ",":
+                        # Put the closing "]" on its own line after the last
+                        # list item.
+                        current_line = current_line[:-1]
+                        commit_current_line()
+                        current_line = "]"
+
+                    indent_levels -= 1
+                    commit_current_line(LineType.BLOCK_CLOSE, True)
+
+                elif str_item == ";":
+                    line_type = LineType.STATEMENT
+                    if len(current_line) == 1:
+                        # A lone ";" is glued to the previous output
+                        # (e.g. the "}" of "object: Button { ... };").
+                        newlines = 0
+                        line_type = LineType.BLOCK_CLOSE
+                    elif prev_line_type == LineType.BLOCK_CLOSE:
+                        newlines = 2
+                    else:
+                        newlines = 1
+
+                    commit_current_line(line_type, newlines_before=newlines)
+
+                elif item.type == TokenType.COMMENT:
+                    require_extra_newline = [
+                        LineType.BLOCK_CLOSE,
+                        LineType.STATEMENT,
+                        LineType.COMMENT,
+                    ]
+
+                    single_line_comment = str_item.startswith("//")
+                    if (
+                        single_line_comment
+                        and prev_line_type == LineType.BLOCK_CLOSE
+                    ) or (
+                        not single_line_comment
+                        and prev_line_type in require_extra_newline
+                    ):
+                        newlines = 2
+                    else:
+                        newlines = 1
+
+                    commit_current_line(LineType.COMMENT, newlines_before=newlines)
+
+                else:
+                    commit_current_line()
+
+            elif str_item == "(" and (
+                word_then_paren_re.match(current_line) or watch_parentheses
+            ):
+                # Start (or continue) counting parentheses of a
+                # "word (...)" line so it can be committed when they close.
+                watch_parentheses = True
+                parentheses_balance += 1
+
+            elif str_item == ")" and watch_parentheses:
+                parentheses_balance -= 1
+                all_parentheses_closed = parentheses_balance == 0
+                if all_parentheses_closed:
+                    commit_current_line(
+                        newlines_before=2
+                        if prev_line_type == LineType.BLOCK_CLOSE
+                        else 1
+                    )
+                    watch_parentheses = False
+
+            tracker_has_items = len(bracket_tracker) > 0
+            if tracker_has_items:
+                last_in_tracker = bracket_tracker[-1]
+                is_list_comma = last_in_tracker == "[" and str_item == ","
+                if is_list_comma:
+                    # Slice instead of index so empty output does not raise.
+                    last_was_list_item = end_str.strip()[-1:] not in ["[", ","]
+                    if last_was_list_item:
+                        end_str = end_str.strip()
+                    commit_current_line()
+
+            last_not_whitespace = item
+
+        return end_str
--- a/blueprintcompiler/lsp.py
+++ b/blueprintcompiler/lsp.py
@ -22,11 +22,13 @@ import json
 import sys
 import traceback
 import typing as T
+from difflib import SequenceMatcher

 from . import decompiler, parser, tokenizer, utils, xml_reader
 from .ast_utils import AstNode
 from .completions import complete
 from .errors import CompileError, MultipleErrors
+from .formatter import Formatter
 from .lsp_utils import *
 from .outputs.xml import XmlOutput
 from .tokenizer import Token
@ -211,6 +213,7 @@ class LanguageServer:
                    "hoverProvider": True,
                    "documentSymbolProvider": True,
                    "definitionProvider": True,
+                    "documentFormattingProvider": True,
                },
                "serverInfo": {
                    "name": "Blueprint",
@ -280,6 +283,38 @@ class LanguageServer:
            id, [completion.to_json(True) for completion in completions]
        )

+    @command("textDocument/formatting")
+    def formatting(self, id, params):
+        """Handle ``textDocument/formatting``: reply with a list of text edits.
+
+        The open document is run through ``Formatter.format`` using the
+        ``tabSize`` / ``insertSpaces`` options of the request, and the
+        difference between the old and new text is sent back as one
+        ``TextEdit`` per changed span.  An error response is sent instead if
+        the document has no text or formatting raises ``PrintableError``.
+        """
+        open_file = self._open_files[params["textDocument"]["uri"]]
+
+        if open_file.text is None:
+            self._send_error(id, ErrorCode.RequestFailed, "Document is not open")
+            return
+
+        try:
+            # The class imported from .formatter is Formatter; "Format" is
+            # not defined and would raise NameError on every request.
+            formatted_blp = Formatter.format(
+                open_file.text,
+                params["options"]["tabSize"],
+                params["options"]["insertSpaces"],
+            )
+        # NOTE(review): PrintableError is not imported by name in this module;
+        # presumably it comes in through `from .lsp_utils import *` - confirm.
+        except PrintableError:
+            self._send_error(id, ErrorCode.RequestFailed, "Could not format document")
+            return
+
+        edits = []
+        matcher = SequenceMatcher(None, open_file.text, formatted_blp)
+        for tag, i1, i2, j1, j2 in matcher.get_opcodes():
+            # "equal" spans need no edit; i1:i2 index the original text and
+            # j1:j2 the formatted text.
+            if tag in ("replace", "insert", "delete"):
+                edits.append(
+                    TextEdit(
+                        Range(i1, i2, open_file.text),
+                        "" if tag == "delete" else formatted_blp[j1:j2],
+                    ).to_json()
+                )
+
+        self._send_response(id, edits)
+
    @command("textDocument/x-blueprint-compile")
    def compile(self, id, params):
        open_file = self._open_files[params["textDocument"]["uri"]]
--- a/blueprintcompiler/lsp_utils.py
+++ b/blueprintcompiler/lsp_utils.py
@ -186,3 +186,12 @@ class LocationLink:
            "targetRange": self.target_range.to_json(),
            "targetSelectionRange": self.target_selection_range.to_json(),
        }
+
+
+@dataclass
+class TextEdit:
+    """A replacement of the text inside ``range`` by ``newText``."""
+
+    # Span of the document that is replaced.
+    range: Range
+    # Text that takes the place of that span.
+    newText: str
+
+    def to_json(self):
+        """Return the edit as a dict with "range" and "newText" keys."""
+        serialized_range = self.range.to_json()
+        return {"range": serialized_range, "newText": self.newText}
--- a/blueprintcompiler/main.py
+++ b/blueprintcompiler/main.py
@ -19,13 +19,14 @@


 import argparse
-import json
+import difflib
 import os
 import sys
 import typing as T

 from . import interactive_port, parser, tokenizer
 from .errors import CompileError, CompilerBugError, PrintableError, report_bug
+from .formatter import Formatter
 from .gir import add_typelib_search_path
 from .lsp import LanguageServer
 from .outputs import XmlOutput
@ -66,6 +67,36 @@ class BlueprintApp:
            type=argparse.FileType("r"),
        )

+        format = self.add_subcommand(
+            "format", "Format given blueprint files", self.cmd_format
+        )
+        format.add_argument(
+            "-f",
+            "--fix",
+            help="Apply the edits to the files",
+            default=False,
+            action="store_true",
+        )
+        format.add_argument(
+            "-t",
+            "--tabs",
+            help="Use tabs instead of spaces",
+            default=False,
+            action="store_true",
+        )
+        format.add_argument(
+            "-s",
+            "--spaces-num",
+            help="How many spaces should be used per indent",
+            default=2,
+            type=int,
+        )
+        format.add_argument(
+            "inputs",
+            nargs="+",
+            metavar="filenames",
+        )
+
        port = self.add_subcommand("port", "Interactive porting tool", self.cmd_port)

        lsp = self.add_subcommand(
@ -152,6 +183,111 @@ class BlueprintApp:
                e.pretty_print(file.name, data)
                sys.exit(1)

+    def cmd_format(self, opts):
+        """Run the ``format`` subcommand over ``opts.inputs``.
+
+        Each input is either a file or a directory that is searched
+        recursively for ``*.blp`` files.  For every file whose formatted text
+        differs from its contents a unified diff is printed; with
+        ``opts.fix`` the file is rewritten, unless it fails to compile, in
+        which case it is skipped.  A summary is printed at the end.
+
+        Exits with status 1 if any input was missing, any file was skipped,
+        or files need formatting and ``opts.fix`` is not set.
+        """
+        input_files = []
+        missing_files = []
+        panic = False
+        formatted_files = 0
+        skipped_files = 0
+
+        for path in opts.inputs:
+            if os.path.isfile(path):
+                input_files.append(path)
+            elif os.path.isdir(path):
+                for root, _subfolders, filenames in os.walk(path):
+                    for filename in filenames:
+                        if filename.endswith(".blp"):
+                            input_files.append(os.path.join(root, filename))
+            else:
+                missing_files.append(path)
+
+        for input_path in input_files:
+            # "r+" so the same handle can be rewritten in place when fixing.
+            with open(input_path, "r+") as file:
+                data = file.read()
+                errored = False
+
+                try:
+                    self._compile(data)
+                except Exception:
+                    # Any compile failure only marks the file as unsafe to
+                    # overwrite; KeyboardInterrupt/SystemExit still propagate.
+                    errored = True
+
+                formatted_str = Formatter.format(data, opts.spaces_num, not opts.tabs)
+
+                if data != formatted_str:
+                    happened = "Would format"
+
+                    if opts.fix and not errored:
+                        file.seek(0)
+                        file.truncate()
+                        file.write(formatted_str)
+                        happened = "Formatted"
+
+                    diff_lines = []
+                    a_lines = data.splitlines(keepends=True)
+                    b_lines = formatted_str.splitlines(keepends=True)
+
+                    for line in difflib.unified_diff(
+                        a_lines, b_lines, fromfile=file.name, tofile=file.name, n=5
+                    ):
+                        # Work around https://bugs.python.org/issue2142
+                        # See:
+                        # https://www.gnu.org/software/diffutils/manual/html_node/Incomplete-Lines.html
+                        if line[-1] == "\n":
+                            diff_lines.append(line)
+                        else:
+                            diff_lines.append(line + "\n")
+                            diff_lines.append("\\ No newline at end of file\n")
+
+                    print("".join(diff_lines))
+                    to_print = Colors.BOLD
+                    if errored:
+                        to_print += f"{Colors.RED}Skipped {file.name}: Will not overwrite file with compile errors"
+                        panic = True
+                        skipped_files += 1
+                    else:
+                        to_print += f"{happened} {file.name}"
+                        formatted_files += 1
+
+                    print(to_print)
+                    print(Colors.CLEAR)
+
+        missing_num = len(missing_files)
+        summary = ""
+
+        if missing_num > 0:
+            print(
+                f"{Colors.BOLD}{Colors.RED}Could not find files:{Colors.CLEAR}{Colors.BOLD}"
+            )
+            for path in missing_files:
+                print(f"  {path}")
+            print(Colors.CLEAR)
+            panic = True
+
+        def would_be(verb):
+            """Return ``verb`` as-is when fixing, else prefixed "would be"."""
+            return verb if opts.fix else f"would be {verb}"
+
+        def how_many(count, bold=True):
+            """Return a colored "<count> file(s)" fragment for the summary."""
+            string = f"{Colors.BLUE}{count} {'files' if count != 1 else 'file'}{Colors.CLEAR}"
+            if bold:
+                return Colors.BOLD + string + Colors.BOLD
+            return Colors.CLEAR + string
+
+        if formatted_files > 0:
+            summary += f"{how_many(formatted_files)} {would_be('formatted')}, "
+            # Without --fix, pending changes count as a failure.
+            panic = panic or not opts.fix
+
+        left_files = len(input_files) - formatted_files - skipped_files
+        summary += f"{how_many(left_files, False)} {would_be('left unchanged')}"
+
+        if skipped_files > 0:
+            summary += f", {how_many(skipped_files)} {would_be('skipped')}"
+
+        if missing_num > 0:
+            summary += f", {how_many(missing_num)} not found"
+
+        print(summary + Colors.CLEAR)
+
+        if panic:
+            sys.exit(1)
+
    def cmd_lsp(self, opts):
        langserv = LanguageServer()
        langserv.run()
--- a/docs/conf.py
+++ b/docs/conf.py
@ -17,9 +17,9 @@

 # -- Project information -----------------------------------------------------

-project = 'Blueprint'
-copyright = '2021-2023, James Westman'
-author = 'James Westman'
+project = "Blueprint"
+copyright = "2021-2023, James Westman"
+author = "James Westman"


 # -- General configuration ---------------------------------------------------
@ -27,16 +27,15 @@ author = 'James Westman'
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
-extensions = [
-]
+extensions = []

 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]

 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]


 # -- Options for HTML output -------------------------------------------------
@ -44,11 +43,11 @@ exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 #
-html_theme = 'furo'
+html_theme = "furo"

 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]

-html_css_files = ['styles.css']
+html_css_files = ["styles.css"]
--- a/tests/formatting/correct1.blp
+++ b/tests/formatting/correct1.blp
@ -0,0 +1,71 @@
+using Gtk 4.0;
+using Adw 1;
+
+template $MyTemplate: Label {
+  /**
+   * A list of strings.
+   */
+  StringList {
+    // comment
+    strings [
+      "Hello",
+      C_("Greeting", "World"),
+    ]
+  }
+
+  object: Button {
+    label: "Click me";
+  };
+
+  flags: a | b;
+
+  [child]
+  Label {
+  }
+
+  [child]
+  Label label2 {
+  }
+
+  // Single line comment.
+
+  /**
+   * Multiline comment.
+   */
+  // Single line comment.
+  value: bind 1.0 as <double>;
+  as: 1;
+  signal => $on_signal() after;
+}
+
+Dialog {
+  [action response=ok]
+  $MyButton {
+  }
+}
+
+menu menu {
+  item ("test")
+
+  item {
+    label: "test";
+  }
+
+  item ("test")
+}
+
+Adw.MessageDialog {
+  responses [
+    save: "Save" suggested disabled,
+  ]
+}
+
+Adw.Breakpoint {
+  condition ("width < 100")
+
+  setters {
+    label2.label: _("Hello, world!");
+    label2.visible: false;
+    label2.extra-menu: null;
+  }
+}
--- a/tests/formatting/in1.blp
+++ b/tests/formatting/in1.blp
@ -0,0 +1 @@
+using Gtk 4.0;using Adw 1;Overlay{Label label{label:_("'Hello World!' \"\n\t\"");}[overlay]Button{notify::icon-name=>$on_icon_name_changed(label)swapped;styles["destructive"]}visible:bind $isVisible(label.visible,my-menu)as<bool>;width-request:bind label.width-request no-sync-create;}menu my-menu{item(_("Label"), "action-name", "icon-name")item{action:"win.format";}}
--- a/tests/formatting/in2.blp
+++ b/tests/formatting/in2.blp
@ -0,0 +1,40 @@
+using Gtk 4.0;
+
+    using Adw 1;
+
+Overlay {
+
+Label
+label
+{
+label
+:
+_
+(
+"'Hello World!' \"\n\t\""
+)
+;
+}
+[
+    overlay
+] Button
+{ notify
+:: icon-name
+=> $ on_icon_name_changed ( label )
+swapped ;
+styles
+[ "destructive" ]
+}
+visible
+: bind $ isVisible ( label.visible ,
+my-menu ) as
+ < bool > ; width-request : bind label . width-request no-sync-create ; }
+                menu my-menu
+{ item ( _ ( "Label" ) , "action-name" , "icon-name" ) item { action : "win.format" ; } }
+
+
+
+
+
+
+
--- a/tests/formatting/out.blp
+++ b/tests/formatting/out.blp
@ -0,0 +1,28 @@
+using Gtk 4.0;
+using Adw 1;
+
+Overlay {
+  Label label {
+    label: _("'Hello World!' \"\n\t\"");
+  }
+
+  [overlay]
+  Button {
+    notify::icon-name => $on_icon_name_changed(label) swapped;
+
+    styles [
+      "destructive"
+    ]
+  }
+
+  visible: bind $isVisible(label.visible, my-menu) as <bool>;
+  width-request: bind label.width-request no-sync-create;
+}
+
+menu my-menu {
+  item (_("Label"), "action-name", "icon-name")
+
+  item {
+    action: "win.format";
+  }
+}
--- a/tests/formatting/string_in.blp
+++ b/tests/formatting/string_in.blp
@ -0,0 +1,5 @@
+using Gtk 4.0;
+
+Label {
+  label: "\"'\'\t\n\\'";
+}
--- a/tests/formatting/string_out.blp
+++ b/tests/formatting/string_out.blp
@ -0,0 +1,5 @@
+using Gtk 4.0;
+
+Label {
+  label: "\"'\'\t\n\\'";
+}
--- a/tests/test_formatter.py
+++ b/tests/test_formatter.py
@ -0,0 +1,48 @@
+# test_formatter.py
+#
+# Copyright 2023 James Westman <james@jwestman.net>
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as
+# published by the Free Software Foundation; either version 3 of the
+# License, or (at your option) any later version.
+#
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# SPDX-License-Identifier: LGPL-3.0-or-later
+
+
+import unittest
+from pathlib import Path
+
+from blueprintcompiler.formatter import Formatter
+
+
+class TestFormatter(unittest.TestCase):
+    """Compares Formatter.format output with fixtures in tests/formatting."""
+
+    # Never truncate the diff shown when a comparison fails.
+    maxDiff = None
+
+    def _read_fixture(self, name):
+        """Return the text of ``formatting/<name>`` next to this test file."""
+        fixture = (Path(__file__).parent / f"formatting/{name}").resolve()
+        with open(fixture) as f:
+            return f.read()
+
+    def assert_format_test(self, input_file, expected_file):
+        """Format ``input_file`` and require it to equal ``expected_file``."""
+        print("assert_format_test({}, {})".format(input_file, expected_file))
+        input_data = self._read_fixture(input_file)
+        expected = self._read_fixture(expected_file)
+
+        actual = Formatter.format(input_data)
+        self.assertEqual(actual, expected)
+
+    def test_formatter(self):
+        cases = (
+            ("in1.blp", "out.blp"),
+            ("in2.blp", "out.blp"),
+            ("correct1.blp", "correct1.blp"),
+            ("string_in.blp", "string_out.blp"),
+        )
+        for input_file, expected_file in cases:
+            self.assert_format_test(input_file, expected_file)
				`@ -0,0 +1 @@`
				`using Gtk 4.0;using Adw 1;Overlay{Label label{label:_("'Hello World!' \"\n\t\"");}[overlay]Button{notify::icon-name=>$on_icon_name_changed(label)swapped;styles["destructive"]}visible:bind $isVisible(label.visible,my-menu)as<bool>;width-request:bind label.width-request no-sync-create;}menu my-menu{item(_("Label"), "action-name", "icon-name")item{action:"win.format";}}`