Add a formatter

2025-07-13 02:19:08 -04:00 · 2023-11-03 06:48:28 -05:00 · 2023-11-03 06:48:28 -05:00 · 4fa64cdf33
commit 4fa64cdf33
parent 2faa9207de
12 changed files with 613 additions and 11 deletions
--- a/blueprintcompiler/formatter.py
+++ b/blueprintcompiler/formatter.py
@ -0,0 +1,225 @@
+# formatter.py
+#
+# Copyright 2023 Gregor Niehl <gregorniehl@web.de>
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as
+# published by the Free Software Foundation; either version 3 of the
+# License, or (at your option) any later version.
+#
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# SPDX-License-Identifier: LGPL-3.0-or-later
+
+import re
+from enum import Enum
+
+from . import tokenizer, utils
+from .tokenizer import TokenType
+
+# Tokens that open / close a block or a list.
+OPENING_TOKENS = ("{", "[")
+CLOSING_TOKENS = ("}", "]")
+
+# Tokens after which the formatter applies its line-break handling.
+# Written as an explicit one-element tuple: tuple(";") only produced the same
+# value because the string happens to be a single character long.
+NEWLINE_AFTER = (";",) + OPENING_TOKENS + CLOSING_TOKENS
+
+# Tokens that suppress the space that would otherwise be inserted
+# before / after them.
+NO_WHITESPACE_BEFORE = (",", ":", "::", ";", ")", ".", ">", "]", "=")
+NO_WHITESPACE_AFTER = ("C_", "_", "(", ".", "$", "<", "::", "[", "=")
+
+# Tokens that request a space after / before them.
+# NO_WHITESPACE_BEFORE takes precedence over WHITESPACE_AFTER
+WHITESPACE_AFTER = (":", ",", ">", ")", "|", "=>")
+WHITESPACE_BEFORE = ("{", "|")
+
+
+class LineType(Enum):
+    # Kind of the line most recently committed to the formatter output; the
+    # formatter consults it to decide how many newlines precede the next line.
+    STATEMENT = 0  # line ended by ";"
+    BLOCK_OPEN = 1  # line ended by an opening token
+    BLOCK_CLOSE = 2  # line ended by a closing token (also used for a lone ";")
+    CHILD_TYPE = 3  # "[...]" line that starts with "["
+    COMMENT = 4  # line ended by a comment token
+
+
+class Formatter:
+    """Re-flows blueprint source text token by token.
+
+    The class only namespaces :meth:`format`; it holds no state.
+    """
+
+    @staticmethod
+    def format(data, tab_size=2, insert_space=True):
+        """Return ``data`` with normalised spacing, line breaks and indentation.
+
+        :param data: source text; it is handed to ``tokenizer.tokenize``.
+        :param tab_size: number of spaces per indent level. Only used when
+            ``insert_space`` is true.
+        :param insert_space: indent with spaces when true, otherwise with one
+            tab character per level.
+        :returns: the formatted text as a single string.
+        """
+        indent_levels = 0
+        tokens = tokenizer.tokenize(data)
+        if not tokens:
+            # Nothing to format; also avoids indexing an empty token list below.
+            return ""
+
+        end_str = ""
+        last_not_whitespace = tokens[0]
+        current_line = ""
+        prev_line_type = None
+        is_child_type = False
+        indent_item = " " * tab_size if insert_space else "\t"
+        watch_parentheses = False
+        parentheses_balance = 0
+        # The leading None is a sentinel that is never popped, so the top of
+        # the stack can always be read, even for unbalanced input.
+        bracket_tracker = [None]
+
+        # Compiled once instead of on every token. Raw strings keep "\-", "\s"
+        # and "\(" from being read as (invalid) string escapes.
+        single_word_re = re.compile(r"^([A-Za-z_\-])+$")
+        call_start_re = re.compile(r"^([A-Za-z_\-])+\s*\(")
+
+        def commit_current_line(line_type=None, redo_whitespace=False, newlines_before=1):
+            """Append ``current_line`` to the output and start a fresh line.
+
+            ``line_type`` is recorded as the new ``prev_line_type``. Its
+            default used to be spelled ``prev_line_type``, which Python
+            evaluates once at definition time, i.e. always ``None``; that
+            value is kept here, just stated explicitly.
+            """
+            nonlocal end_str, current_line, prev_line_type
+
+            indent_whitespace = indent_levels * indent_item
+            whitespace_to_add = "\n" + indent_whitespace
+
+            if redo_whitespace or newlines_before != 1:
+                # Discard the whitespace emitted after the previous line and
+                # rebuild it with the requested number of newlines.
+                end_str = end_str.strip() + ("\n" * newlines_before)
+                if newlines_before > 0:
+                    end_str += indent_whitespace
+
+            end_str += current_line + whitespace_to_add
+
+            current_line = ""
+            prev_line_type = line_type
+
+        for item in tokens:
+            if item.type == TokenType.WHITESPACE:
+                # Original whitespace is dropped; spacing is rebuilt below.
+                continue
+
+            str_item = str(item)
+            last_str = str(last_not_whitespace)
+
+            whitespace_required = (
+                str_item in WHITESPACE_BEFORE or last_str in WHITESPACE_AFTER
+            )
+            whitespace_blockers = (
+                str_item in NO_WHITESPACE_BEFORE or last_str in NO_WHITESPACE_AFTER
+            )
+
+            this_or_last_is_ident = (
+                item.type == TokenType.IDENT
+                or last_not_whitespace.type == TokenType.IDENT
+            )
+            current_line_is_empty = len(current_line) == 0
+            just_one_word_on_line = single_word_re.match(current_line)
+            is_short_object_def = str_item == "(" and not just_one_word_on_line
+
+            any_blockers = (
+                whitespace_blockers or current_line_is_empty or is_short_object_def
+            )
+
+            if (whitespace_required or this_or_last_is_ident) and not any_blockers:
+                current_line += " "
+
+            current_line += str_item
+
+            if str_item in ["[", "("]:
+                bracket_tracker.append(str_item)
+            elif str_item in ["]", ")"] and len(bracket_tracker) > 1:
+                # Never pop the sentinel: a stray closing bracket in malformed
+                # input must not raise IndexError.
+                bracket_tracker.pop()
+
+            needs_newline_treatment = (
+                str_item in NEWLINE_AFTER or item.type == TokenType.COMMENT
+            )
+            if needs_newline_treatment:
+                if str_item in OPENING_TOKENS:
+                    list_or_child_type = str_item == "["
+                    if list_or_child_type:
+                        is_child_type = current_line.startswith("[")
+
+                        if is_child_type:
+                            if last_str not in OPENING_TOKENS:
+                                # Separate the child type from whatever
+                                # precedes it with one blank line.
+                                end_str = (
+                                    end_str.strip()
+                                    + "\n\n"
+                                    + (indent_item * indent_levels)
+                                )
+                            # The "[...]" stays on the current line until its
+                            # closing "]" commits it.
+                            last_not_whitespace = item
+                            continue
+
+                    indent_levels += 1
+                    keep_same_indent = prev_line_type not in [
+                        LineType.CHILD_TYPE,
+                        LineType.COMMENT,
+                        LineType.BLOCK_OPEN,
+                    ]
+                    if keep_same_indent:
+                        # Blank line before the block, at the indentation the
+                        # block header itself uses.
+                        end_str = (
+                            end_str.strip()
+                            + "\n\n"
+                            + (indent_item * (indent_levels - 1))
+                        )
+                    commit_current_line(LineType.BLOCK_OPEN)
+
+                elif str_item == "]" and is_child_type:
+                    commit_current_line(LineType.CHILD_TYPE, False)
+                    is_child_type = False
+
+                elif str_item in CLOSING_TOKENS:
+                    # Compare token text: comparing the token object itself
+                    # with "," does not test what the token contains.
+                    if str_item == "]" and last_str != ",":
+                        # Put the last list item on its own line, then the "]".
+                        current_line = current_line[:-1]
+                        commit_current_line()
+                        current_line = "]"
+
+                    indent_levels -= 1
+                    commit_current_line(LineType.BLOCK_CLOSE, True)
+
+                elif str_item == ";":
+                    line_type = LineType.STATEMENT
+                    if len(current_line) == 1:
+                        # The line is only ";": glue it to the previous output.
+                        newlines = 0
+                        line_type = LineType.BLOCK_CLOSE
+                    elif prev_line_type == LineType.BLOCK_CLOSE:
+                        newlines = 2
+                    else:
+                        newlines = 1
+
+                    commit_current_line(line_type, newlines_before=newlines)
+
+                elif item.type == TokenType.COMMENT:
+                    require_extra_newline = [
+                        LineType.BLOCK_CLOSE,
+                        LineType.STATEMENT,
+                        LineType.COMMENT,
+                    ]
+
+                    single_line_comment = str_item.startswith("//")
+                    if (
+                        single_line_comment
+                        and prev_line_type == LineType.BLOCK_CLOSE
+                    ) or (
+                        not single_line_comment
+                        and prev_line_type in require_extra_newline
+                    ):
+                        newlines = 2
+                    else:
+                        newlines = 1
+
+                    commit_current_line(LineType.COMMENT, newlines_before=newlines)
+
+                else:
+                    commit_current_line()
+
+            elif str_item == "(" and (
+                call_start_re.match(current_line) or watch_parentheses
+            ):
+                # The line starts with "word(": count parentheses so the line
+                # can be committed once the outermost one closes.
+                watch_parentheses = True
+                parentheses_balance += 1
+
+            elif str_item == ")" and watch_parentheses:
+                parentheses_balance -= 1
+                all_parentheses_closed = parentheses_balance == 0
+                if all_parentheses_closed:
+                    commit_current_line(
+                        newlines_before=2
+                        if prev_line_type == LineType.BLOCK_CLOSE
+                        else 1
+                    )
+                    watch_parentheses = False
+
+            is_list_comma = str_item == "," and bracket_tracker[-1] == "["
+            if is_list_comma:
+                # [-1:] instead of [-1] so an empty output buffer yields ""
+                # rather than raising IndexError.
+                last_was_list_item = end_str.strip()[-1:] not in ["[", ","]
+                if last_was_list_item:
+                    end_str = end_str.strip()
+                commit_current_line()
+
+            last_not_whitespace = item
+
+        return end_str
--- a/blueprintcompiler/lsp.py
+++ b/blueprintcompiler/lsp.py
@ -22,11 +22,13 @@ import json
 import sys
 import traceback
 import typing as T
+from difflib import SequenceMatcher

 from . import decompiler, parser, tokenizer, utils, xml_reader
 from .ast_utils import AstNode
 from .completions import complete
 from .errors import CompileError, MultipleErrors
+from .formatter import Formatter
 from .lsp_utils import *
 from .outputs.xml import XmlOutput
 from .tokenizer import Token
@ -211,6 +213,7 @@ class LanguageServer:
                    "hoverProvider": True,
                    "documentSymbolProvider": True,
                    "definitionProvider": True,
+                    "documentFormattingProvider": True,
                },
                "serverInfo": {
                    "name": "Blueprint",
@ -280,6 +283,38 @@ class LanguageServer:
            id, [completion.to_json(True) for completion in completions]
        )

+    @command("textDocument/formatting")
+    def formatting(self, id, params):
+        """Answer a ``textDocument/formatting`` request.
+
+        Formats the text of the open document named by
+        ``params["textDocument"]["uri"]`` using the ``tabSize`` and
+        ``insertSpaces`` values in ``params["options"]``, and responds with a
+        list of serialised ``TextEdit`` objects that turn the current text
+        into the formatted text. Sends ``ErrorCode.RequestFailed`` when the
+        document has no text or formatting raises ``PrintableError``.
+        """
+        open_file = self._open_files[params["textDocument"]["uri"]]
+
+        if open_file.text is None:
+            self._send_error(id, ErrorCode.RequestFailed, "Document is not open")
+            return
+
+        try:
+            # The class imported from .formatter is `Formatter`; the previous
+            # spelling `Format` was an undefined name and raised NameError.
+            formatted_blp = Formatter.format(
+                open_file.text,
+                params["options"]["tabSize"],
+                params["options"]["insertSpaces"],
+            )
+        # NOTE(review): PrintableError is not in this module's explicit
+        # `from .errors import ...` line; presumably it arrives through
+        # `from .lsp_utils import *` -- confirm.
+        except PrintableError:
+            self._send_error(id, ErrorCode.RequestFailed, "Could not format document")
+            return
+
+        # Character-level diff between the current and the formatted text;
+        # every non-"equal" opcode becomes one edit.
+        matcher = SequenceMatcher(None, open_file.text, formatted_blp)
+        edits = [
+            TextEdit(
+                Range(i1, i2, open_file.text),
+                "" if tag == "delete" else formatted_blp[j1:j2],
+            ).to_json()
+            for tag, i1, i2, j1, j2 in matcher.get_opcodes()
+            if tag in ("replace", "insert", "delete")
+        ]
+
+        self._send_response(id, edits)
+
+
    @command("textDocument/x-blueprint-compile")
    def compile(self, id, params):
        open_file = self._open_files[params["textDocument"]["uri"]]
--- a/blueprintcompiler/lsp_utils.py
+++ b/blueprintcompiler/lsp_utils.py
@ -186,3 +186,12 @@ class LocationLink:
            "targetRange": self.target_range.to_json(),
            "targetSelectionRange": self.target_selection_range.to_json(),
        }
+
+
+@dataclass
+class TextEdit:
+    # One replacement in a document: the text covered by `range` is replaced
+    # with `newText` (an empty string for a deletion).
+    range: Range
+    newText: str
+
+    def to_json(self):
+        # Serialise to a plain dict with the keys "range" and "newText".
+        return {"range": self.range.to_json(), "newText": self.newText}
--- a/blueprintcompiler/main.py
+++ b/blueprintcompiler/main.py
@ -19,13 +19,14 @@


 import argparse
-import json
+import difflib
 import os
 import sys
 import typing as T

 from . import interactive_port, parser, tokenizer
 from .errors import CompileError, CompilerBugError, PrintableError, report_bug
+from .formatter import Formatter
 from .gir import add_typelib_search_path
 from .lsp import LanguageServer
 from .outputs import XmlOutput
@ -66,6 +67,36 @@ class BlueprintApp:
            type=argparse.FileType("r"),
        )

+        format = self.add_subcommand(
+            "format", "Format given blueprint files", self.cmd_format
+        )
+        format.add_argument(
+            "-f",
+            "--fix",
+            help="Apply the edits to the files",
+            default=False,
+            action="store_true",
+        )
+        format.add_argument(
+            "-t",
+            "--tabs",
+            help="Use tabs instead of spaces",
+            default=False,
+            action="store_true",
+        )
+        format.add_argument(
+            "-s",
+            "--spaces-num",
+            help="How many spaces should be used per indent",
+            default=2,
+            type=int,
+        )
+        format.add_argument(
+            "inputs",
+            nargs="+",
+            metavar="filenames",
+        )
+
        port = self.add_subcommand("port", "Interactive porting tool", self.cmd_port)

        lsp = self.add_subcommand(
@ -152,6 +183,111 @@ class BlueprintApp:
                e.pretty_print(file.name, data)
                sys.exit(1)

+    def cmd_format(self, opts):
+        """Implement the ``format`` subcommand.
+
+        Each entry of ``opts.inputs`` is either a file, a directory (walked
+        recursively for ``*.blp`` files) or reported as missing. For every
+        file whose formatted text differs from its content a unified diff is
+        printed; with ``opts.fix`` the file is rewritten, unless it fails to
+        compile. A summary line is printed at the end.
+
+        Exits with status 1 when a file was skipped or missing, or when
+        files need formatting and ``opts.fix`` is not set.
+        """
+        input_files = []
+        missing_files = []
+        panic = False
+        formatted_files = 0
+        skipped_files = 0
+
+        for path in opts.inputs:
+            if os.path.isfile(path):
+                input_files.append(path)
+            elif os.path.isdir(path):
+                for root, _subfolders, files in os.walk(path):
+                    input_files.extend(
+                        os.path.join(root, name)
+                        for name in files
+                        if name.endswith(".blp")
+                    )
+            else:
+                missing_files.append(path)
+
+        for file_path in input_files:
+            # Read-only open: check mode must not require write permission.
+            with open(file_path, "r") as source:
+                data = source.read()
+
+            errored = False
+            try:
+                self._compile(data)
+            except Exception:
+                # Any compile failure only marks the file as not safe to
+                # overwrite. KeyboardInterrupt / SystemExit are deliberately
+                # no longer swallowed here.
+                errored = True
+
+            formatted_str = Formatter.format(data, opts.spaces_num, not opts.tabs)
+
+            if data == formatted_str:
+                continue
+
+            happened = "Would format"
+
+            if opts.fix and not errored:
+                with open(file_path, "w") as target:
+                    target.write(formatted_str)
+                happened = "Formatted"
+
+            diff_lines = []
+            a_lines = data.splitlines(keepends=True)
+            b_lines = formatted_str.splitlines(keepends=True)
+
+            for line in difflib.unified_diff(
+                a_lines, b_lines, fromfile=file_path, tofile=file_path, n=5
+            ):
+                # Work around https://bugs.python.org/issue2142
+                # See:
+                # https://www.gnu.org/software/diffutils/manual/html_node/Incomplete-Lines.html
+                if line[-1] == "\n":
+                    diff_lines.append(line)
+                else:
+                    diff_lines.append(line + "\n")
+                    diff_lines.append("\\ No newline at end of file\n")
+
+            print("".join(diff_lines))
+            to_print = Colors.BOLD
+            if errored:
+                to_print += f"{Colors.RED}Skipped {file_path}: Will not overwrite file with compile errors"
+                panic = True
+                skipped_files += 1
+            else:
+                to_print += f"{happened} {file_path}"
+                formatted_files += 1
+
+            print(to_print)
+            print(Colors.CLEAR)
+
+        missing_num = len(missing_files)
+        summary = ""
+
+        if missing_num > 0:
+            print(
+                f"{Colors.BOLD}{Colors.RED}Could not find files:{Colors.CLEAR}{Colors.BOLD}"
+            )
+            for missing_path in missing_files:
+                print(f"  {missing_path}")
+            print(Colors.CLEAR)
+            panic = True
+
+        def would_be(verb):
+            # Past tense when files were actually changed, conditional otherwise.
+            return verb if opts.fix else f"would be {verb}"
+
+        def how_many(count, bold=True):
+            # "<count> file(s)" in blue, wrapped in bold or clear colour codes.
+            string = f"{Colors.BLUE}{count} {'files' if count != 1 else 'file'}{Colors.CLEAR}"
+            if bold:
+                return Colors.BOLD + string + Colors.BOLD
+            return Colors.CLEAR + string
+
+        if formatted_files > 0:
+            summary += f"{how_many(formatted_files)} {would_be('formatted')}, "
+            # In check mode, pending changes count as a failure.
+            panic = panic or not opts.fix
+
+        left_files = len(input_files) - formatted_files - skipped_files
+        summary += f"{how_many(left_files, False)} {would_be('left unchanged')}"
+
+        if skipped_files > 0:
+            summary += f", {how_many(skipped_files)} {would_be('skipped')}"
+
+        if missing_num > 0:
+            summary += f", {how_many(missing_num)} not found"
+
+        print(summary + Colors.CLEAR)
+
+        if panic:
+            sys.exit(1)
+
+
    def cmd_lsp(self, opts):
        langserv = LanguageServer()
        langserv.run()