Tiny formatter improvements

2025-06-30 00:29:26 -04:00 · 2023-12-19 02:29:09 +00:00 · 2023-12-19 02:29:09 +00:00 · e5cde71fc1
commit e5cde71fc1
parent e261180dcc
4 changed files with 155 additions and 166 deletions
--- a/blueprintcompiler/formatter.py
+++ b/blueprintcompiler/formatter.py
@ -44,192 +44,183 @@ class LineType(Enum):
    COMMENT = 4


-class Formatter:
-    def format(data, tab_size=2, insert_space=True):
-        indent_levels = 0
-        tokens = tokenizer.tokenize(data)
-        end_str = ""
-        last_not_whitespace = tokens[0]
+def format(data, tab_size=2, insert_space=True):
+    indent_levels = 0
+    tokens = tokenizer.tokenize(data)
+    end_str = ""
+    last_not_whitespace = tokens[0]
+    current_line = ""
+    prev_line_type = None
+    is_child_type = False
+    indent_item = " " * tab_size if insert_space else "\t"
+    watch_parentheses = False
+    parentheses_balance = 0
+    bracket_tracker = [None]
+    last_whitespace_contains_newline = False
+
+    def commit_current_line(
+        line_type=prev_line_type, redo_whitespace=False, newlines_before=1
+    ):
+        nonlocal end_str, current_line, prev_line_type
+
+        indent_whitespace = indent_levels * indent_item
+        whitespace_to_add = "\n" + indent_whitespace
+
+        if redo_whitespace or newlines_before != 1:
+            end_str = end_str.strip() + "\n" * newlines_before
+            if newlines_before > 0:
+                end_str += indent_whitespace
+
+        end_str += current_line + whitespace_to_add
+
        current_line = ""
-        prev_line_type = None
-        is_child_type = False
-        indent_item = " " * tab_size if insert_space else "\t"
-        watch_parentheses = False
-        parentheses_balance = 0
-        bracket_tracker = [None]
-        last_whitespace_contains_newline = False
+        prev_line_type = line_type

-        def commit_current_line(
-            line_type=prev_line_type, redo_whitespace=False, newlines_before=1
-        ):
-            nonlocal end_str, current_line, prev_line_type
+    for item in tokens:
+        str_item = str(item)

-            indent_whitespace = indent_levels * indent_item
-            whitespace_to_add = "\n" + indent_whitespace
+        if item.type == TokenType.WHITESPACE:
+            last_whitespace_contains_newline = "\n" in str_item
+            continue

-            if redo_whitespace or newlines_before != 1:
-                end_str = end_str.strip() + "\n" * newlines_before
-                if newlines_before > 0:
-                    end_str += indent_whitespace
+        whitespace_required = (
+            str_item in WHITESPACE_BEFORE
+            or str(last_not_whitespace) in WHITESPACE_AFTER
+            or (str_item == "(" and end_str.endswith(": bind"))
+        )
+        whitespace_blockers = (
+            str_item in NO_WHITESPACE_BEFORE
+            or str(last_not_whitespace) in NO_WHITESPACE_AFTER
+            or (str_item == "<" and str(last_not_whitespace) == "typeof")
+        )

-            end_str += current_line + whitespace_to_add
+        this_or_last_is_ident = TokenType.IDENT in (item.type, last_not_whitespace.type)
+        current_line_is_empty = len(current_line) == 0
+        is_function = str_item == "(" and not re.match(
+            r"^([A-Za-z_\-])+(: bind)?$", current_line
+        )

-            current_line = ""
-            prev_line_type = line_type
+        any_blockers = whitespace_blockers or current_line_is_empty or is_function
+        if (whitespace_required or this_or_last_is_ident) and not any_blockers:
+            current_line += " "

-        for item in tokens:
-            str_item = str(item)
+        current_line += str_item

-            if item.type == TokenType.WHITESPACE:
-                last_whitespace_contains_newline = "\n" in str_item
-                continue
+        if str_item in ("[", "("):
+            bracket_tracker.append(str_item)
+        elif str_item in ("]", ")"):
+            bracket_tracker.pop()

-            whitespace_required = (
-                str_item in WHITESPACE_BEFORE
-                or str(last_not_whitespace) in WHITESPACE_AFTER
-                or (str_item == "(" and end_str.endswith(": bind"))
-            )
-            whitespace_blockers = (
-                str_item in NO_WHITESPACE_BEFORE
-                or str(last_not_whitespace) in NO_WHITESPACE_AFTER
-                or (str_item == "<" and str(last_not_whitespace) == "typeof")
-            )
+        needs_newline_treatment = (
+            str_item in NEWLINE_AFTER or item.type == TokenType.COMMENT
+        )
+        if needs_newline_treatment:
+            if str_item in OPENING_TOKENS:
+                list_or_child_type = str_item == "["
+                if list_or_child_type:
+                    is_child_type = current_line.startswith("[")

-            this_or_last_is_ident = (
-                item.type == TokenType.IDENT
-                or last_not_whitespace.type == TokenType.IDENT
-            )
-            current_line_is_empty = len(current_line) == 0
-            is_function = str_item == "(" and not re.match(
-                r"^([A-Za-z_\-])+(: bind)?$", current_line
-            )
+                    if is_child_type:
+                        if str(last_not_whitespace) not in OPENING_TOKENS:
+                            end_str = (
+                                end_str.strip() + "\n\n" + (indent_item * indent_levels)
+                            )
+                        last_not_whitespace = item
+                        continue

-            any_blockers = whitespace_blockers or current_line_is_empty or is_function
+                indent_levels += 1
+                keep_same_indent = prev_line_type not in (
+                    LineType.CHILD_TYPE,
+                    LineType.COMMENT,
+                    LineType.BLOCK_OPEN,
+                )
+                if keep_same_indent:
+                    end_str = (
+                        end_str.strip() + "\n\n" + indent_item * (indent_levels - 1)
+                    )
+                commit_current_line(LineType.BLOCK_OPEN)

-            if (whitespace_required or this_or_last_is_ident) and not any_blockers:
-                current_line += " "
+            elif str_item == "]" and is_child_type:
+                commit_current_line(LineType.CHILD_TYPE, False)
+                is_child_type = False

-            current_line += str_item
+            elif str_item in CLOSING_TOKENS:
+                if str_item == "]" and last_not_whitespace != ",":
+                    current_line = current_line[:-1]
+                    commit_current_line()
+                    current_line = "]"
+                elif str(last_not_whitespace) in OPENING_TOKENS:
+                    end_str = end_str.strip()
+                    commit_current_line(LineType.BLOCK_CLOSE, True, 0)

-            if str_item in ["[", "("]:
-                bracket_tracker.append(str_item)
-            elif str_item in ["]", ")"]:
-                bracket_tracker.pop()
+                indent_levels -= 1
+                commit_current_line(LineType.BLOCK_CLOSE, True)

-            needs_newline_treatment = (
-                str_item in NEWLINE_AFTER or item.type == TokenType.COMMENT
-            )
-            if needs_newline_treatment:
-                if str_item in OPENING_TOKENS:
-                    list_or_child_type = str_item == "["
-                    if list_or_child_type:
-                        is_child_type = current_line.startswith("[")
+            elif str_item == ";":
+                line_type = LineType.STATEMENT
+                newlines = 1

-                        if is_child_type:
-                            if str(last_not_whitespace) not in OPENING_TOKENS:
-                                end_str = (
-                                    end_str.strip()
-                                    + "\n\n"
-                                    + (indent_item * indent_levels)
-                                )
-                            last_not_whitespace = item
-                            continue
+                if len(current_line) == 1:
+                    newlines = 0
+                    line_type = LineType.BLOCK_CLOSE
+                elif prev_line_type == LineType.BLOCK_CLOSE:
+                    newlines = 2

-                    indent_levels += 1
-                    keep_same_indent = not prev_line_type in [
-                        LineType.CHILD_TYPE,
-                        LineType.COMMENT,
-                        LineType.BLOCK_OPEN,
-                    ]
-                    if keep_same_indent:
-                        end_str = (
-                            end_str.strip() + "\n\n" + indent_item * (indent_levels - 1)
-                        )
-                    commit_current_line(LineType.BLOCK_OPEN)
+                commit_current_line(line_type, newlines_before=newlines)

-                elif str_item == "]" and is_child_type:
-                    commit_current_line(LineType.CHILD_TYPE, False)
-                    is_child_type = False
+            elif item.type == TokenType.COMMENT:
+                require_extra_newline = (
+                    LineType.BLOCK_CLOSE,
+                    LineType.STATEMENT,
+                    LineType.COMMENT,
+                )

-                elif str_item in CLOSING_TOKENS:
-                    if str_item == "]" and last_not_whitespace != ",":
-                        current_line = current_line[:-1]
-                        commit_current_line()
-                        current_line = "]"
-                    elif str(last_not_whitespace) in OPENING_TOKENS:
-                        end_str = end_str.strip()
-                        commit_current_line(LineType.BLOCK_CLOSE, True, 0)
+                single_line_comment = str_item.startswith("//")
+                newlines = 1
+                if single_line_comment:
+                    if not str_item.startswith("// "):
+                        current_line = f"// {current_line[2:]}"

-                    indent_levels -= 1
-                    commit_current_line(LineType.BLOCK_CLOSE, True)
-
-                elif str_item == ";":
-                    line_type = LineType.STATEMENT
-                    if len(current_line) == 1:
+                    if not last_whitespace_contains_newline:
+                        current_line = " " + current_line
                        newlines = 0
-                        line_type = LineType.BLOCK_CLOSE
                    elif prev_line_type == LineType.BLOCK_CLOSE:
                        newlines = 2
-                    else:
-                        newlines = 1

-                    commit_current_line(line_type, newlines_before=newlines)
+                elif prev_line_type in require_extra_newline:
+                    newlines = 2

-                elif item.type == TokenType.COMMENT:
-                    require_extra_newline = [
-                        LineType.BLOCK_CLOSE,
-                        LineType.STATEMENT,
-                        LineType.COMMENT,
-                    ]
+                commit_current_line(LineType.COMMENT, newlines_before=newlines)

-                    single_line_comment = str_item.startswith("//")
-                    newlines = 1
-                    if single_line_comment:
-                        if not str_item.startswith("// "):
-                            current_line = f"// {current_line[2:]}"
+            else:
+                commit_current_line()

-                        if not last_whitespace_contains_newline:
-                            current_line = " " + current_line
-                            newlines = 0
-                        elif prev_line_type == LineType.BLOCK_CLOSE:
-                            newlines = 2
+        elif str_item == "(" and (
+            re.match(r"^([A-Za-z_\-])+\s*\(", current_line) or watch_parentheses
+        ):
+            watch_parentheses = True
+            parentheses_balance += 1

-                    elif prev_line_type in require_extra_newline:
-                        newlines = 2
+        elif str_item == ")" and watch_parentheses:
+            parentheses_balance -= 1
+            all_parentheses_closed = parentheses_balance == 0
+            if all_parentheses_closed:
+                commit_current_line(
+                    newlines_before=2 if prev_line_type == LineType.BLOCK_CLOSE else 1
+                )
+                watch_parentheses = False

-                    commit_current_line(LineType.COMMENT, newlines_before=newlines)
+        tracker_is_empty = len(bracket_tracker) > 0
+        if tracker_is_empty:
+            last_in_tracker = bracket_tracker[-1]
+            is_list_comma = last_in_tracker == "[" and str_item == ","
+            if is_list_comma:
+                last_was_list_item = end_str.strip()[-1] not in ("[", ",")
+                if last_was_list_item:
+                    end_str = end_str.strip()
+                commit_current_line()

-                else:
-                    commit_current_line()
+        last_not_whitespace = item
+        last_whitespace_contains_newline = False

-            elif str_item == "(" and (
-                re.match("^([A-Za-z_\-])+\s*\(", current_line) or watch_parentheses
-            ):
-                watch_parentheses = True
-                parentheses_balance += 1
-
-            elif str_item == ")" and watch_parentheses:
-                parentheses_balance -= 1
-                all_parentheses_closed = parentheses_balance == 0
-                if all_parentheses_closed:
-                    commit_current_line(
-                        newlines_before=2
-                        if prev_line_type == LineType.BLOCK_CLOSE
-                        else 1
-                    )
-                    watch_parentheses = False
-
-            tracker_is_empty = len(bracket_tracker) > 0
-            if tracker_is_empty:
-                last_in_tracker = bracket_tracker[-1]
-                is_list_comma = last_in_tracker == "[" and str_item == ","
-                if is_list_comma:
-                    last_was_list_item = end_str.strip()[-1] not in ["[", ","]
-                    if last_was_list_item:
-                        end_str = end_str.strip()
-                    commit_current_line()
-
-            last_not_whitespace = item
-            last_whitespace_contains_newline = False
-
-        return end_str.strip() + "\n"
+    return end_str.strip() + "\n"
--- a/blueprintcompiler/lsp.py
+++ b/blueprintcompiler/lsp.py
@ -24,11 +24,10 @@ import traceback
 import typing as T
 from difflib import SequenceMatcher

-from . import decompiler, parser, tokenizer, utils, xml_reader
+from . import decompiler, formatter, parser, tokenizer, utils, xml_reader
 from .ast_utils import AstNode
 from .completions import complete
 from .errors import CompileError, MultipleErrors
-from .formatter import Formatter
 from .lsp_utils import *
 from .outputs.xml import XmlOutput
 from .tokenizer import Token
@ -292,7 +291,7 @@ class LanguageServer:
            return

        try:
-            formatted_blp = Formatter.format(
+            formatted_blp = formatter.format(
                open_file.text,
                params["options"]["tabSize"],
                params["options"]["insertSpaces"],
--- a/blueprintcompiler/main.py
+++ b/blueprintcompiler/main.py
@ -24,9 +24,8 @@ import os
 import sys
 import typing as T

-from . import interactive_port, parser, tokenizer
+from . import formatter, interactive_port, parser, tokenizer
 from .errors import CompileError, CompilerBugError, PrintableError, report_bug
-from .formatter import Formatter
 from .gir import add_typelib_search_path
 from .lsp import LanguageServer
 from .outputs import XmlOutput
@ -211,7 +210,7 @@ class BlueprintApp:
                except:
                    errored = True

-                formatted_str = Formatter.format(data, opts.spaces_num, not opts.tabs)
+                formatted_str = formatter.format(data, opts.spaces_num, not opts.tabs)

                if data != formatted_str:
                    happened = "Would format"
--- a/tests/test_formatter.py
+++ b/tests/test_formatter.py
@ -21,7 +21,7 @@
 import unittest
 from pathlib import Path

-from blueprintcompiler.formatter import Formatter
+from blueprintcompiler import formatter


 class TestFormatter(unittest.TestCase):
@ -38,7 +38,7 @@ class TestFormatter(unittest.TestCase):
        ) as f:
            expected = f.read()

-        actual = Formatter.format(input_data)
+        actual = formatter.format(input_data)
        self.assertEqual(actual, expected)

    def test_formatter(self):