Tiny formatter improvements

2025-07-12 02:09:09 -04:00 · 2023-12-19 02:29:09 +00:00 · 2023-12-19 02:29:09 +00:00 · e5cde71fc1
commit e5cde71fc1
parent e261180dcc
4 changed files with 155 additions and 166 deletions
--- a/blueprintcompiler/formatter.py
+++ b/blueprintcompiler/formatter.py
@@ -44,192 +44,183 @@ class LineType(Enum):
    COMMENT = 4
-class Formatter:
+def format(data, tab_size=2, insert_space=True):
-    def format(data, tab_size=2, insert_space=True):
+    indent_levels = 0
-        indent_levels = 0
+    tokens = tokenizer.tokenize(data)
-        tokens = tokenizer.tokenize(data)
+    end_str = ""
-        end_str = ""
+    last_not_whitespace = tokens[0]
-        last_not_whitespace = tokens[0]
+    current_line = ""
    prev_line_type = None
    is_child_type = False
    indent_item = " " * tab_size if insert_space else "\t"
    watch_parentheses = False
    parentheses_balance = 0
    bracket_tracker = [None]
    last_whitespace_contains_newline = False
    def commit_current_line(
        line_type=prev_line_type, redo_whitespace=False, newlines_before=1
    ):
        nonlocal end_str, current_line, prev_line_type
        indent_whitespace = indent_levels * indent_item
        whitespace_to_add = "\n" + indent_whitespace
        if redo_whitespace or newlines_before != 1:
            end_str = end_str.strip() + "\n" * newlines_before
            if newlines_before > 0:
                end_str += indent_whitespace
        end_str += current_line + whitespace_to_add
        current_line = ""
-        prev_line_type = None
+        prev_line_type = line_type
        is_child_type = False
        indent_item = " " * tab_size if insert_space else "\t"
        watch_parentheses = False
        parentheses_balance = 0
        bracket_tracker = [None]
        last_whitespace_contains_newline = False
-        def commit_current_line(
+    for item in tokens:
-            line_type=prev_line_type, redo_whitespace=False, newlines_before=1
+        str_item = str(item)
        ):
            nonlocal end_str, current_line, prev_line_type
-            indent_whitespace = indent_levels * indent_item
+        if item.type == TokenType.WHITESPACE:
-            whitespace_to_add = "\n" + indent_whitespace
+            last_whitespace_contains_newline = "\n" in str_item
            continue
-            if redo_whitespace or newlines_before != 1:
+        whitespace_required = (
-                end_str = end_str.strip() + "\n" * newlines_before
+            str_item in WHITESPACE_BEFORE
-                if newlines_before > 0:
+            or str(last_not_whitespace) in WHITESPACE_AFTER
-                    end_str += indent_whitespace
+            or (str_item == "(" and end_str.endswith(": bind"))
        )
        whitespace_blockers = (
            str_item in NO_WHITESPACE_BEFORE
            or str(last_not_whitespace) in NO_WHITESPACE_AFTER
            or (str_item == "<" and str(last_not_whitespace) == "typeof")
        )
-            end_str += current_line + whitespace_to_add
+        this_or_last_is_ident = TokenType.IDENT in (item.type, last_not_whitespace.type)
        current_line_is_empty = len(current_line) == 0
        is_function = str_item == "(" and not re.match(
            r"^([A-Za-z_\-])+(: bind)?$", current_line
        )
-            current_line = ""
+        any_blockers = whitespace_blockers or current_line_is_empty or is_function
-            prev_line_type = line_type
+        if (whitespace_required or this_or_last_is_ident) and not any_blockers:
            current_line += " "
-        for item in tokens:
+        current_line += str_item
            str_item = str(item)
-            if item.type == TokenType.WHITESPACE:
+        if str_item in ("[", "("):
-                last_whitespace_contains_newline = "\n" in str_item
+            bracket_tracker.append(str_item)
-                continue
+        elif str_item in ("]", ")"):
            bracket_tracker.pop()
-            whitespace_required = (
+        needs_newline_treatment = (
-                str_item in WHITESPACE_BEFORE
+            str_item in NEWLINE_AFTER or item.type == TokenType.COMMENT
-                or str(last_not_whitespace) in WHITESPACE_AFTER
+        )
-                or (str_item == "(" and end_str.endswith(": bind"))
+        if needs_newline_treatment:
-            )
+            if str_item in OPENING_TOKENS:
-            whitespace_blockers = (
+                list_or_child_type = str_item == "["
-                str_item in NO_WHITESPACE_BEFORE
+                if list_or_child_type:
-                or str(last_not_whitespace) in NO_WHITESPACE_AFTER
+                    is_child_type = current_line.startswith("[")
                or (str_item == "<" and str(last_not_whitespace) == "typeof")
            )
-            this_or_last_is_ident = (
+                    if is_child_type:
-                item.type == TokenType.IDENT
+                        if str(last_not_whitespace) not in OPENING_TOKENS:
-                or last_not_whitespace.type == TokenType.IDENT
+                            end_str = (
-            )
+                                end_str.strip() + "\n\n" + (indent_item * indent_levels)
-            current_line_is_empty = len(current_line) == 0
+                            )
-            is_function = str_item == "(" and not re.match(
+                        last_not_whitespace = item
-                r"^([A-Za-z_\-])+(: bind)?$", current_line
+                        continue
            )
-            any_blockers = whitespace_blockers or current_line_is_empty or is_function
+                indent_levels += 1
                keep_same_indent = prev_line_type not in (
                    LineType.CHILD_TYPE,
                    LineType.COMMENT,
                    LineType.BLOCK_OPEN,
                )
                if keep_same_indent:
                    end_str = (
                        end_str.strip() + "\n\n" + indent_item * (indent_levels - 1)
                    )
                commit_current_line(LineType.BLOCK_OPEN)
-            if (whitespace_required or this_or_last_is_ident) and not any_blockers:
+            elif str_item == "]" and is_child_type:
-                current_line += " "
+                commit_current_line(LineType.CHILD_TYPE, False)
                is_child_type = False
-            current_line += str_item
+            elif str_item in CLOSING_TOKENS:
                if str_item == "]" and last_not_whitespace != ",":
                    current_line = current_line[:-1]
                    commit_current_line()
                    current_line = "]"
                elif str(last_not_whitespace) in OPENING_TOKENS:
                    end_str = end_str.strip()
                    commit_current_line(LineType.BLOCK_CLOSE, True, 0)
-            if str_item in ["[", "("]:
+                indent_levels -= 1
-                bracket_tracker.append(str_item)
+                commit_current_line(LineType.BLOCK_CLOSE, True)
            elif str_item in ["]", ")"]:
                bracket_tracker.pop()
-            needs_newline_treatment = (
+            elif str_item == ";":
-                str_item in NEWLINE_AFTER or item.type == TokenType.COMMENT
+                line_type = LineType.STATEMENT
-            )
+                newlines = 1
            if needs_newline_treatment:
                if str_item in OPENING_TOKENS:
                    list_or_child_type = str_item == "["
                    if list_or_child_type:
                        is_child_type = current_line.startswith("[")
-                        if is_child_type:
+                if len(current_line) == 1:
-                            if str(last_not_whitespace) not in OPENING_TOKENS:
+                    newlines = 0
-                                end_str = (
+                    line_type = LineType.BLOCK_CLOSE
-                                    end_str.strip()
+                elif prev_line_type == LineType.BLOCK_CLOSE:
-                                    + "\n\n"
+                    newlines = 2
                                    + (indent_item * indent_levels)
                                )
                            last_not_whitespace = item
                            continue
-                    indent_levels += 1
+                commit_current_line(line_type, newlines_before=newlines)
                    keep_same_indent = not prev_line_type in [
                        LineType.CHILD_TYPE,
                        LineType.COMMENT,
                        LineType.BLOCK_OPEN,
                    ]
                    if keep_same_indent:
                        end_str = (
                            end_str.strip() + "\n\n" + indent_item * (indent_levels - 1)
                        )
                    commit_current_line(LineType.BLOCK_OPEN)
-                elif str_item == "]" and is_child_type:
+            elif item.type == TokenType.COMMENT:
-                    commit_current_line(LineType.CHILD_TYPE, False)
+                require_extra_newline = (
-                    is_child_type = False
+                    LineType.BLOCK_CLOSE,
                    LineType.STATEMENT,
                    LineType.COMMENT,
                )
-                elif str_item in CLOSING_TOKENS:
+                single_line_comment = str_item.startswith("//")
-                    if str_item == "]" and last_not_whitespace != ",":
+                newlines = 1
-                        current_line = current_line[:-1]
+                if single_line_comment:
-                        commit_current_line()
+                    if not str_item.startswith("// "):
-                        current_line = "]"
+                        current_line = f"// {current_line[2:]}"
                    elif str(last_not_whitespace) in OPENING_TOKENS:
                        end_str = end_str.strip()
                        commit_current_line(LineType.BLOCK_CLOSE, True, 0)
-                    indent_levels -= 1
+                    if not last_whitespace_contains_newline:
-                    commit_current_line(LineType.BLOCK_CLOSE, True)
+                        current_line = " " + current_line
                elif str_item == ";":
                    line_type = LineType.STATEMENT
                    if len(current_line) == 1:
                        newlines = 0
                        line_type = LineType.BLOCK_CLOSE
                    elif prev_line_type == LineType.BLOCK_CLOSE:
                        newlines = 2
                    else:
                        newlines = 1
-                    commit_current_line(line_type, newlines_before=newlines)
+                elif prev_line_type in require_extra_newline:
                    newlines = 2
-                elif item.type == TokenType.COMMENT:
+                commit_current_line(LineType.COMMENT, newlines_before=newlines)
                    require_extra_newline = [
                        LineType.BLOCK_CLOSE,
                        LineType.STATEMENT,
                        LineType.COMMENT,
                    ]
-                    single_line_comment = str_item.startswith("//")
+            else:
-                    newlines = 1
+                commit_current_line()
                    if single_line_comment:
                        if not str_item.startswith("// "):
                            current_line = f"// {current_line[2:]}"
-                        if not last_whitespace_contains_newline:
+        elif str_item == "(" and (
-                            current_line = " " + current_line
+            re.match(r"^([A-Za-z_\-])+\s*\(", current_line) or watch_parentheses
-                            newlines = 0
+        ):
-                        elif prev_line_type == LineType.BLOCK_CLOSE:
+            watch_parentheses = True
-                            newlines = 2
+            parentheses_balance += 1
-                    elif prev_line_type in require_extra_newline:
+        elif str_item == ")" and watch_parentheses:
-                        newlines = 2
+            parentheses_balance -= 1
            all_parentheses_closed = parentheses_balance == 0
            if all_parentheses_closed:
                commit_current_line(
                    newlines_before=2 if prev_line_type == LineType.BLOCK_CLOSE else 1
                )
                watch_parentheses = False
-                    commit_current_line(LineType.COMMENT, newlines_before=newlines)
+        tracker_is_empty = len(bracket_tracker) > 0
        if tracker_is_empty:
            last_in_tracker = bracket_tracker[-1]
            is_list_comma = last_in_tracker == "[" and str_item == ","
            if is_list_comma:
                last_was_list_item = end_str.strip()[-1] not in ("[", ",")
                if last_was_list_item:
                    end_str = end_str.strip()
                commit_current_line()
-                else:
+        last_not_whitespace = item
-                    commit_current_line()
+        last_whitespace_contains_newline = False
-            elif str_item == "(" and (
+    return end_str.strip() + "\n"
                re.match("^([A-Za-z_\-])+\s*\(", current_line) or watch_parentheses
            ):
                watch_parentheses = True
                parentheses_balance += 1
            elif str_item == ")" and watch_parentheses:
                parentheses_balance -= 1
                all_parentheses_closed = parentheses_balance == 0
                if all_parentheses_closed:
                    commit_current_line(
                        newlines_before=2
                        if prev_line_type == LineType.BLOCK_CLOSE
                        else 1
                    )
                    watch_parentheses = False
            tracker_is_empty = len(bracket_tracker) > 0
            if tracker_is_empty:
                last_in_tracker = bracket_tracker[-1]
                is_list_comma = last_in_tracker == "[" and str_item == ","
                if is_list_comma:
                    last_was_list_item = end_str.strip()[-1] not in ["[", ","]
                    if last_was_list_item:
                        end_str = end_str.strip()
                    commit_current_line()
            last_not_whitespace = item
            last_whitespace_contains_newline = False
        return end_str.strip() + "\n"
--- a/blueprintcompiler/lsp.py
+++ b/blueprintcompiler/lsp.py
@@ -24,11 +24,10 @@ import traceback
 import typing as T
 from difflib import SequenceMatcher
-from . import decompiler, parser, tokenizer, utils, xml_reader
+from . import decompiler, formatter, parser, tokenizer, utils, xml_reader
 from .ast_utils import AstNode
 from .completions import complete
 from .errors import CompileError, MultipleErrors
-from .formatter import Formatter
 from .lsp_utils import *
 from .outputs.xml import XmlOutput
 from .tokenizer import Token
@@ -292,7 +291,7 @@ class LanguageServer:
            return
        try:
-            formatted_blp = Formatter.format(
+            formatted_blp = formatter.format(
                open_file.text,
                params["options"]["tabSize"],
                params["options"]["insertSpaces"],
--- a/blueprintcompiler/main.py
+++ b/blueprintcompiler/main.py
@@ -24,9 +24,8 @@ import os
 import sys
 import typing as T
-from . import interactive_port, parser, tokenizer
+from . import formatter, interactive_port, parser, tokenizer
 from .errors import CompileError, CompilerBugError, PrintableError, report_bug
-from .formatter import Formatter
 from .gir import add_typelib_search_path
 from .lsp import LanguageServer
 from .outputs import XmlOutput
@@ -211,7 +210,7 @@ class BlueprintApp:
                except:
                    errored = True
-                formatted_str = Formatter.format(data, opts.spaces_num, not opts.tabs)
+                formatted_str = formatter.format(data, opts.spaces_num, not opts.tabs)
                if data != formatted_str:
                    happened = "Would format"
--- a/tests/test_formatter.py
+++ b/tests/test_formatter.py
@@ -21,7 +21,7 @@
 import unittest
 from pathlib import Path
-from blueprintcompiler.formatter import Formatter
+from blueprintcompiler import formatter
 class TestFormatter(unittest.TestCase):
@@ -38,7 +38,7 @@ class TestFormatter(unittest.TestCase):
        ) as f:
            expected = f.read()
-        actual = Formatter.format(input_data)
+        actual = formatter.format(input_data)
        self.assertEqual(actual, expected)
    def test_formatter(self):