Tiny formatter improvements

This commit is contained in:
gregorni 2023-12-19 02:29:09 +00:00 committed by James Westman
parent e261180dcc
commit e5cde71fc1
4 changed files with 155 additions and 166 deletions

View file

@ -44,192 +44,183 @@ class LineType(Enum):
COMMENT = 4 COMMENT = 4
def format(data, tab_size=2, insert_space=True):
    """Return ``data`` re-emitted with normalised spacing, indentation and blank lines.

    The input is tokenized with ``tokenizer.tokenize`` and the non-whitespace
    tokens are re-assembled line by line; original whitespace is discarded
    except for remembering whether it contained a newline (used to tell
    trailing ``//`` comments from comments on their own line).

    Args:
        data: Source text to format; passed unchanged to ``tokenizer.tokenize``.
        tab_size: Number of spaces per indentation level when ``insert_space``
            is true.
        insert_space: Indent with spaces when true, with one tab per level
            otherwise.

    Returns:
        The formatted text, stripped of surrounding whitespace and terminated
        by exactly one newline.
    """
    indent_levels = 0
    tokens = tokenizer.tokenize(data)
    if not tokens:
        # Nothing to format; same result the main path yields for empty output.
        return "\n"

    end_str = ""
    last_not_whitespace = tokens[0]
    current_line = ""
    prev_line_type = None
    is_child_type = False
    indent_item = " " * tab_size if insert_space else "\t"
    watch_parentheses = False
    parentheses_balance = 0
    # Stack of currently open "[" / "(".  The None sentinel stands for
    # "not inside any bracket".
    bracket_tracker = [None]
    last_whitespace_contains_newline = False

    # Compiled once instead of on every token.
    plain_name_re = re.compile(r"^([A-Za-z_\-])+(: bind)?$")
    call_start_re = re.compile(r"^([A-Za-z_\-])+\s*\(")

    def commit_current_line(line_type=None, redo_whitespace=False, newlines_before=1):
        """Append ``current_line`` to the output and start a fresh line.

        ``line_type`` is recorded as the type of the line just written.  The
        default is ``None``: a call without an explicit type *clears* the
        remembered type rather than keeping the previous one.  (The default
        used to be spelled ``prev_line_type``, which is evaluated once at
        definition time and therefore was always ``None`` as well.)

        With ``redo_whitespace`` set, or ``newlines_before`` different from 1,
        the whitespace already at the end of the output is replaced by
        ``newlines_before`` newlines (plus the current indent if that count
        is positive).
        """
        nonlocal end_str, current_line, prev_line_type

        indent_whitespace = indent_levels * indent_item
        whitespace_to_add = "\n" + indent_whitespace

        if redo_whitespace or newlines_before != 1:
            end_str = end_str.strip() + "\n" * newlines_before
            if newlines_before > 0:
                end_str += indent_whitespace

        end_str += current_line + whitespace_to_add

        current_line = ""
        prev_line_type = line_type

    for item in tokens:
        str_item = str(item)

        if item.type == TokenType.WHITESPACE:
            last_whitespace_contains_newline = "\n" in str_item
            continue

        # Decide whether a single space goes in front of this token.
        whitespace_required = (
            str_item in WHITESPACE_BEFORE
            or str(last_not_whitespace) in WHITESPACE_AFTER
            or (str_item == "(" and end_str.endswith(": bind"))
        )
        whitespace_blockers = (
            str_item in NO_WHITESPACE_BEFORE
            or str(last_not_whitespace) in NO_WHITESPACE_AFTER
            or (str_item == "<" and str(last_not_whitespace) == "typeof")
        )

        this_or_last_is_ident = TokenType.IDENT in (item.type, last_not_whitespace.type)
        current_line_is_empty = not current_line
        is_function = str_item == "(" and not plain_name_re.match(current_line)

        any_blockers = whitespace_blockers or current_line_is_empty or is_function
        if (whitespace_required or this_or_last_is_ident) and not any_blockers:
            current_line += " "

        current_line += str_item

        if str_item in ("[", "("):
            bracket_tracker.append(str_item)
        elif str_item in ("]", ")") and bracket_tracker:
            # Guarded: an unbalanced closer must not raise on an empty stack;
            # the list-comma check below already copes with an empty tracker.
            bracket_tracker.pop()

        needs_newline_treatment = (
            str_item in NEWLINE_AFTER or item.type == TokenType.COMMENT
        )
        if needs_newline_treatment:
            if str_item in OPENING_TOKENS:
                list_or_child_type = str_item == "["
                if list_or_child_type:
                    # A "[" that starts the line is treated as a child-type
                    # annotation and stays on the current line.
                    is_child_type = current_line.startswith("[")

                    if is_child_type:
                        if str(last_not_whitespace) not in OPENING_TOKENS:
                            end_str = (
                                end_str.strip() + "\n\n" + (indent_item * indent_levels)
                            )
                        last_not_whitespace = item
                        continue

                indent_levels += 1
                keep_same_indent = prev_line_type not in (
                    LineType.CHILD_TYPE,
                    LineType.COMMENT,
                    LineType.BLOCK_OPEN,
                )
                if keep_same_indent:
                    # Blank line before the block, at the outer indent level.
                    end_str = (
                        end_str.strip() + "\n\n" + indent_item * (indent_levels - 1)
                    )
                commit_current_line(LineType.BLOCK_OPEN)

            elif str_item == "]" and is_child_type:
                commit_current_line(LineType.CHILD_TYPE, False)
                is_child_type = False

            elif str_item in CLOSING_TOKENS:
                # NOTE(review): this compares the token object itself (not
                # str(token)) with ","; unless the token type defines equality
                # with strings the test is always true.  Left unchanged
                # because the intended behaviour cannot be confirmed here.
                if str_item == "]" and last_not_whitespace != ",":
                    current_line = current_line[:-1]
                    commit_current_line()
                    current_line = "]"
                elif str(last_not_whitespace) in OPENING_TOKENS:
                    end_str = end_str.strip()
                    commit_current_line(LineType.BLOCK_CLOSE, True, 0)

                indent_levels -= 1
                commit_current_line(LineType.BLOCK_CLOSE, True)

            elif str_item == ";":
                line_type = LineType.STATEMENT
                newlines = 1

                if len(current_line) == 1:
                    # The line is only ";": attach it to the preceding output.
                    newlines = 0
                    line_type = LineType.BLOCK_CLOSE
                elif prev_line_type == LineType.BLOCK_CLOSE:
                    newlines = 2

                commit_current_line(line_type, newlines_before=newlines)

            elif item.type == TokenType.COMMENT:
                require_extra_newline = (
                    LineType.BLOCK_CLOSE,
                    LineType.STATEMENT,
                    LineType.COMMENT,
                )

                single_line_comment = str_item.startswith("//")
                newlines = 1
                if single_line_comment:
                    if not str_item.startswith("// "):
                        # Normalise "//text" to "// text".
                        current_line = f"// {current_line[2:]}"

                    if not last_whitespace_contains_newline:
                        # Trailing comment: keep it on the preceding line.
                        current_line = " " + current_line
                        newlines = 0
                    elif prev_line_type == LineType.BLOCK_CLOSE:
                        newlines = 2

                elif prev_line_type in require_extra_newline:
                    newlines = 2

                commit_current_line(LineType.COMMENT, newlines_before=newlines)

            else:
                commit_current_line()

        elif str_item == "(" and (
            call_start_re.match(current_line) or watch_parentheses
        ):
            watch_parentheses = True
            parentheses_balance += 1

        elif str_item == ")" and watch_parentheses:
            parentheses_balance -= 1
            all_parentheses_closed = parentheses_balance == 0
            if all_parentheses_closed:
                commit_current_line(
                    newlines_before=2 if prev_line_type == LineType.BLOCK_CLOSE else 1
                )
                watch_parentheses = False

        if bracket_tracker:
            last_in_tracker = bracket_tracker[-1]
            is_list_comma = last_in_tracker == "[" and str_item == ","
            if is_list_comma:
                # endswith() instead of [-1] so empty output cannot raise.
                last_was_list_item = not end_str.strip().endswith(("[", ","))
                if last_was_list_item:
                    end_str = end_str.strip()
                commit_current_line()

        last_not_whitespace = item
        last_whitespace_contains_newline = False

    return end_str.strip() + "\n"

View file

@ -24,11 +24,10 @@ import traceback
import typing as T import typing as T
from difflib import SequenceMatcher from difflib import SequenceMatcher
from . import decompiler, parser, tokenizer, utils, xml_reader from . import decompiler, formatter, parser, tokenizer, utils, xml_reader
from .ast_utils import AstNode from .ast_utils import AstNode
from .completions import complete from .completions import complete
from .errors import CompileError, MultipleErrors from .errors import CompileError, MultipleErrors
from .formatter import Formatter
from .lsp_utils import * from .lsp_utils import *
from .outputs.xml import XmlOutput from .outputs.xml import XmlOutput
from .tokenizer import Token from .tokenizer import Token
@ -292,7 +291,7 @@ class LanguageServer:
return return
try: try:
formatted_blp = Formatter.format( formatted_blp = formatter.format(
open_file.text, open_file.text,
params["options"]["tabSize"], params["options"]["tabSize"],
params["options"]["insertSpaces"], params["options"]["insertSpaces"],

View file

@ -24,9 +24,8 @@ import os
import sys import sys
import typing as T import typing as T
from . import interactive_port, parser, tokenizer from . import formatter, interactive_port, parser, tokenizer
from .errors import CompileError, CompilerBugError, PrintableError, report_bug from .errors import CompileError, CompilerBugError, PrintableError, report_bug
from .formatter import Formatter
from .gir import add_typelib_search_path from .gir import add_typelib_search_path
from .lsp import LanguageServer from .lsp import LanguageServer
from .outputs import XmlOutput from .outputs import XmlOutput
@ -211,7 +210,7 @@ class BlueprintApp:
except: except:
errored = True errored = True
formatted_str = Formatter.format(data, opts.spaces_num, not opts.tabs) formatted_str = formatter.format(data, opts.spaces_num, not opts.tabs)
if data != formatted_str: if data != formatted_str:
happened = "Would format" happened = "Would format"

View file

@ -21,7 +21,7 @@
import unittest import unittest
from pathlib import Path from pathlib import Path
from blueprintcompiler.formatter import Formatter from blueprintcompiler import formatter
class TestFormatter(unittest.TestCase): class TestFormatter(unittest.TestCase):
@ -38,7 +38,7 @@ class TestFormatter(unittest.TestCase):
) as f: ) as f:
expected = f.read() expected = f.read()
actual = Formatter.format(input_data) actual = formatter.format(input_data)
self.assertEqual(actual, expected) self.assertEqual(actual, expected)
def test_formatter(self): def test_formatter(self):