mirror of
https://gitlab.gnome.org/jwestman/blueprint-compiler.git
synced 2025-06-21 23:19:24 -04:00
Blueprint allows both `//` and `/*...*/` style comments, but if a project prefers to use only `/*...*/` comments, it is currently not possible to place them inline. Therefore, treat both comment styles equally when they occur inline. To make this easier to understand, the comment handling is refactored slightly: first normalize the whitespace of single-line comments, then handle newlines for both single-line and multi-line comments. The test is adjusted accordingly to make sure this works.
232 lines
8.2 KiB
Python
232 lines
8.2 KiB
Python
# formatter.py
|
|
#
|
|
# Copyright 2023 Gregor Niehl <gregorniehl@web.de>
|
|
#
|
|
# This file is free software; you can redistribute it and/or modify it
|
|
# under the terms of the GNU Lesser General Public License as
|
|
# published by the Free Software Foundation; either version 3 of the
|
|
# License, or (at your option) any later version.
|
|
#
|
|
# This file is distributed in the hope that it will be useful, but
|
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
# Lesser General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Lesser General Public
|
|
# License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
#
|
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
|
|
|
import re
|
|
from enum import Enum
|
|
|
|
from . import tokenizer
|
|
from .errors import CompilerBugError
|
|
from .tokenizer import TokenType
|
|
|
|
# Bracket tokens that open, respectively close, a block or list.
OPENING_TOKENS = ("{", "[")
CLOSING_TOKENS = ("}", "]")

# Tokens after which the formatter decides whether to end the current line.
NEWLINE_AFTER = (";", *OPENING_TOKENS, *CLOSING_TOKENS)

# Tokens that suppress the separating space in front of / behind themselves.
NO_WHITESPACE_BEFORE = (",", ":", "::", ";", ")", ".", ">", "]", "=")
NO_WHITESPACE_AFTER = ("C_", "_", "(", ".", "$", "<", "::", "[", "=")

# Tokens that request a separating space behind / in front of themselves.
# NO_WHITESPACE_BEFORE takes precedence over WHITESPACE_AFTER
WHITESPACE_AFTER = (":", ",", ">", ")", "|", "=>")
WHITESPACE_BEFORE = ("{", "|")
|
|
|
|
|
|
class LineType(Enum):
    """Kind of the line most recently written to the formatter output.

    The formatter remembers the type of the previously committed line and
    uses it to decide how many newlines to put in front of the next one.
    """

    # A line that was ended by ";".
    STATEMENT = 0
    # A line that was ended by an opening "{" or "[".
    BLOCK_OPEN = 1
    # A line that was ended by a closing "}" or "]" (or a ";" directly
    # following such a closing token).
    BLOCK_CLOSE = 2
    # A bracketed child-type line, i.e. a line that starts with "[".
    CHILD_TYPE = 3
    # A line that was ended by a comment token.
    COMMENT = 4
|
|
|
|
|
|
def format(data, tab_size=2, insert_space=True):
    """Return *data* re-formatted with normalized spacing and indentation.

    The input is tokenized with ``tokenizer.tokenize`` and re-assembled token
    by token: whitespace tokens are dropped (only whether they contained a
    newline is remembered), a single space is inserted between tokens
    according to the module-level whitespace tables, and lines are broken
    after ``;``, brackets/braces and comments.

    Args:
        data: The source text to format.
        tab_size: Number of spaces per indentation level; only used when
            ``insert_space`` is true.
        insert_space: Indent with ``tab_size`` spaces when true, otherwise
            with a single tab character per level.

    Returns:
        The formatted text, stripped of surrounding whitespace and terminated
        by exactly one newline.

    Raises:
        CompilerBugError: If a token selected for newline treatment matches
            none of the handled cases.
    """
    indent_levels = 0
    tokens = tokenizer.tokenize(data)
    end_str = ""
    # NOTE(review): assumes the tokenizer always yields at least one token
    # (e.g. an end-of-file token) -- confirm against tokenizer.tokenize.
    last_not_whitespace = tokens[0]
    current_line = ""
    prev_line_type = None
    is_child_type = False
    indent_item = " " * tab_size if insert_space else "\t"
    watch_parentheses = False
    parentheses_balance = 0
    # Stack of currently open "[" / "(" tokens; None is a bottom sentinel.
    bracket_tracker = [None]
    last_whitespace_contains_newline = False

    def commit_current_line(line_type=None, redo_whitespace=False, newlines_before=1):
        """Append ``current_line`` to the output and start a new, empty line.

        ``line_type`` becomes the new ``prev_line_type``. The default is an
        explicit ``None``: the previous spelling ``line_type=prev_line_type``
        was evaluated once at definition time, when ``prev_line_type`` was
        still ``None``, so calls without a line type have always reset it.

        When ``redo_whitespace`` is set or ``newlines_before`` differs from 1,
        the trailing whitespace of the output is replaced by
        ``newlines_before`` newlines (plus indentation if at least one
        newline is written) before the line is appended.
        """
        nonlocal end_str, current_line, prev_line_type

        indent_whitespace = indent_levels * indent_item
        whitespace_to_add = "\n" + indent_whitespace

        if redo_whitespace or newlines_before != 1:
            end_str = end_str.strip() + "\n" * newlines_before
            if newlines_before > 0:
                end_str += indent_whitespace

        end_str += current_line + whitespace_to_add

        current_line = ""
        prev_line_type = line_type

    for item in tokens:
        str_item = str(item)

        if item.type == TokenType.WHITESPACE:
            # Whitespace is regenerated; only remember whether the original
            # put the next token on a new line (needed for inline comments).
            last_whitespace_contains_newline = "\n" in str_item
            continue

        whitespace_required = (
            str_item in WHITESPACE_BEFORE
            or str(last_not_whitespace) in WHITESPACE_AFTER
            or (str_item == "(" and end_str.endswith(": bind"))
        )
        whitespace_blockers = (
            str_item in NO_WHITESPACE_BEFORE
            or str(last_not_whitespace) in NO_WHITESPACE_AFTER
            or (str_item == "<" and str(last_not_whitespace) == "typeof")
        )

        this_or_last_is_ident = TokenType.IDENT in (item.type, last_not_whitespace.type)
        current_line_is_empty = not current_line
        is_function = str_item == "(" and not re.match(
            r"^([A-Za-z_\-])+(: bind)?$", current_line
        )

        any_blockers = whitespace_blockers or current_line_is_empty or is_function
        if (whitespace_required or this_or_last_is_ident) and not any_blockers:
            current_line += " "

        current_line += str_item

        if str_item in ("[", "("):
            bracket_tracker.append(str_item)
        elif str_item in ("]", ")"):
            bracket_tracker.pop()

        needs_newline_treatment = (
            str_item in NEWLINE_AFTER or item.type == TokenType.COMMENT
        )
        if needs_newline_treatment:
            if str_item in OPENING_TOKENS:
                list_or_child_type = str_item == "["
                if list_or_child_type:
                    # A "[" that starts the line opens a child type rather
                    # than a list.
                    is_child_type = current_line.startswith("[")

                    if is_child_type:
                        if str(last_not_whitespace) not in OPENING_TOKENS:
                            # Separate the child type from what precedes it
                            # with one blank line.
                            end_str = (
                                end_str.strip() + "\n\n" + (indent_item * indent_levels)
                            )
                        last_not_whitespace = item
                        continue

                indent_levels += 1
                keep_same_indent = prev_line_type not in (
                    LineType.CHILD_TYPE,
                    LineType.COMMENT,
                    LineType.BLOCK_OPEN,
                )
                if keep_same_indent:
                    # Blank line before the block header, indented at the
                    # header's own (outer) level.
                    end_str = (
                        end_str.strip() + "\n\n" + indent_item * (indent_levels - 1)
                    )
                commit_current_line(LineType.BLOCK_OPEN)

            elif str_item == "]" and is_child_type:
                commit_current_line(LineType.CHILD_TYPE, False)
                is_child_type = False

            elif str_item in CLOSING_TOKENS:
                if str_item == "]" and str(last_not_whitespace) != "[":
                    # Move the "]" onto its own line and make sure the last
                    # list item ends with a trailing comma.
                    current_line = current_line[:-1]
                    if str(last_not_whitespace) != ",":
                        current_line += ","
                    commit_current_line()
                    current_line = "]"
                elif str(last_not_whitespace) in OPENING_TOKENS:
                    # Empty block/list: keep the closing token directly
                    # behind the opening one.
                    end_str = end_str.strip()
                    commit_current_line(LineType.BLOCK_CLOSE, True, 0)

                indent_levels -= 1
                commit_current_line(LineType.BLOCK_CLOSE, True)

            elif str_item == ";":
                line_type = LineType.STATEMENT
                newlines = 1

                if len(current_line) == 1:
                    # A lone ";" is glued to the previously written line.
                    newlines = 0
                    line_type = LineType.BLOCK_CLOSE
                elif prev_line_type == LineType.BLOCK_CLOSE:
                    newlines = 2

                commit_current_line(line_type, newlines_before=newlines)

            elif item.type == TokenType.COMMENT:
                require_extra_newline = (
                    LineType.BLOCK_CLOSE,
                    LineType.STATEMENT,
                    LineType.COMMENT,
                )

                single_line_comment = str_item.startswith("//")
                if single_line_comment and not str_item.startswith("// "):
                    # Insert the missing space after "//". current_line ends
                    # with str_item (it was appended above), so rewrite only
                    # that suffix; slicing current_line itself would eat the
                    # first two characters of any text preceding the comment.
                    line_prefix = current_line[: len(current_line) - len(str_item)]
                    current_line = f"{line_prefix}// {str_item[2:]}"

                inline_comment = not last_whitespace_contains_newline
                newlines = 1
                if inline_comment:
                    # Keep the comment on the line it was written on.
                    current_line = " " + current_line
                    newlines = 0
                elif single_line_comment:
                    if prev_line_type == LineType.BLOCK_CLOSE:
                        newlines = 2
                elif prev_line_type in require_extra_newline:
                    newlines = 2

                # Drop trailing whitespace on every line of the comment.
                current_line = "\n".join(
                    [line.rstrip() for line in current_line.split("\n")]
                )
                commit_current_line(LineType.COMMENT, newlines_before=newlines)

            else:  # pragma: no cover
                raise CompilerBugError()

        elif str_item == "(" and (
            re.match(r"^([A-Za-z_\-])+\s*\(", current_line) or watch_parentheses
        ):
            # Start (or continue) counting parentheses of a line that begins
            # with "name(" so it can be committed when they are balanced.
            watch_parentheses = True
            parentheses_balance += 1

        elif str_item == ")" and watch_parentheses:
            parentheses_balance -= 1
            all_parentheses_closed = parentheses_balance == 0
            if all_parentheses_closed:
                commit_current_line(
                    newlines_before=2 if prev_line_type == LineType.BLOCK_CLOSE else 1
                )
                watch_parentheses = False

        if bracket_tracker:
            last_in_tracker = bracket_tracker[-1]
            is_list_comma = last_in_tracker == "[" and str_item == ","
            if is_list_comma:
                # endswith() instead of indexing [-1] so that output which is
                # still empty does not raise IndexError.
                last_was_list_item = not end_str.strip().endswith(("[", ","))
                if last_was_list_item:
                    end_str = end_str.strip()
                commit_current_line()

        last_not_whitespace = item
        last_whitespace_contains_newline = False

    return end_str.strip() + "\n"
|