# blueprint-compiler/blueprintcompiler/formatter.py
# 2023-09-28 17:33:43 -05:00
#
# 218 lines
# 8.2 KiB
# Python

# formatter.py
#
# Copyright 2023 James Westman <james@jwestman.net>
#
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation; either version 3 of the
# License, or (at your option) any later version.
#
# This file is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# SPDX-License-Identifier: LGPL-3.0-or-later
import re
from enum import Enum
from . import tokenizer, utils
from .tokenizer import TokenType
# Bracket tokens that open and close a block or list.
OPENING_TOKENS = ("{", "[")
CLOSING_TOKENS = ("}", "]")

# Tokens that trigger the formatter's end-of-line handling.
NEWLINE_AFTER = (";", *OPENING_TOKENS, *CLOSING_TOKENS)

# Tokens that never get a space inserted in front of them.
NO_WHITESPACE_BEFORE = (
    ",",
    ":",
    "::",
    ";",
    ")",
    ".",
    ">",
    "]",
    "=",
)

# Tokens that never get a space inserted right after them.
NO_WHITESPACE_AFTER = (
    "C_",
    "_",
    "(",
    ".",
    "$",
    "<",
    "::",
    "[",
    "=",
)

# Tokens that request a space after them.
# NO_WHITESPACE_BEFORE takes precedence over WHITESPACE_AFTER
WHITESPACE_AFTER = (
    ":",
    ",",
    ">",
    ")",
    "|",
    "=>",
)

# Tokens that request a space in front of them.
WHITESPACE_BEFORE = ("{", "|")
class LineType(Enum):
    """Kind of output line the formatter committed most recently.

    The formatter remembers the type of the previous line to decide how
    many newlines to put in front of the next one.
    """

    # A line terminated by ";".
    STATEMENT = 0
    # A line ending with an opening "{" or "[".
    BLOCK_OPEN = 1
    # A line holding a closing "}" or "]" (also a ";" directly following one).
    BLOCK_CLOSE = 2
    # A bracketed line that starts with "[" and ends at the matching "]".
    CHILD_TYPE = 3
    # A comment token.
    COMMENT = 4
class Format:
    """Token-based source formatter; the entry point is :meth:`format`."""

    @staticmethod
    def format(data, tab_size=2, insert_space=True):
        """Re-emit ``data`` with normalized spacing, line breaks and indentation.

        The text is tokenized with ``tokenizer.tokenize`` and rebuilt token by
        token: whitespace tokens are dropped, spaces are re-inserted according
        to the module-level spacing tables, and a new output line is started
        after ``;``, brackets and comments.

        Args:
            data: Source text handed to ``tokenizer.tokenize``.
            tab_size: Number of spaces per indent level (used only when
                ``insert_space`` is true).
            insert_space: Indent with spaces when true, otherwise with one
                tab character per level.

        Returns:
            The accumulated output string, returned as built (it is not
            stripped, so it keeps whatever line break/indent the last
            committed line appended).
        """
        # Raw strings: "\-", "\s" and "\(" are regex escapes, not string
        # escapes, and are invalid escape sequences in a non-raw literal.
        bare_name = re.compile(r"^([A-Za-z_\-])+$")
        call_start = re.compile(r"^([A-Za-z_\-])+\s*\(")

        indent_levels = 0
        tokens = tokenizer.tokenize(data)
        end_str = ""  # everything emitted so far
        last_not_whitespace = tokens[0]
        current_line = ""  # line being assembled, not yet emitted
        prev_line_type = None
        is_child_type = False
        indent_item = " " * tab_size if insert_space else "\t"
        watch_parentheses = False
        parentheses_balance = 0
        # Stack of currently open "[" / "(" tokens; None is the bottom marker.
        bracket_tracker = [None]

        def commit_current_line(
            line_type=None, redo_whitespace=False, newlines_before=1
        ):
            """Append ``current_line`` to ``end_str`` and start a new line.

            NOTE: a call without ``line_type`` resets ``prev_line_type`` to
            None; it does not keep the previous value.
            """
            nonlocal end_str, current_line, prev_line_type

            indent_whitespace = indent_levels * indent_item
            whitespace_to_add = "\n" + indent_whitespace

            if redo_whitespace or newlines_before != 1:
                # Replace the pending trailing whitespace with the requested
                # number of newlines (0 glues the line onto the previous one).
                end_str = end_str.strip() + ("\n" * newlines_before)
                if newlines_before > 0:
                    end_str += indent_whitespace

            end_str += current_line + whitespace_to_add

            current_line = ""
            prev_line_type = line_type

        for item in tokens:
            if item.type == TokenType.WHITESPACE:
                # Original whitespace is discarded and regenerated below.
                continue

            str_item = str(item)
            if item.type == TokenType.QUOTED and str_item.startswith('"'):
                # Round-trip double-quoted strings through the project's
                # unescape/escape helpers.
                str_item = utils.escape_quote(utils.unescape_quote(str_item))

            # Insert one space before this token when the line is non-empty
            # and either side asks for it, unless a no-whitespace rule, a
            # "template ...:" line, or a "(" following anything other than a
            # bare name vetoes it.
            if (
                len(current_line) > 0
                and (
                    str_item in WHITESPACE_BEFORE
                    or item.type == TokenType.IDENT
                    or str(last_not_whitespace) in WHITESPACE_AFTER
                    or last_not_whitespace.type == TokenType.IDENT
                )
                and str_item not in NO_WHITESPACE_BEFORE
                and str(last_not_whitespace) not in NO_WHITESPACE_AFTER
                and not (str_item == ":" and current_line.startswith("template "))
                and not (str_item == "(" and not bare_name.match(current_line))
            ):
                current_line += " "

            current_line += str_item

            if str_item in ["[", "("]:
                bracket_tracker.append(str_item)
            elif str_item in ["]", ")"]:
                bracket_tracker.pop()

            if str_item in NEWLINE_AFTER or item.type == TokenType.COMMENT:
                if str_item in OPENING_TOKENS:
                    if str_item == "[":
                        # A "[" that begins its line opens a child-type
                        # bracket: it stays on the current line instead of
                        # opening an indented block.
                        is_child_type = current_line.startswith("[")
                        if is_child_type:
                            if str(last_not_whitespace) not in OPENING_TOKENS:
                                # Blank line before it, unless it directly
                                # follows an opening bracket.
                                end_str = (
                                    end_str.strip()
                                    + "\n\n"
                                    + (indent_item * indent_levels)
                                )
                            last_not_whitespace = item
                            continue

                    indent_levels += 1
                    if not (
                        prev_line_type in [LineType.CHILD_TYPE, LineType.COMMENT]
                        or prev_line_type == LineType.BLOCK_OPEN
                    ):
                        # Blank line before the block header, at the outer
                        # indent level.
                        end_str = (
                            end_str.strip()
                            + "\n\n"
                            + (indent_item * (indent_levels - 1))
                        )
                    commit_current_line(
                        LineType.BLOCK_OPEN,
                    )

                elif str_item == "]" and is_child_type:
                    commit_current_line(
                        LineType.CHILD_TYPE,
                        False,
                    )
                    is_child_type = False

                elif str_item in CLOSING_TOKENS:
                    # NOTE(review): this compares the token object itself
                    # (not str(...)) with ","; confirm the token type defines
                    # equality with str, otherwise the test is always true.
                    if str_item == "]" and last_not_whitespace != ",":
                        # Emit the last list item on its own line, then put
                        # the "]" on the following one.
                        current_line = current_line[:-1]
                        commit_current_line()
                        current_line = "]"

                    indent_levels -= 1
                    commit_current_line(
                        LineType.BLOCK_CLOSE,
                        True,
                    )

                elif str_item == ";":
                    line_type = LineType.STATEMENT
                    if len(current_line) == 1:
                        # A lone ";" is glued onto the previous output.
                        newlines = 0
                        line_type = LineType.BLOCK_CLOSE
                    elif prev_line_type == LineType.BLOCK_CLOSE:
                        newlines = 2
                    else:
                        newlines = 1
                    commit_current_line(line_type, newlines_before=newlines)

                elif item.type == TokenType.COMMENT:
                    if str_item.startswith("//"):
                        newlines = (
                            2 if prev_line_type == LineType.BLOCK_CLOSE else 1
                        )
                    else:
                        # Comments not starting with "//" also get a blank
                        # line after statements and other comments.
                        newlines = (
                            2
                            if prev_line_type
                            in [
                                LineType.BLOCK_CLOSE,
                                LineType.STATEMENT,
                                LineType.COMMENT,
                            ]
                            else 1
                        )
                    commit_current_line(
                        LineType.COMMENT,
                        newlines_before=newlines,
                    )

                else:
                    commit_current_line()

            elif str_item == "(" and (
                call_start.match(current_line) or watch_parentheses
            ):
                # The line starts with "name(": track nesting so the line can
                # be ended once the parentheses balance again.
                watch_parentheses = True
                parentheses_balance += 1

            elif str_item == ")" and watch_parentheses:
                parentheses_balance -= 1
                if parentheses_balance == 0:
                    commit_current_line(
                        newlines_before=2
                        if prev_line_type == LineType.BLOCK_CLOSE
                        else 1
                    )
                    watch_parentheses = False

            if bracket_tracker and bracket_tracker[-1] == "[" and str_item == ",":
                # Directly inside "[ ... ]": every "," ends the output line.
                if end_str.strip()[-1] not in ["[", ","]:
                    # Drop the pending line break so this text continues on
                    # the previous output line.
                    end_str = end_str.strip()
                commit_current_line()

            last_not_whitespace = item

        return end_str