blueprint-compiler/blueprintcompiler/formatter.py
2023-09-16 15:29:45 +02:00

177 lines
6.1 KiB
Python

# formatter.py
#
# Copyright 2021 James Westman <james@jwestman.net>
#
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation; either version 3 of the
# License, or (at your option) any later version.
#
# This file is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# SPDX-License-Identifier: LGPL-3.0-or-later
from enum import Enum
from . import tokenizer
# Tokens that open / close a nested block or list.
OPENING_TOKENS = [
    "{",
    "[",
]
CLOSING_TOKENS = [
    "}",
    "]",
]

# Tokens after which the formatter ends the current output line.
NEWLINE_AFTER = [";", *OPENING_TOKENS, *CLOSING_TOKENS]

# No separating space is inserted in front of these tokens ...
NO_WHITESPACE_BEFORE = [
    ",",
    ":",
    "::",
    ";",
    ")",
    ".",
    ">",
]
# ... nor directly behind these.
NO_WHITESPACE_AFTER = [
    "C_",
    "_",
    "(",
]

# A space follows these tokens, unless the next token is listed in
# NO_WHITESPACE_BEFORE, which takes precedence over WHITESPACE_AFTER.
WHITESPACE_AFTER = [":", ",", ">", ")"]
# A space precedes these tokens (unless the previous token is listed in
# NO_WHITESPACE_AFTER).
WHITESPACE_BEFORE = [
    "{",
    "$",
]
class LineType(Enum):
    """Kind of output line the formatter committed most recently."""

    # NOTE(review): not referenced by the formatter code visible in this file.
    STATEMENT = 0

    # Line that ends with an opening token.
    BLOCK_OPEN = 1

    # Line holding the closing token of a block or list.
    BLOCK_CLOSE = 2

    # Line produced when a line-leading "[...]" group is closed.
    CHILD_TYPE = 3
class Format:
    """Namespace class holding the token-based source formatter."""

    @staticmethod
    def _to_single_quoted(quoted: str) -> str:
        """Rewrite a double-quoted string literal as a single-quoted one.

        The text is walked one escape pair at a time so that an already
        escaped character is never escaped twice: ``\\"`` becomes a plain
        ``"``, every other ``\\x`` pair (including ``\\'`` and ``\\\\``) is
        copied verbatim, and a bare ``'`` gains a backslash.

        Args:
            quoted: The literal including its surrounding double quotes.

        Returns:
            The equivalent literal wrapped in single quotes.
        """
        body = quoted[1:-1]
        pieces = []
        pos = 0
        while pos < len(body):
            char = body[pos]
            if char == "\\" and pos + 1 < len(body):
                escaped = body[pos + 1]
                # The double quote no longer needs escaping; keep any other
                # escape sequence exactly as written.
                pieces.append('"' if escaped == '"' else char + escaped)
                pos += 2
                continue
            pieces.append("\\'" if char == "'" else char)
            pos += 1
        return "'" + "".join(pieces) + "'"

    @staticmethod
    def format(data, tab_size: int = 2, insert_space: bool = True) -> str:
        """Re-emit *data* with normalised spacing, line breaks and indentation.

        The input is split with ``tokenizer.tokenize``; whitespace tokens are
        dropped and the remaining tokens are joined again according to the
        module-level token tables (``NEWLINE_AFTER``, ``WHITESPACE_AFTER``,
        ...). Double-quoted strings are converted to single-quoted ones.

        Args:
            data: Source text handed to ``tokenizer.tokenize`` (presumably a
                ``str`` -- confirm against the tokenizer).
            tab_size: Number of spaces per indent level when ``insert_space``
                is true.
            insert_space: Indent with spaces when true, with one tab per
                level otherwise.

        Returns:
            The formatted text.
        """
        tokens = tokenizer.tokenize(data)
        if not tokens:
            # Nothing to format; also avoids indexing an empty token list.
            return ""

        # Work on per-call copies: the rules change while inside a list or a
        # child-type group, and the shared module-level tables must not be
        # left modified if formatting stops half-way through.
        newline_after = list(NEWLINE_AFTER)
        no_whitespace_before = list(NO_WHITESPACE_BEFORE)
        whitespace_after = list(WHITESPACE_AFTER)

        indent_levels = 0
        end_str = ""
        last_not_whitespace = tokens[0]
        current_line = ""
        prev_line_type = None
        is_child_type = False
        indent_item = " " * tab_size if insert_space else "\t"

        def output_ends_with(candidates):
            """Tell whether the last visible output character is in *candidates*."""
            emitted = end_str.strip()
            # An empty output has no last character and matches nothing.
            return bool(emitted) and emitted[-1] in candidates

        def another_newline(one_indent_less=False):
            """Replace trailing whitespace of the output with a blank line."""
            nonlocal end_str
            depth = indent_levels - 1 if one_indent_less else indent_levels
            end_str = end_str.strip() + "\n\n" + indent_item * depth

        def commit_current_line(
            two_newlines=False, line_type=None, indent_decrease=False
        ):
            """Append ``current_line`` to the output and start a fresh line.

            ``line_type`` is remembered as ``prev_line_type``; a plain commit
            therefore resets it to ``None``.
            """
            nonlocal end_str, current_line, prev_line_type

            if indent_decrease:
                # Re-indent the pending line after the level was lowered.
                end_str = end_str.strip() + "\n" + (indent_levels * indent_item)

            if two_newlines:
                another_newline(
                    not (
                        current_line.endswith(";")
                        and output_ends_with(CLOSING_TOKENS)
                    )
                )

            end_str += current_line + "\n" + (indent_levels * indent_item)
            current_line = ""
            prev_line_type = line_type

        for item in tokens:
            if item.type == tokenizer.TokenType.WHITESPACE:
                continue

            str_item = str(item)
            if item.type == tokenizer.TokenType.QUOTED and str_item.startswith('"'):
                str_item = Format._to_single_quoted(str_item)

            prev_str = str(last_not_whitespace)
            # The very first token is its own "previous" token; it must not
            # be preceded by a separator.
            at_start = last_not_whitespace is item
            follows_spacing_token = (
                prev_str in whitespace_after
                or last_not_whitespace.type == tokenizer.TokenType.IDENT
            )
            if (
                not at_start
                and prev_str not in NO_WHITESPACE_AFTER
                and (
                    str_item in WHITESPACE_BEFORE
                    or (
                        follows_spacing_token
                        and str_item not in no_whitespace_before
                    )
                )
            ):
                current_line += " "

            current_line += str_item

            if (
                str_item in newline_after
                or item.type == tokenizer.TokenType.COMMENT
            ):
                if str_item in OPENING_TOKENS:
                    if str_item == "[":
                        # A "[" that starts the line opens a child-type group
                        # which stays on one line; any other "[" opens a list.
                        is_child_type = current_line.startswith("[")
                        if is_child_type:
                            no_whitespace_before.append("]")
                            if prev_str not in OPENING_TOKENS:
                                another_newline()
                            last_not_whitespace = item
                            continue

                        # Inside a list every "," ends the line.
                        newline_after.append(",")
                        whitespace_after.remove(",")

                    indent_levels += 1
                    # No blank line before the very first output line, right
                    # after a child-type line, or right after an opening token.
                    blank_line_before = bool(end_str.strip()) and not (
                        prev_line_type == LineType.CHILD_TYPE
                        or output_ends_with(OPENING_TOKENS)
                    )
                    commit_current_line(blank_line_before, LineType.BLOCK_OPEN)

                elif str_item in CLOSING_TOKENS:
                    if str_item == "]":
                        if is_child_type:
                            no_whitespace_before.remove("]")
                            # Cancels the decrement below: a child-type group
                            # never changed the indent level.
                            indent_levels += 1
                        else:
                            whitespace_after.append(",")
                            newline_after.remove(",")

                            if prev_str != ",":
                                # No trailing comma: the last element still
                                # shares the line with "]"; split them.
                                current_line = current_line[:-1]
                                commit_current_line()
                                current_line = "]"

                    indent_levels -= 1
                    commit_current_line(
                        line_type=(
                            LineType.CHILD_TYPE
                            if is_child_type
                            else LineType.BLOCK_CLOSE
                        ),
                        indent_decrease=not is_child_type,
                    )
                    is_child_type = False

                elif str_item == ";":
                    # A statement that directly follows a closed block is
                    # separated from it by a blank line.
                    commit_current_line(
                        two_newlines=output_ends_with(CLOSING_TOKENS)
                    )

                else:
                    commit_current_line()

            last_not_whitespace = item

        return end_str