blueprint-compiler/blueprintcompiler/formatter.py

# formatter.py
#
# Copyright 2021 James Westman <james@jwestman.net>
#
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation; either version 3 of the
# License, or (at your option) any later version.
#
# This file is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# SPDX-License-Identifier: LGPL-3.0-or-later

from enum import Enum

from . import tokenizer
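
# Token strings that drive the whitespace and line-break decisions in
# Format.format below.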
OPENING_TOKENS = ["{", "["]
CLOSING_TOKENS = ["}", "]"]

NEWLINE_AFTER = [";"] + OPENING_TOKENS + CLOSING_TOKENS

NO_WHITESPACE_BEFORE = [",", ":", ";", ")", "."]
NO_WHITESPACE_AFTER = ["C_", "_", "("]

WHITESPACE_AFTER = [":", ","]
WHITESPACE_BEFORE = ["{"]


class LineType(Enum):
    STATEMENT = 0
    BLOCK_OPEN = 1
    BLOCK_CLOSE = 2
    CHILD_TYPE = 3


class Format:
    def format(data):
        indent_levels = 0
        tokens = tokenizer.tokenize(data)
        tokenized_str = ""
        last_not_whitespace = tokens[0]
        current_line = ""
        prev_line_type = None
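
        # Append the finished line (plus the indentation for the next line) to
        # tokenized_str, optionally preceded by extra blank lines, and remember
        # what kind of line it was for the next spacing decision.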
        def commit_current_line(extra_newlines, line_type, new_indent_levels):
            nonlocal tokenized_str, current_line, prev_line_type

            current_line += "\n" + (new_indent_levels * " ")
            tokenized_str += (
                current_line
                if extra_newlines == 0
                else ("\n" * extra_newlines) + (" " * indent_levels) + current_line
            )
            current_line = ""
            prev_line_type = line_type
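
        # Walk the token stream, skipping raw whitespace; spacing and newlines
        # are reinserted according to the rules defined above.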
        for item in tokens:
            if item.type != tokenizer.TokenType.WHITESPACE:
                item_as_string = str(item)
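
                # Insert a space before this token unless the rules for the
                # previous token or for this one forbid it.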
                if (
                    item_as_string in WHITESPACE_BEFORE
                    and str(last_not_whitespace) not in NO_WHITESPACE_AFTER
                ) or (
                    (
                        str(last_not_whitespace) in WHITESPACE_AFTER
                        or last_not_whitespace.type == tokenizer.TokenType.IDENT
                    )
                    and str(last_not_whitespace) not in NO_WHITESPACE_AFTER
                    and item_as_string not in NO_WHITESPACE_BEFORE
                ):
                    current_line += " "

                current_line += item_as_string
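
                # Tokens that end a line: braces and brackets also adjust the
                # indent level; everything else (";" and comments) just ends
                # the current line.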
                if (
                    item_as_string in NEWLINE_AFTER
                    or item.type == tokenizer.TokenType.COMMENT
                ):
                    if item_as_string in OPENING_TOKENS:
                        commit_current_line(
                            0 if prev_line_type == LineType.CHILD_TYPE else 1,
                            LineType.BLOCK_OPEN,
                            indent_levels + 1,
                        )
                        indent_levels += 1
                    elif item_as_string in CLOSING_TOKENS:
                        indent_levels -= 1
                        commit_current_line(
                            0,
                            LineType.BLOCK_CLOSE,
                            indent_levels - 1,
                        )
                    else:
                        commit_current_line(0, prev_line_type, indent_levels)

                last_not_whitespace = item

        print(tokenized_str)  # TODO: Remove this when the MR is ready to be merged
        return tokenized_str
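
# Example usage (a sketch, not part of the module): "window.blp" is a
# hypothetical input file. Because of the relative import above, this module
# has to be imported as part of the blueprintcompiler package, e.g.:
#
#     from blueprintcompiler.formatter import Format
#
#     with open("window.blp") as f:
#         print(Format.format(f.read()))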