blueprint-compiler/blueprintcompiler/formatter.py

# formatter.py
#
# Copyright 2023 James Westman <james@jwestman.net>
#
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation; either version 3 of the
# License, or (at your option) any later version.
#
# This file is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# SPDX-License-Identifier: LGPL-3.0-or-later

import re
from enum import Enum

from . import tokenizer, utils
from .tokenizer import TokenType

OPENING_TOKENS = ("{", "[")
CLOSING_TOKENS = ("}", "]")

NEWLINE_AFTER = tuple(";") + OPENING_TOKENS + CLOSING_TOKENS

NO_WHITESPACE_BEFORE = (",", ":", "::", ";", ")", ".", ">", "]", "=")
NO_WHITESPACE_AFTER = ("C_", "_", "(", ".", "$", "<", "::", "[", "=")

# NO_WHITESPACE_BEFORE takes precedence over WHITESPACE_AFTER
WHITESPACE_AFTER = (":", ",", ">", ")", "|", "=>")
WHITESPACE_BEFORE = ("{", "|")


class LineType(Enum):
    STATEMENT = 0
    BLOCK_OPEN = 1
    BLOCK_CLOSE = 2
    CHILD_TYPE = 3
    COMMENT = 4


class Format:
    def format(data, tab_size=2, insert_space=True):
        indent_levels = 0
        tokens = tokenizer.tokenize(data)
        end_str = ""
        last_not_whitespace = tokens[0]
        current_line = ""
        prev_line_type = None
        is_child_type = False
        indent_item = " " * tab_size if insert_space else "\t"
        watch_parentheses = False
        parentheses_balance = 0
        bracket_tracker = [None]

        def commit_current_line(
            line_type=prev_line_type, redo_whitespace=False, newlines_before=1
        ):
            nonlocal end_str, current_line, prev_line_type

            indent_whitespace = indent_levels * indent_item
            whitespace_to_add = "\n" + indent_whitespace

            if redo_whitespace or newlines_before != 1:
                end_str = end_str.strip() + ("\n" * newlines_before)
                if newlines_before > 0:
                    end_str += indent_whitespace

            end_str += current_line + whitespace_to_add

            current_line = ""
            prev_line_type = line_type

        for item in tokens:
            if item.type != TokenType.WHITESPACE:
                str_item = str(item)
                if item.type == TokenType.QUOTED and str_item.startswith('"'):
                    str_item = utils.escape_quote(utils.unescape_quote(str_item))

                if (
                    len(current_line) > 0
                    and (
                        str_item in WHITESPACE_BEFORE
                        or item.type == TokenType.IDENT
                        or str(last_not_whitespace) in WHITESPACE_AFTER
                        or last_not_whitespace.type == TokenType.IDENT
                    )
                    and str_item not in NO_WHITESPACE_BEFORE
                    and str(last_not_whitespace) not in NO_WHITESPACE_AFTER
                    and not (str_item == ":" and current_line.startswith("template "))
                    and not (
                        str_item == "("
                        and not re.match(r"^([A-Za-z_\-])+$", current_line)
                    )
                ):
                    current_line += " "

                current_line += str_item

                if str_item in ["[", "("]:
                    bracket_tracker.append(str_item)
                elif str_item in ["]", ")"]:
                    bracket_tracker.pop()

                if str_item in NEWLINE_AFTER or item.type == TokenType.COMMENT:
                    if str_item in OPENING_TOKENS:
                        if str_item == "[":
                            is_child_type = current_line.startswith("[")
                            if is_child_type:
                                if str(last_not_whitespace) not in OPENING_TOKENS:
                                    end_str = (
                                        end_str.strip()
                                        + "\n\n"
                                        + (indent_item * indent_levels)
                                    )
                                last_not_whitespace = item
                                continue

                        indent_levels += 1
                        if not (
                            prev_line_type in [LineType.CHILD_TYPE, LineType.COMMENT]
                            or prev_line_type == LineType.BLOCK_OPEN
                        ):
                            end_str = (
                                end_str.strip()
                                + "\n\n"
                                + (indent_item * (indent_levels - 1))
                            )
                        commit_current_line(
                            LineType.BLOCK_OPEN,
                        )

                    elif str_item == "]" and is_child_type:
                        commit_current_line(
                            LineType.CHILD_TYPE,
                            False,
                        )
                        is_child_type = False

                    elif str_item in CLOSING_TOKENS:
                        if str_item == "]" and last_not_whitespace != ",":
                            current_line = current_line[:-1]
                            commit_current_line()
                            current_line = "]"

                        indent_levels -= 1
                        commit_current_line(
                            LineType.BLOCK_CLOSE,
                            True,
                        )

                    elif str_item == ";":
                        line_type = LineType.STATEMENT
                        if len(current_line) == 1:
                            newlines = 0
                            line_type = LineType.BLOCK_CLOSE
                        elif prev_line_type == LineType.BLOCK_CLOSE:
                            newlines = 2
                        else:
                            newlines = 1

                        commit_current_line(line_type, newlines_before=newlines)

                    elif item.type == TokenType.COMMENT:
                        if str_item.startswith("//"):
                            newlines = (
                                2 if prev_line_type == LineType.BLOCK_CLOSE else 1
                            )
                        else:
                            newlines = (
                                2
                                if prev_line_type
                                in [
                                    LineType.BLOCK_CLOSE,
                                    LineType.STATEMENT,
                                    LineType.COMMENT,
                                ]
                                else 1
                            )

                        commit_current_line(
                            LineType.COMMENT,
                            newlines_before=newlines,
                        )

                    else:
                        commit_current_line()

                elif str_item == "(" and (
                    re.match("^([A-Za-z_\-])+\s*\(", current_line) or watch_parentheses
                ):
                    watch_parentheses = True
                    parentheses_balance += 1

                elif str_item == ")" and watch_parentheses:
                    parentheses_balance -= 1
                    if parentheses_balance == 0:
                        commit_current_line(
                            newlines_before=2
                            if prev_line_type == LineType.BLOCK_CLOSE
                            else 1
                        )
                        watch_parentheses = False

                if len(bracket_tracker) > 0:
                    if bracket_tracker[-1] == "[" and str_item == ",":
                        if end_str.strip()[-1] not in ["[", ","]:
                            end_str = end_str.strip()
                        commit_current_line()

                last_not_whitespace = item

        return end_str