# formatter.py
#
# Copyright 2023 Gregor Niehl
#
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation; either version 3 of the
# License, or (at your option) any later version.
#
# This file is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# SPDX-License-Identifier: LGPL-3.0-or-later

"""Token-based source formatter: re-indents and re-spaces tokenized text."""

import re
from enum import Enum

from . import tokenizer
from .errors import CompilerBugError
from .tokenizer import TokenType

# Tokens that open / close an indented region. format() increases the indent
# level after an opening token and decreases it on a closing token.
OPENING_TOKENS = ("{", "[")
CLOSING_TOKENS = ("}", "]")

# Tokens that trigger the line-break handling in format(). tuple(";") is the
# one-element tuple (";",).
NEWLINE_AFTER = tuple(";") + OPENING_TOKENS + CLOSING_TOKENS

# A space is never inserted before / after these tokens.
NO_WHITESPACE_BEFORE = (",", ":", "::", ";", ")", ".", ">", "]", "=")
NO_WHITESPACE_AFTER = ("C_", "_", "(", ".", "$", "<", "::", "[", "=")

# A space is inserted after / before these tokens unless a blocker applies.
# NO_WHITESPACE_BEFORE takes precedence over WHITESPACE_AFTER
WHITESPACE_AFTER = (":", ",", ">", ")", "|", "=>")
WHITESPACE_BEFORE = ("{", "|")


class LineType(Enum):
    """Kind of the most recently committed output line.

    format() consults the previous line's type to decide how many newlines
    to put in front of the next line.
    """

    # Line terminated by ";".
    STATEMENT = 0
    # Line terminated by an opening token.
    BLOCK_OPEN = 1
    # Line terminated by a closing token; also used for a ";" that is the
    # only text on its line.
    BLOCK_CLOSE = 2
    # A "[...]" group that started at the beginning of a line.
    CHILD_TYPE = 3
    # Line produced from a comment token.
    COMMENT = 4


def format(data, tab_size: int = 2, insert_space: bool = True) -> str:
    """Return ``data`` re-formatted with normalized spacing and indentation.

    The input is split with ``tokenizer.tokenize`` and rebuilt token by
    token: whitespace tokens are dropped, spaces are re-inserted according to
    the module-level token tables, and line breaks / indentation are emitted
    around block delimiters, ";", list commas, comments and some
    parenthesized constructs.

    Args:
        data: Source passed unchanged to ``tokenizer.tokenize``
            (presumably a ``str``; confirm against the tokenizer).
        tab_size: Number of spaces per indent level; only used when
            ``insert_space`` is true.
        insert_space: Indent with ``tab_size`` spaces per level when true,
            with one tab character per level otherwise.

    Returns:
        The formatted text with surrounding whitespace stripped and a single
        trailing newline appended.

    Raises:
        CompilerBugError: If a token is selected for newline treatment but
            matches none of the handled cases.
    """
    indent_levels = 0
    tokens = tokenizer.tokenize(data)
    end_str = ""
    # NOTE(review): raises IndexError if tokenize() can return an empty
    # sequence -- confirm it always yields at least one token.
    last_not_whitespace = tokens[0]
    current_line = ""
    prev_line_type = None
    is_child_type = False
    indent_item = " " * tab_size if insert_space else "\t"
    watch_parentheses = False
    parentheses_balance = 0
    # Stack of currently open "[" / "(" tokens; the leading None is a sentinel
    # so that [-1] is valid before any bracket has been opened.
    bracket_tracker = [None]
    last_whitespace_contains_newline = False

    def commit_current_line(
        line_type=prev_line_type, redo_whitespace=False, newlines_before=1
    ):
        """Append ``current_line`` to ``end_str`` and start a new line.

        After the line, a newline plus the indentation for the current
        ``indent_levels`` is appended, ``current_line`` is cleared and
        ``prev_line_type`` is set to ``line_type``.

        NOTE: the default for ``line_type`` is evaluated once, when this
        function is defined, at which point ``prev_line_type`` is ``None``.
        Calls that omit ``line_type`` therefore reset ``prev_line_type`` to
        ``None`` instead of keeping the previous value. The blank-line
        decisions below depend on this, so do not change it casually.

        Args:
            line_type: Value stored in ``prev_line_type``.
            redo_whitespace: When true, strip ``end_str`` and re-emit the
                newline(s) and indentation before appending the line.
            newlines_before: Number of newlines placed before the line when
                the whitespace is redone. Any value other than 1 forces the
                redo; 0 joins the line to the existing output with no
                newline and no indentation.
        """
        nonlocal end_str, current_line, prev_line_type

        # indent_levels is read from the enclosing scope at call time.
        indent_whitespace = indent_levels * indent_item
        whitespace_to_add = "\n" + indent_whitespace

        if redo_whitespace or newlines_before != 1:
            end_str = end_str.strip() + "\n" * newlines_before
            if newlines_before > 0:
                end_str += indent_whitespace

        # Without a redo, the line relies on the newline + indentation that
        # the previous commit left at the end of end_str.
        end_str += current_line + whitespace_to_add

        current_line = ""
        prev_line_type = line_type

    for item in tokens:
        str_item = str(item)

        # Whitespace tokens are never copied to the output; only remember
        # whether they contained a line break (used for "//" comments below).
        if item.type == TokenType.WHITESPACE:
            last_whitespace_contains_newline = "\n" in str_item
            continue

        whitespace_required = (
            str_item in WHITESPACE_BEFORE
            or str(last_not_whitespace) in WHITESPACE_AFTER
            or (str_item == "(" and end_str.endswith(": bind"))
        )
        whitespace_blockers = (
            str_item in NO_WHITESPACE_BEFORE
            or str(last_not_whitespace) in NO_WHITESPACE_AFTER
            or (str_item == "<" and str(last_not_whitespace) == "typeof")
        )

        this_or_last_is_ident = TokenType.IDENT in (item.type, last_not_whitespace.type)
        current_line_is_empty = len(current_line) == 0
        # True for "(" unless the line so far consists only of letters, "_"
        # and "-", optionally followed by ": bind".
        is_function = str_item == "(" and not re.match(
            r"^([A-Za-z_\-])+(: bind)?$", current_line
        )

        any_blockers = whitespace_blockers or current_line_is_empty or is_function

        # Blockers always win over a required space.
        if (whitespace_required or this_or_last_is_ident) and not any_blockers:
            current_line += " "

        current_line += str_item

        if str_item in ("[", "("):
            bracket_tracker.append(str_item)
        elif str_item in ("]", ")"):
            bracket_tracker.pop()

        needs_newline_treatment = (
            str_item in NEWLINE_AFTER or item.type == TokenType.COMMENT
        )
        if needs_newline_treatment:
            if str_item in OPENING_TOKENS:
                list_or_child_type = str_item == "["
                if list_or_child_type:
                    # A "[" that is the first text on the line starts a child
                    # type; a "[" later on the line is handled as a list.
                    is_child_type = current_line.startswith("[")

                    if is_child_type:
                        # Put a blank line before the child type unless it
                        # directly follows an opening token.
                        if str(last_not_whitespace) not in OPENING_TOKENS:
                            end_str = (
                                end_str.strip() + "\n\n" + (indent_item * indent_levels)
                            )
                        last_not_whitespace = item
                        # Keep collecting the child type on the same line; this
                        # skips the list-comma handling and the reset of
                        # last_whitespace_contains_newline at the loop's end.
                        continue

                # Increment first so the whitespace appended by the commit
                # below is already at the inner indentation.
                indent_levels += 1
                keep_same_indent = prev_line_type not in (
                    LineType.CHILD_TYPE,
                    LineType.COMMENT,
                    LineType.BLOCK_OPEN,
                )
                if keep_same_indent:
                    # Blank line before the opening line, at the outer
                    # indentation (hence the - 1).
                    end_str = (
                        end_str.strip() + "\n\n" + indent_item * (indent_levels - 1)
                    )
                commit_current_line(LineType.BLOCK_OPEN)

            elif str_item == "]" and is_child_type:
                commit_current_line(LineType.CHILD_TYPE, False)
                is_child_type = False

            elif str_item in CLOSING_TOKENS:
                if str_item == "]" and str(last_not_whitespace) != "[":
                    # Closing a non-empty list: drop the "]" just appended,
                    # make sure the last item ends with a comma, commit it and
                    # put "]" on a line of its own.
                    current_line = current_line[:-1]
                    if str(last_not_whitespace) != ",":
                        current_line += ","
                    commit_current_line()
                    current_line = "]"
                elif str(last_not_whitespace) in OPENING_TOKENS:
                    # Empty pair: zero newlines keeps the closing token
                    # directly after the opening one.
                    end_str = end_str.strip()
                    commit_current_line(LineType.BLOCK_CLOSE, True, 0)

                # Dedent before committing so the closing token (and the
                # whitespace after it) use the outer indentation.
                indent_levels -= 1
                commit_current_line(LineType.BLOCK_CLOSE, True)

            elif str_item == ";":
                line_type = LineType.STATEMENT
                newlines = 1

                if len(current_line) == 1:
                    # The ";" is alone on its line: attach it to the existing
                    # output and treat the result as a block close.
                    newlines = 0
                    line_type = LineType.BLOCK_CLOSE
                elif prev_line_type == LineType.BLOCK_CLOSE:
                    # Blank line between a closed block and the next statement.
                    newlines = 2

                commit_current_line(line_type, newlines_before=newlines)

            elif item.type == TokenType.COMMENT:
                require_extra_newline = (
                    LineType.BLOCK_CLOSE,
                    LineType.STATEMENT,
                    LineType.COMMENT,
                )

                single_line_comment = str_item.startswith("//")
                newlines = 1
                if single_line_comment:
                    # Normalize "//text" to "// text".
                    # NOTE(review): this slices current_line rather than
                    # str_item, so it assumes the comment is the only text on
                    # current_line -- confirm.
                    if not str_item.startswith("// "):
                        current_line = f"// {current_line[2:]}"

                    if not last_whitespace_contains_newline:
                        # No line break preceded the comment in the input:
                        # keep it on the previous output line, after a space.
                        current_line = " " + current_line
                        newlines = 0
                    elif prev_line_type == LineType.BLOCK_CLOSE:
                        newlines = 2

                elif prev_line_type in require_extra_newline:
                    newlines = 2

                # Remove trailing whitespace from every line of the comment.
                current_line = "\n".join(
                    [line.rstrip() for line in current_line.split("\n")]
                )
                commit_current_line(LineType.COMMENT, newlines_before=newlines)

            else:  # pragma: no cover
                raise CompilerBugError()

        # Start (or continue) counting parentheses when the line begins with
        # a name made of letters, "_" and "-" directly followed by "(".
        elif str_item == "(" and (
            re.match(r"^([A-Za-z_\-])+\s*\(", current_line) or watch_parentheses
        ):
            watch_parentheses = True
            parentheses_balance += 1

        elif str_item == ")" and watch_parentheses:
            parentheses_balance -= 1
            all_parentheses_closed = parentheses_balance == 0

            # The watched construct ends its line as soon as the outermost
            # parenthesis closes.
            if all_parentheses_closed:
                commit_current_line(
                    newlines_before=2 if prev_line_type == LineType.BLOCK_CLOSE else 1
                )
                watch_parentheses = False

        # NOTE(review): despite its name, this flag is True when the tracker
        # is NOT empty.
        tracker_is_empty = len(bracket_tracker) > 0
        if tracker_is_empty:
            last_in_tracker = bracket_tracker[-1]
            # A comma directly inside "[" ends the current line.
            is_list_comma = last_in_tracker == "[" and str_item == ","
            if is_list_comma:
                # NOTE(review): [-1] raises IndexError if nothing has been
                # written to end_str yet -- confirm this cannot happen.
                last_was_list_item = end_str.strip()[-1] not in ("[", ",")
                if last_was_list_item:
                    # Drop the trailing newline/indent so current_line is
                    # appended directly after the existing output.
                    end_str = end_str.strip()
                commit_current_line()

        last_not_whitespace = item
        last_whitespace_contains_newline = False

    return end_str.strip() + "\n"