# decompiler.py
#
# Copyright 2021 James Westman
#
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation; either version 3 of the
# License, or (at your option) any later version.
#
# This file is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# SPDX-License-Identifier: LGPL-3.0-or-later

from enum import Enum

from . import tokenizer

# Tokens that increase / decrease the indentation depth.
OPENING_TOKENS = ["{", "["]
CLOSING_TOKENS = ["}", "]"]

# The current output line is committed right after any of these tokens.
NEWLINE_AFTER = [";"] + OPENING_TOKENS + CLOSING_TOKENS

# A space is never inserted before / after these tokens.
NO_WHITESPACE_BEFORE = [",", ":", ";", ")", "."]
NO_WHITESPACE_AFTER = ["C_", "_", "("]

# A space is inserted after / before these tokens unless one of the
# NO_WHITESPACE_* tables above vetoes it.
WHITESPACE_AFTER = [":", ","]
WHITESPACE_BEFORE = ["{"]


class LineType(Enum):
    """Classification of the most recently committed output line.

    Only BLOCK_OPEN and BLOCK_CLOSE are assigned by ``Format.format``;
    CHILD_TYPE is only compared against and STATEMENT is not referenced
    there at all.
    """

    STATEMENT = 0
    BLOCK_OPEN = 1
    BLOCK_CLOSE = 2
    CHILD_TYPE = 3


class Format:
    """Namespace for the token-based source formatter."""

    @staticmethod
    def format(data) -> str:
        """Re-emit ``data`` with normalised spacing, line breaks and indentation.

        ``data`` is passed to ``tokenizer.tokenize``; whitespace tokens in the
        result are discarded and the remaining tokens are joined according to
        the module-level spacing tables. A line is committed after every token
        listed in ``NEWLINE_AFTER`` and after every comment token.

        Args:
            data: Source text accepted by ``tokenizer.tokenize`` (presumably a
                ``str`` -- confirm against the tokenizer).

        Returns:
            The formatted text. An empty token stream yields ``""``.

        Side effects:
            The result is also printed to stdout (temporary debugging aid).
        """
        tokens = tokenizer.tokenize(data)

        indent_levels = 0
        tokenized_str = ""
        current_line = ""
        prev_line_type = None
        # None until the first non-whitespace token has been emitted. Seeding
        # this with tokens[0] made the first token get compared against itself
        # (producing a leading space for identifiers) and raised IndexError
        # when the token list was empty.
        last_not_whitespace = None

        def commit_current_line(extra_newlines, line_type, new_indent_levels):
            """Flush ``current_line`` into the output.

            The line is terminated with a newline followed by the indentation
            for the *next* line (``new_indent_levels`` spaces-units). When
            ``extra_newlines`` is non-zero, that many newlines plus the current
            indentation are inserted in front of the line.
            """
            nonlocal tokenized_str, current_line, prev_line_type

            current_line += "\n" + (new_indent_levels * " ")
            if extra_newlines != 0:
                # NOTE(review): the previous commit already appended the
                # indentation for this line, so this prefix leaves trailing
                # whitespace on the blank line -- confirm this is intended.
                tokenized_str += ("\n" * extra_newlines) + (" " * indent_levels)
            tokenized_str += current_line

            current_line = ""
            prev_line_type = line_type

        for item in tokens:
            if item.type == tokenizer.TokenType.WHITESPACE:
                # Original whitespace is dropped; spacing is fully regenerated.
                continue

            item_as_string = str(item)

            if last_not_whitespace is not None:
                prev_as_string = str(last_not_whitespace)
                prev_wants_space_after = (
                    prev_as_string in WHITESPACE_AFTER
                    or last_not_whitespace.type == tokenizer.TokenType.IDENT
                )
                # NO_WHITESPACE_AFTER on the previous token vetoes any space;
                # otherwise a space is added either because this token asks
                # for one before it, or because the previous token asks for
                # one after it and this token does not forbid it.
                if prev_as_string not in NO_WHITESPACE_AFTER and (
                    item_as_string in WHITESPACE_BEFORE
                    or (
                        prev_wants_space_after
                        and item_as_string not in NO_WHITESPACE_BEFORE
                    )
                ):
                    current_line += " "

            current_line += item_as_string

            if (
                item_as_string in NEWLINE_AFTER
                or item.type == tokenizer.TokenType.COMMENT
            ):
                if item_as_string in OPENING_TOKENS:
                    # The commit must happen before the increment: the blank
                    # line prefix uses the outer depth, the following line the
                    # inner one.
                    # NOTE(review): CHILD_TYPE is never assigned in this
                    # function, so extra_newlines is currently always 1.
                    commit_current_line(
                        0 if prev_line_type == LineType.CHILD_TYPE else 1,
                        LineType.BLOCK_OPEN,
                        indent_levels + 1,
                    )
                    indent_levels += 1
                elif item_as_string in CLOSING_TOKENS:
                    indent_levels -= 1
                    # NOTE(review): indent_levels was already decremented, so
                    # the next line is indented one level less than the new
                    # depth, while the closing token itself keeps the inner
                    # indentation -- confirm whether this is intended.
                    commit_current_line(
                        0,
                        LineType.BLOCK_CLOSE,
                        indent_levels - 1,
                    )
                else:
                    # ";" or a comment: keep depth and line type unchanged.
                    commit_current_line(0, prev_line_type, indent_levels)

            last_not_whitespace = item

        print(tokenized_str)  # TODO: Remove this when the MR is ready to be merged

        return tokenized_str