Add a formatter

This commit is contained in:
Gregor Niehl 2023-11-03 06:48:28 -05:00 committed by James Westman
parent 2faa9207de
commit 4fa64cdf33
12 changed files with 613 additions and 11 deletions

View file

@ -0,0 +1,225 @@
# formatter.py
#
# Copyright 2023 Gregor Niehl <gregorniehl@web.de>
#
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation; either version 3 of the
# License, or (at your option) any later version.
#
# This file is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# SPDX-License-Identifier: LGPL-3.0-or-later
import re
from enum import Enum
from . import tokenizer, utils
from .tokenizer import TokenType
OPENING_TOKENS = ("{", "[")
CLOSING_TOKENS = ("}", "]")
NEWLINE_AFTER = tuple(";") + OPENING_TOKENS + CLOSING_TOKENS
NO_WHITESPACE_BEFORE = (",", ":", "::", ";", ")", ".", ">", "]", "=")
NO_WHITESPACE_AFTER = ("C_", "_", "(", ".", "$", "<", "::", "[", "=")
# NO_WHITESPACE_BEFORE takes precedence over WHITESPACE_AFTER
WHITESPACE_AFTER = (":", ",", ">", ")", "|", "=>")
WHITESPACE_BEFORE = ("{", "|")
class LineType(Enum):
STATEMENT = 0
BLOCK_OPEN = 1
BLOCK_CLOSE = 2
CHILD_TYPE = 3
COMMENT = 4
class Formatter:
def format(data, tab_size=2, insert_space=True):
indent_levels = 0
tokens = tokenizer.tokenize(data)
end_str = ""
last_not_whitespace = tokens[0]
current_line = ""
prev_line_type = None
is_child_type = False
indent_item = " " * tab_size if insert_space else "\t"
watch_parentheses = False
parentheses_balance = 0
bracket_tracker = [None]
def commit_current_line(
line_type=prev_line_type, redo_whitespace=False, newlines_before=1
):
nonlocal end_str, current_line, prev_line_type
indent_whitespace = indent_levels * indent_item
whitespace_to_add = "\n" + indent_whitespace
if redo_whitespace or newlines_before != 1:
end_str = end_str.strip() + ("\n" * newlines_before)
if newlines_before > 0:
end_str += indent_whitespace
end_str += current_line + whitespace_to_add
current_line = ""
prev_line_type = line_type
for item in tokens:
if item.type != TokenType.WHITESPACE:
str_item = str(item)
whitespace_required = (
str_item in WHITESPACE_BEFORE
or str(last_not_whitespace) in WHITESPACE_AFTER
)
whitespace_blockers = (
str_item in NO_WHITESPACE_BEFORE
or str(last_not_whitespace) in NO_WHITESPACE_AFTER
)
this_or_last_is_ident = (
item.type == TokenType.IDENT
or last_not_whitespace.type == TokenType.IDENT
)
current_line_is_empty = len(current_line) == 0
just_one_word_on_line = re.match(r"^([A-Za-z_\-])+$", current_line)
is_short_object_def = str_item == "(" and not just_one_word_on_line
any_blockers = (
whitespace_blockers or current_line_is_empty or is_short_object_def
)
if (whitespace_required or this_or_last_is_ident) and not any_blockers:
current_line += " "
current_line += str_item
if str_item in ["[", "("]:
bracket_tracker.append(str_item)
elif str_item in ["]", ")"]:
bracket_tracker.pop()
needs_newline_treatment = (
str_item in NEWLINE_AFTER or item.type == TokenType.COMMENT
)
if needs_newline_treatment:
if str_item in OPENING_TOKENS:
list_or_child_type = str_item == "["
if list_or_child_type:
is_child_type = current_line.startswith("[")
if is_child_type:
if str(last_not_whitespace) not in OPENING_TOKENS:
end_str = (
end_str.strip()
+ "\n\n"
+ (indent_item * indent_levels)
)
last_not_whitespace = item
continue
indent_levels += 1
keep_same_indent = not prev_line_type in [
LineType.CHILD_TYPE,
LineType.COMMENT,
LineType.BLOCK_OPEN,
]
if keep_same_indent:
end_str = (
end_str.strip()
+ "\n\n"
+ (indent_item * (indent_levels - 1))
)
commit_current_line(LineType.BLOCK_OPEN)
elif str_item == "]" and is_child_type:
commit_current_line(LineType.CHILD_TYPE, False)
is_child_type = False
elif str_item in CLOSING_TOKENS:
if str_item == "]" and last_not_whitespace != ",":
current_line = current_line[:-1]
commit_current_line()
current_line = "]"
indent_levels -= 1
commit_current_line(LineType.BLOCK_CLOSE, True)
elif str_item == ";":
line_type = LineType.STATEMENT
if len(current_line) == 1:
newlines = 0
line_type = LineType.BLOCK_CLOSE
elif prev_line_type == LineType.BLOCK_CLOSE:
newlines = 2
else:
newlines = 1
commit_current_line(line_type, newlines_before=newlines)
elif item.type == TokenType.COMMENT:
require_extra_newline = [
LineType.BLOCK_CLOSE,
LineType.STATEMENT,
LineType.COMMENT,
]
single_line_comment = str_item.startswith("//")
if (
single_line_comment
and prev_line_type == LineType.BLOCK_CLOSE
) or (
not single_line_comment
and prev_line_type in require_extra_newline
):
newlines = 2
else:
newlines = 1
commit_current_line(LineType.COMMENT, newlines_before=newlines)
else:
commit_current_line()
elif str_item == "(" and (
re.match("^([A-Za-z_\-])+\s*\(", current_line) or watch_parentheses
):
watch_parentheses = True
parentheses_balance += 1
elif str_item == ")" and watch_parentheses:
parentheses_balance -= 1
all_parentheses_closed = parentheses_balance == 0
if all_parentheses_closed:
commit_current_line(
newlines_before=2
if prev_line_type == LineType.BLOCK_CLOSE
else 1
)
watch_parentheses = False
tracker_is_empty = len(bracket_tracker) > 0
if tracker_is_empty:
last_in_tracker = bracket_tracker[-1]
is_list_comma = last_in_tracker == "[" and str_item == ","
if is_list_comma:
last_was_list_item = end_str.strip()[-1] not in ["[", ","]
if last_was_list_item:
end_str = end_str.strip()
commit_current_line()
last_not_whitespace = item
return end_str

View file

@ -22,11 +22,13 @@ import json
import sys
import traceback
import typing as T
from difflib import SequenceMatcher
from . import decompiler, parser, tokenizer, utils, xml_reader
from .ast_utils import AstNode
from .completions import complete
from .errors import CompileError, MultipleErrors
from .formatter import Formatter
from .lsp_utils import *
from .outputs.xml import XmlOutput
from .tokenizer import Token
@ -211,6 +213,7 @@ class LanguageServer:
"hoverProvider": True,
"documentSymbolProvider": True,
"definitionProvider": True,
"documentFormattingProvider": True,
},
"serverInfo": {
"name": "Blueprint",
@ -280,6 +283,38 @@ class LanguageServer:
id, [completion.to_json(True) for completion in completions]
)
@command("textDocument/formatting")
def formatting(self, id, params):
open_file = self._open_files[params["textDocument"]["uri"]]
if open_file.text is None:
self._send_error(id, ErrorCode.RequestFailed, "Document is not open")
return
try:
formatted_blp = Format.format(
open_file.text,
params["options"]["tabSize"],
params["options"]["insertSpaces"],
)
except PrintableError:
self._send_error(id, ErrorCode.RequestFailed, "Could not format document")
return
lst = []
for tag, i1, i2, j1, j2 in SequenceMatcher(
None, open_file.text, formatted_blp
).get_opcodes():
if tag in ("replace", "insert", "delete"):
lst.append(
TextEdit(
Range(i1, i2, open_file.text),
"" if tag == "delete" else formatted_blp[j1:j2],
).to_json()
)
self._send_response(id, lst)
@command("textDocument/x-blueprint-compile")
def compile(self, id, params):
open_file = self._open_files[params["textDocument"]["uri"]]

View file

@ -186,3 +186,12 @@ class LocationLink:
"targetRange": self.target_range.to_json(),
"targetSelectionRange": self.target_selection_range.to_json(),
}
@dataclass
class TextEdit:
range: Range
newText: str
def to_json(self):
return {"range": self.range.to_json(), "newText": self.newText}

View file

@ -19,13 +19,14 @@
import argparse
import json
import difflib
import os
import sys
import typing as T
from . import interactive_port, parser, tokenizer
from .errors import CompileError, CompilerBugError, PrintableError, report_bug
from .formatter import Formatter
from .gir import add_typelib_search_path
from .lsp import LanguageServer
from .outputs import XmlOutput
@ -66,6 +67,36 @@ class BlueprintApp:
type=argparse.FileType("r"),
)
format = self.add_subcommand(
"format", "Format given blueprint files", self.cmd_format
)
format.add_argument(
"-f",
"--fix",
help="Apply the edits to the files",
default=False,
action="store_true",
)
format.add_argument(
"-t",
"--tabs",
help="Use tabs instead of spaces",
default=False,
action="store_true",
)
format.add_argument(
"-s",
"--spaces-num",
help="How many spaces should be used per indent",
default=2,
type=int,
)
format.add_argument(
"inputs",
nargs="+",
metavar="filenames",
)
port = self.add_subcommand("port", "Interactive porting tool", self.cmd_port)
lsp = self.add_subcommand(
@ -152,6 +183,111 @@ class BlueprintApp:
e.pretty_print(file.name, data)
sys.exit(1)
def cmd_format(self, opts):
input_files = []
missing_files = []
panic = False
formatted_files = 0
skipped_files = 0
for path in opts.inputs:
if os.path.isfile(path):
input_files.append(path)
elif os.path.isdir(path):
for root, subfolders, files in os.walk(path):
for file in files:
if file.endswith(".blp"):
input_files.append(os.path.join(root, file))
else:
missing_files.append(path)
for file in input_files:
with open(file, "r+") as file:
data = file.read()
errored = False
try:
self._compile(data)
except:
errored = True
formatted_str = Formatter.format(data, opts.spaces_num, not opts.tabs)
if data != formatted_str:
happened = "Would format"
if opts.fix and not errored:
file.seek(0)
file.truncate()
file.write(formatted_str)
happened = "Formatted"
diff_lines = []
a_lines = data.splitlines(keepends=True)
b_lines = formatted_str.splitlines(keepends=True)
for line in difflib.unified_diff(
a_lines, b_lines, fromfile=file.name, tofile=file.name, n=5
):
# Work around https://bugs.python.org/issue2142
# See:
# https://www.gnu.org/software/diffutils/manual/html_node/Incomplete-Lines.html
if line[-1] == "\n":
diff_lines.append(line)
else:
diff_lines.append(line + "\n")
diff_lines.append("\\ No newline at end of file\n")
print("".join(diff_lines))
to_print = Colors.BOLD
if errored:
to_print += f"{Colors.RED}Skipped {file.name}: Will not overwrite file with compile errors"
panic = True
skipped_files += 1
else:
to_print += f"{happened} {file.name}"
formatted_files += 1
print(to_print)
print(Colors.CLEAR)
missing_num = len(missing_files)
summary = ""
if missing_num > 0:
print(
f"{Colors.BOLD}{Colors.RED}Could not find files:{Colors.CLEAR}{Colors.BOLD}"
)
for path in missing_files:
print(f" {path}")
print(Colors.CLEAR)
panic = True
def would_be(verb):
return verb if opts.fix else f"would be {verb}"
def how_many(count, bold=True):
string = f"{Colors.BLUE}{count} {'files' if count != 1 else 'file'}{Colors.CLEAR}"
return Colors.BOLD + string + Colors.BOLD if bold else Colors.CLEAR + string
if formatted_files > 0:
summary += f"{how_many(formatted_files)} {would_be('formatted')}, "
panic = panic or not opts.fix
left_files = len(input_files) - formatted_files - skipped_files
summary += f"{how_many(left_files, False)} {would_be('left unchanged')}"
if skipped_files > 0:
summary += f", {how_many(skipped_files)} {would_be('skipped')}"
if missing_num > 0:
summary += f", {how_many(missing_num)} not found"
print(summary + Colors.CLEAR)
if panic:
sys.exit(1)
def cmd_lsp(self, opts):
langserv = LanguageServer()
langserv.run()