Add a formatter

This commit is contained in:
Gregor Niehl 2023-11-03 06:48:28 -05:00 committed by James Westman
parent 2faa9207de
commit 4fa64cdf33
12 changed files with 613 additions and 11 deletions

View file

@ -0,0 +1,225 @@
# formatter.py
#
# Copyright 2023 Gregor Niehl <gregorniehl@web.de>
#
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation; either version 3 of the
# License, or (at your option) any later version.
#
# This file is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# SPDX-License-Identifier: LGPL-3.0-or-later
import re
from enum import Enum
from . import tokenizer, utils
from .tokenizer import TokenType
# Bracket tokens that start / end a block or list in the formatted output.
OPENING_TOKENS = ("{", "[")
CLOSING_TOKENS = ("}", "]")

# Tokens that trigger the formatter's newline handling once they are emitted.
NEWLINE_AFTER = (";", *OPENING_TOKENS, *CLOSING_TOKENS)

# Spacing tables. The *_BEFORE tables are matched against the token being
# emitted, the *_AFTER tables against the previous non-whitespace token.
NO_WHITESPACE_BEFORE = (",", ":", "::", ";", ")", ".", ">", "]", "=")
NO_WHITESPACE_AFTER = ("C_", "_", "(", ".", "$", "<", "::", "[", "=")

# NO_WHITESPACE_BEFORE takes precedence over WHITESPACE_AFTER
WHITESPACE_AFTER = (":", ",", ">", ")", "|", "=>")
WHITESPACE_BEFORE = ("{", "|")
class LineType(Enum):
    """Kind of output line most recently committed by the formatter.

    The formatter compares the previous line's kind against these members
    to decide how many newlines to put in front of the next line.
    """

    # A line committed on a ";" token.
    STATEMENT = 0
    # A line committed on an opening "{" or "[".
    BLOCK_OPEN = 1
    # A line committed on a closing "}" or "]".
    BLOCK_CLOSE = 2
    # A bracketed line that itself starts with "[", e.g. "[child]".
    CHILD_TYPE = 3
    # A line committed on a comment token.
    COMMENT = 4
class Formatter:
    """Token-based pretty-printer for blueprint source text."""

    @staticmethod
    def format(data: str, tab_size: int = 2, insert_space: bool = True) -> str:
        """Return ``data`` re-indented and re-spaced.

        The input is tokenized and every non-whitespace token is appended to
        a line buffer, with a single space inserted according to the
        module-level spacing tables. The buffer is flushed to the output
        whenever a token calls for a line break.

        :param data: source text to format
        :param tab_size: number of spaces per indentation level (only used
            when ``insert_space`` is true)
        :param insert_space: indent with spaces if true, with one tab per
            level otherwise
        :returns: the formatted text
        """
        indent_levels = 0
        tokens = tokenizer.tokenize(data)
        end_str = ""
        # NOTE(review): assumes the tokenizer always yields at least one
        # token, even for empty input -- confirm against tokenizer.tokenize.
        last_not_whitespace = tokens[0]
        current_line = ""
        prev_line_type = None
        is_child_type = False
        indent_item = " " * tab_size if insert_space else "\t"
        watch_parentheses = False
        parentheses_balance = 0
        # Stack of currently open "[" / "(" tokens; None marks the bottom.
        bracket_tracker = [None]

        def commit_current_line(
            line_type=prev_line_type, redo_whitespace=False, newlines_before=1
        ):
            """Flush ``current_line`` to ``end_str`` and start a new line.

            ``line_type`` defaults to the value ``prev_line_type`` had when
            this helper was defined (``None``), so a call that omits it
            resets ``prev_line_type`` to ``None`` rather than keeping the
            previous value. Existing output depends on that, so it is kept.
            """
            nonlocal end_str, current_line, prev_line_type

            indent_whitespace = indent_levels * indent_item
            whitespace_to_add = "\n" + indent_whitespace

            if redo_whitespace or newlines_before != 1:
                # Drop the trailing whitespace written by the previous commit
                # and rebuild it with the requested number of newlines.
                end_str = end_str.strip() + ("\n" * newlines_before)
                if newlines_before > 0:
                    end_str += indent_whitespace

            end_str += current_line + whitespace_to_add

            current_line = ""
            prev_line_type = line_type

        for item in tokens:
            if item.type != TokenType.WHITESPACE:
                str_item = str(item)

                whitespace_required = (
                    str_item in WHITESPACE_BEFORE
                    or str(last_not_whitespace) in WHITESPACE_AFTER
                )
                whitespace_blockers = (
                    str_item in NO_WHITESPACE_BEFORE
                    or str(last_not_whitespace) in NO_WHITESPACE_AFTER
                )

                this_or_last_is_ident = (
                    item.type == TokenType.IDENT
                    or last_not_whitespace.type == TokenType.IDENT
                )
                current_line_is_empty = len(current_line) == 0
                just_one_word_on_line = re.match(r"^([A-Za-z_\-])+$", current_line)
                # "(" only gets a leading space when it follows a lone word
                # on the line, e.g. `item (...)`.
                is_short_object_def = str_item == "(" and not just_one_word_on_line
                any_blockers = (
                    whitespace_blockers or current_line_is_empty or is_short_object_def
                )

                if (whitespace_required or this_or_last_is_ident) and not any_blockers:
                    current_line += " "

                current_line += str_item

                if str_item in ["[", "("]:
                    bracket_tracker.append(str_item)
                elif str_item in ["]", ")"]:
                    # The input is not guaranteed to compile, so an
                    # unbalanced closer must not raise IndexError here.
                    if bracket_tracker:
                        bracket_tracker.pop()

                needs_newline_treatment = (
                    str_item in NEWLINE_AFTER or item.type == TokenType.COMMENT
                )
                if needs_newline_treatment:
                    if str_item in OPENING_TOKENS:
                        list_or_child_type = str_item == "["
                        if list_or_child_type:
                            # A "[" that starts the line begins a bracketed
                            # line such as `[child]`; it stays on one line
                            # and is committed by its closing "]".
                            is_child_type = current_line.startswith("[")

                            if is_child_type:
                                if str(last_not_whitespace) not in OPENING_TOKENS:
                                    # Separate it from the preceding content
                                    # with one blank line.
                                    end_str = (
                                        end_str.strip()
                                        + "\n\n"
                                        + (indent_item * indent_levels)
                                    )
                                last_not_whitespace = item
                                continue

                        indent_levels += 1
                        keep_same_indent = prev_line_type not in [
                            LineType.CHILD_TYPE,
                            LineType.COMMENT,
                            LineType.BLOCK_OPEN,
                        ]
                        if keep_same_indent:
                            # Blank line before the block header, which is
                            # still written at the outer indentation.
                            end_str = (
                                end_str.strip()
                                + "\n\n"
                                + (indent_item * (indent_levels - 1))
                            )
                        commit_current_line(LineType.BLOCK_OPEN)

                    elif str_item == "]" and is_child_type:
                        commit_current_line(LineType.CHILD_TYPE, False)
                        is_child_type = False

                    elif str_item in CLOSING_TOKENS:
                        # NOTE(review): this compares a token object with a
                        # str; unless the token type defines equality with
                        # str the condition is always true -- confirm.
                        if str_item == "]" and last_not_whitespace != ",":
                            # Move the "]" onto a line of its own.
                            current_line = current_line[:-1]
                            commit_current_line()
                            current_line = "]"

                        indent_levels -= 1
                        commit_current_line(LineType.BLOCK_CLOSE, True)

                    elif str_item == ";":
                        line_type = LineType.STATEMENT
                        if len(current_line) == 1:
                            # A lone ";" is glued to the end of the previous
                            # line (e.g. "};").
                            newlines = 0
                            line_type = LineType.BLOCK_CLOSE
                        elif prev_line_type == LineType.BLOCK_CLOSE:
                            newlines = 2
                        else:
                            newlines = 1
                        commit_current_line(line_type, newlines_before=newlines)

                    elif item.type == TokenType.COMMENT:
                        require_extra_newline = [
                            LineType.BLOCK_CLOSE,
                            LineType.STATEMENT,
                            LineType.COMMENT,
                        ]

                        single_line_comment = str_item.startswith("//")
                        if (
                            single_line_comment
                            and prev_line_type == LineType.BLOCK_CLOSE
                        ) or (
                            not single_line_comment
                            and prev_line_type in require_extra_newline
                        ):
                            newlines = 2
                        else:
                            newlines = 1
                        commit_current_line(LineType.COMMENT, newlines_before=newlines)

                    else:
                        commit_current_line()

                elif str_item == "(" and (
                    re.match(r"^([A-Za-z_\-])+\s*\(", current_line) or watch_parentheses
                ):
                    # `word (` at the start of a line: track the parentheses
                    # so the line can be committed when they are all closed.
                    watch_parentheses = True
                    parentheses_balance += 1

                elif str_item == ")" and watch_parentheses:
                    parentheses_balance -= 1
                    all_parentheses_closed = parentheses_balance == 0
                    if all_parentheses_closed:
                        commit_current_line(
                            newlines_before=2
                            if prev_line_type == LineType.BLOCK_CLOSE
                            else 1
                        )
                        watch_parentheses = False

                tracker_has_entries = len(bracket_tracker) > 0
                if tracker_has_entries:
                    last_in_tracker = bracket_tracker[-1]
                    is_list_comma = last_in_tracker == "[" and str_item == ","
                    if is_list_comma:
                        # str.endswith is also safe when nothing has been
                        # written yet, unlike indexing the last character.
                        last_was_list_item = not end_str.strip().endswith(
                            ("[", ",")
                        )
                        if last_was_list_item:
                            end_str = end_str.strip()
                        commit_current_line()

                last_not_whitespace = item

        return end_str

View file

@ -22,11 +22,13 @@ import json
import sys
import traceback
import typing as T
from difflib import SequenceMatcher
from . import decompiler, parser, tokenizer, utils, xml_reader
from .ast_utils import AstNode
from .completions import complete
from .errors import CompileError, MultipleErrors
from .formatter import Formatter
from .lsp_utils import *
from .outputs.xml import XmlOutput
from .tokenizer import Token
@ -211,6 +213,7 @@ class LanguageServer:
"hoverProvider": True,
"documentSymbolProvider": True,
"definitionProvider": True,
"documentFormattingProvider": True,
},
"serverInfo": {
"name": "Blueprint",
@ -280,6 +283,38 @@ class LanguageServer:
id, [completion.to_json(True) for completion in completions]
)
@command("textDocument/formatting")
def formatting(self, id, params):
    """Handle an LSP ``textDocument/formatting`` request.

    Formats the text of the open document named by
    ``params["textDocument"]["uri"]`` using the client's ``tabSize`` and
    ``insertSpaces`` options. Responds with a list of serialized
    ``TextEdit`` objects, one per differing region between the current and
    the formatted text. Sends a ``RequestFailed`` error instead when the
    document has no text or formatting raises ``PrintableError``.
    """
    open_file = self._open_files[params["textDocument"]["uri"]]

    if open_file.text is None:
        self._send_error(id, ErrorCode.RequestFailed, "Document is not open")
        return

    try:
        # The class imported from .formatter is `Formatter`; the previous
        # `Format.format(...)` raised NameError on every request.
        formatted_blp = Formatter.format(
            open_file.text,
            params["options"]["tabSize"],
            params["options"]["insertSpaces"],
        )
    # NOTE(review): PrintableError is not among the names imported explicitly
    # in the visible import lines; presumably it arrives through
    # `from .lsp_utils import *` -- confirm.
    except PrintableError:
        self._send_error(id, ErrorCode.RequestFailed, "Could not format document")
        return

    # Send only the regions that differ. The opcode indices are offsets into
    # the original and formatted strings respectively.
    matcher = SequenceMatcher(None, open_file.text, formatted_blp)
    lst = [
        TextEdit(
            Range(i1, i2, open_file.text),
            "" if tag == "delete" else formatted_blp[j1:j2],
        ).to_json()
        for tag, i1, i2, j1, j2 in matcher.get_opcodes()
        if tag in ("replace", "insert", "delete")
    ]

    self._send_response(id, lst)
@command("textDocument/x-blueprint-compile")
def compile(self, id, params):
open_file = self._open_files[params["textDocument"]["uri"]]

View file

@ -186,3 +186,12 @@ class LocationLink:
"targetRange": self.target_range.to_json(),
"targetSelectionRange": self.target_selection_range.to_json(),
}
@dataclass
class TextEdit:
    """A text replacement: the span ``range`` is replaced by ``newText``."""

    # Span of the document that is replaced.
    range: Range
    # Replacement text for that span.
    newText: str

    def to_json(self):
        """Return a dict with the serialized ``range`` and the ``newText``."""
        payload = {}
        payload["range"] = self.range.to_json()
        payload["newText"] = self.newText
        return payload

View file

@ -19,13 +19,14 @@
import argparse
import json
import difflib
import os
import sys
import typing as T
from . import interactive_port, parser, tokenizer
from .errors import CompileError, CompilerBugError, PrintableError, report_bug
from .formatter import Formatter
from .gir import add_typelib_search_path
from .lsp import LanguageServer
from .outputs import XmlOutput
@ -66,6 +67,36 @@ class BlueprintApp:
type=argparse.FileType("r"),
)
format = self.add_subcommand(
"format", "Format given blueprint files", self.cmd_format
)
format.add_argument(
"-f",
"--fix",
help="Apply the edits to the files",
default=False,
action="store_true",
)
format.add_argument(
"-t",
"--tabs",
help="Use tabs instead of spaces",
default=False,
action="store_true",
)
format.add_argument(
"-s",
"--spaces-num",
help="How many spaces should be used per indent",
default=2,
type=int,
)
format.add_argument(
"inputs",
nargs="+",
metavar="filenames",
)
port = self.add_subcommand("port", "Interactive porting tool", self.cmd_port)
lsp = self.add_subcommand(
@ -152,6 +183,111 @@ class BlueprintApp:
e.pretty_print(file.name, data)
sys.exit(1)
def cmd_format(self, opts):
    """Run the ``format`` subcommand.

    Collects the input files (plain files as given, directories searched
    recursively for ``*.blp``), formats each one and prints a unified diff
    for every file whose formatted text differs. With ``opts.fix`` the file
    is rewritten in place, unless it fails to compile, in which case it is
    reported as skipped. A summary is printed at the end.

    Exits with status 1 when a path was not found, a file was skipped, or
    files needed formatting while ``opts.fix`` was not set.
    """
    input_files = []
    missing_files = []
    panic = False
    formatted_files = 0
    skipped_files = 0

    for path in opts.inputs:
        if os.path.isfile(path):
            input_files.append(path)
        elif os.path.isdir(path):
            for root, _subfolders, files in os.walk(path):
                input_files.extend(
                    os.path.join(root, name)
                    for name in files
                    if name.endswith(".blp")
                )
        else:
            missing_files.append(path)

    # Only ask for write access when edits will be applied, so that a
    # check-only run also works on read-only files.
    open_mode = "r+" if opts.fix else "r"

    for filename in input_files:
        with open(filename, open_mode) as handle:
            data = handle.read()

            errored = False
            try:
                self._compile(data)
            except Exception:
                # Any compile failure marks the file as not safe to
                # overwrite; KeyboardInterrupt/SystemExit still propagate.
                errored = True

            formatted_str = Formatter.format(data, opts.spaces_num, not opts.tabs)

            if data != formatted_str:
                happened = "Would format"

                if opts.fix and not errored:
                    handle.seek(0)
                    handle.truncate()
                    handle.write(formatted_str)
                    happened = "Formatted"

                diff_lines = []
                a_lines = data.splitlines(keepends=True)
                b_lines = formatted_str.splitlines(keepends=True)

                for line in difflib.unified_diff(
                    a_lines, b_lines, fromfile=handle.name, tofile=handle.name, n=5
                ):
                    # Work around https://bugs.python.org/issue2142
                    # See:
                    # https://www.gnu.org/software/diffutils/manual/html_node/Incomplete-Lines.html
                    if line[-1] == "\n":
                        diff_lines.append(line)
                    else:
                        diff_lines.append(line + "\n")
                        diff_lines.append("\\ No newline at end of file\n")

                print("".join(diff_lines))

                to_print = Colors.BOLD
                if errored:
                    to_print += f"{Colors.RED}Skipped {handle.name}: Will not overwrite file with compile errors"
                    panic = True
                    skipped_files += 1
                else:
                    to_print += f"{happened} {handle.name}"
                    formatted_files += 1

                print(to_print)
                print(Colors.CLEAR)

    missing_num = len(missing_files)
    summary = ""

    if missing_num > 0:
        print(
            f"{Colors.BOLD}{Colors.RED}Could not find files:{Colors.CLEAR}{Colors.BOLD}"
        )
        for path in missing_files:
            print(f" {path}")
        print(Colors.CLEAR)
        panic = True

    def would_be(verb):
        """Phrase ``verb`` as done (fix mode) or hypothetical (check mode)."""
        return verb if opts.fix else f"would be {verb}"

    def how_many(count, bold=True):
        """Return a colored "<count> file(s)" fragment for the summary."""
        string = f"{Colors.BLUE}{count} {'files' if count != 1 else 'file'}{Colors.CLEAR}"
        if bold:
            return Colors.BOLD + string + Colors.BOLD
        return Colors.CLEAR + string

    if formatted_files > 0:
        summary += f"{how_many(formatted_files)} {would_be('formatted')}, "
        # Pending changes in check mode count as a failure.
        panic = panic or not opts.fix

    left_files = len(input_files) - formatted_files - skipped_files
    summary += f"{how_many(left_files, False)} {would_be('left unchanged')}"

    if skipped_files > 0:
        summary += f", {how_many(skipped_files)} {would_be('skipped')}"

    if missing_num > 0:
        summary += f", {how_many(missing_num)} not found"

    print(summary + Colors.CLEAR)

    if panic:
        sys.exit(1)
def cmd_lsp(self, opts):
langserv = LanguageServer()
langserv.run()

View file

@ -17,9 +17,9 @@
# -- Project information -----------------------------------------------------
project = 'Blueprint'
copyright = '2021-2023, James Westman'
author = 'James Westman'
project = "Blueprint"
copyright = "2021-2023, James Westman"
author = "James Westman"
# -- General configuration ---------------------------------------------------
@ -27,16 +27,15 @@ author = 'James Westman'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
]
extensions = []
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
# -- Options for HTML output -------------------------------------------------
@ -44,11 +43,11 @@ exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'furo'
html_theme = "furo"
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_static_path = ["_static"]
html_css_files = ['styles.css']
html_css_files = ["styles.css"]

View file

@ -0,0 +1,71 @@
using Gtk 4.0;
using Adw 1;
template $MyTemplate: Label {
/**
* A list of strings.
*/
StringList {
// comment
strings [
"Hello",
C_("Greeting", "World"),
]
}
object: Button {
label: "Click me";
};
flags: a | b;
[child]
Label {
}
[child]
Label label2 {
}
// Single line comment.
/**
* Multiline comment.
*/
// Single line comment.
value: bind 1.0 as <double>;
as: 1;
signal => $on_signal() after;
}
Dialog {
[action response=ok]
$MyButton {
}
}
menu menu {
item ("test")
item {
label: "test";
}
item ("test")
}
Adw.MessageDialog {
responses [
save: "Save" suggested disabled,
]
}
Adw.Breakpoint {
condition ("width < 100")
setters {
label2.label: _("Hello, world!");
label2.visible: false;
label2.extra-menu: null;
}
}

1
tests/formatting/in1.blp Normal file
View file

@ -0,0 +1 @@
using Gtk 4.0;using Adw 1;Overlay{Label label{label:_("'Hello World!' \"\n\t\"");}[overlay]Button{notify::icon-name=>$on_icon_name_changed(label)swapped;styles["destructive"]}visible:bind $isVisible(label.visible,my-menu)as<bool>;width-request:bind label.width-request no-sync-create;}menu my-menu{item(_("Label"), "action-name", "icon-name")item{action:"win.format";}}

40
tests/formatting/in2.blp Normal file
View file

@ -0,0 +1,40 @@
using Gtk 4.0;
using Adw 1;
Overlay {
Label
label
{
label
:
_
(
"'Hello World!' \"\n\t\""
)
;
}
[
overlay
] Button
{ notify
:: icon-name
=> $ on_icon_name_changed ( label )
swapped ;
styles
[ "destructive" ]
}
visible
: bind $ isVisible ( label.visible ,
my-menu ) as
< bool > ; width-request : bind label . width-request no-sync-create ; }
menu my-menu
{ item ( _ ( "Label" ) , "action-name" , "icon-name" ) item { action : "win.format" ; } }

28
tests/formatting/out.blp Normal file
View file

@ -0,0 +1,28 @@
using Gtk 4.0;
using Adw 1;
Overlay {
Label label {
label: _("'Hello World!' \"\n\t\"");
}
[overlay]
Button {
notify::icon-name => $on_icon_name_changed(label) swapped;
styles [
"destructive"
]
}
visible: bind $isVisible(label.visible, my-menu) as <bool>;
width-request: bind label.width-request no-sync-create;
}
menu my-menu {
item (_("Label"), "action-name", "icon-name")
item {
action: "win.format";
}
}

View file

@ -0,0 +1,5 @@
using Gtk 4.0;
Label {
label: "\"'\'\t\n\\'";
}

View file

@ -0,0 +1,5 @@
using Gtk 4.0;
Label {
label: "\"'\'\t\n\\'";
}

48
tests/test_formatter.py Normal file
View file

@ -0,0 +1,48 @@
# test_formatter.py
#
# Copyright 2023 James Westman <james@jwestman.net>
#
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation; either version 3 of the
# License, or (at your option) any later version.
#
# This file is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# SPDX-License-Identifier: LGPL-3.0-or-later
import unittest
from pathlib import Path
from blueprintcompiler.formatter import Formatter
class TestFormatter(unittest.TestCase):
    """Checks Formatter output for fixture files against the expected text."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Show the complete diff when a comparison fails.
        self.maxDiff = None

    def assert_format_test(self, input_file, expected_file):
        """Format ``formatting/<input_file>`` and compare with ``expected_file``.

        Both names are resolved relative to the ``formatting`` directory next
        to this test module.
        """
        print("assert_format_test({}, {})".format(input_file, expected_file))

        here = Path(__file__).parent
        input_path = (here / f"formatting/{input_file}").resolve()
        expected_path = (here / f"formatting/{expected_file}").resolve()

        input_data = input_path.read_text()
        expected = expected_path.read_text()

        actual = Formatter.format(input_data)
        self.assertEqual(actual, expected)

    def test_formatter(self):
        """Run every (input, expected) fixture pair through the formatter."""
        fixture_pairs = (
            ("in1.blp", "out.blp"),
            ("in2.blp", "out.blp"),
            ("correct1.blp", "correct1.blp"),
            ("string_in.blp", "string_out.blp"),
        )
        for input_file, expected_file in fixture_pairs:
            self.assert_format_test(input_file, expected_file)