From 06f54c8ff8d5f7c78c1d21bcbc59fdd87ce14910 Mon Sep 17 00:00:00 2001 From: James Westman Date: Sat, 5 Mar 2022 17:54:27 -0600 Subject: [PATCH] Use typelib instead of XML For normal compilation, use .typelib files rather than .gir XML files. This is much faster. Rather than using libgirepository, which would try to actually load the libraries, we use a custom parser. The language server will still read XML because it needs to access documentation, which is not in the typelib, but that's generally fine because it's a long lived process and only has to do that once. --- .gitlab-ci.yml | 2 +- blueprint-compiler.py | 3 +- blueprintcompiler/decompiler.py | 25 +- blueprintcompiler/gir.py | 436 ++++++++++++++++++-------- blueprintcompiler/language/imports.py | 10 +- blueprintcompiler/language/values.py | 4 +- blueprintcompiler/main.py | 7 +- blueprintcompiler/typelib.py | 292 +++++++++++++++++ blueprintcompiler/xml_reader.py | 32 +- tests/fuzz.py | 6 +- 10 files changed, 647 insertions(+), 170 deletions(-) create mode 100644 blueprintcompiler/typelib.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 0820f5a..bd40630 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -11,7 +11,7 @@ build: - coverage report - coverage html - coverage xml - - meson _build -Ddocs=true + - meson _build -Ddocs=true --prefix=/usr - ninja -C _build - ninja -C _build test - ninja -C _build install diff --git a/blueprint-compiler.py b/blueprint-compiler.py index aa1dc8f..f6a542e 100755 --- a/blueprint-compiler.py +++ b/blueprint-compiler.py @@ -29,6 +29,7 @@ libdir = "@LIBDIR@" if version == "\u0040VERSION@": version = "uninstalled" + libdir = None else: # If Meson set the configuration values, insert the module path it set sys.path.insert(0, module_path) @@ -36,4 +37,4 @@ else: from blueprintcompiler import main if __name__ == "__main__": - main.main(version) + main.main(version, libdir) diff --git a/blueprintcompiler/decompiler.py b/blueprintcompiler/decompiler.py index a7363e5..cd66386 
100644 --- a/blueprintcompiler/decompiler.py +++ b/blueprintcompiler/decompiler.py @@ -122,8 +122,13 @@ class DecompileCtx: self._blocks_need_end[-1] = _CLOSING[line[-1]] self._indent += 1 - def print_attribute(self, name, value, type): + def get_enum_name(value): + for member in type.members.values(): + if member.nick == value or member.c_ident == value: + return member.name + return value.replace('-', '_') + if type is None: self.print(f"{name}: \"{escape_quote(value)}\";") elif type.assignable_to(FloatType()): @@ -141,16 +146,11 @@ class DecompileCtx: self.print(f"{name}: \"{escape_quote(value)}\";") elif type.assignable_to(self.gir.namespaces["Gtk"].lookup_type("GObject.Object")): self.print(f"{name}: {value};") - elif isinstance(type, Enumeration): - for member in type.members.values(): - if member.nick == value or member.c_ident == value: - self.print(f"{name}: {member.name};") - break - else: - self.print(f"{name}: {value.replace('-', '_')};") elif isinstance(type, Bitfield): - flags = re.sub(r"\s*\|\s*", " | ", value).replace("-", "_") - self.print(f"{name}: {flags};") + flags = [get_enum_name(flag) for flag in value.split("|")] + self.print(f"{name}: {' | '.join(flags)};") + elif isinstance(type, Enumeration): + self.print(f"{name}: {get_enum_name(value)};") else: self.print(f"{name}: \"{escape_quote(value)}\";") @@ -171,9 +171,8 @@ def _decompile_element(ctx: DecompileCtx, gir, xml): ctx.start_block() gir = decompiler(ctx, gir, **args) - for child_type in xml.children.values(): - for child in child_type: - _decompile_element(ctx, gir, child) + for child in xml.children: + _decompile_element(ctx, gir, child) ctx.end_block() diff --git a/blueprintcompiler/gir.py b/blueprintcompiler/gir.py index 6234077..1085222 100644 --- a/blueprintcompiler/gir.py +++ b/blueprintcompiler/gir.py @@ -22,39 +22,90 @@ import typing as T import os, sys from .errors import CompileError, CompilerBugError -from . import xml_reader +from . 
import typelib, xml_reader - -extra_search_paths: T.List[str] = [] _namespace_cache = {} - -_search_paths = [] -xdg_data_home = os.environ.get("XDG_DATA_HOME", os.path.expanduser("~/.local/share")) -_search_paths.append(os.path.join(xdg_data_home, "gir-1.0")) -xdg_data_dirs = os.environ.get("XDG_DATA_DIRS", "/usr/share:/usr/local/share").split(":") -_search_paths += [os.path.join(dir, "gir-1.0") for dir in xdg_data_dirs] +_xml_cache = {} def get_namespace(namespace, version): - filename = f"{namespace}-{version}.gir" + from .main import LIBDIR, VERSION + search_paths = [] + if LIBDIR is not None: + search_paths.append(os.path.join(LIBDIR, "girepository-1.0")) + + # This is a fragile hack to make blueprint-compiler work uninstalled on + # most systems. + if VERSION == "uninstalled": + search_paths += [ + "/usr/lib/girepository-1.0", + "/usr/local/lib/girepository-1.0", + "/app/lib/girepository-1.0", + "/usr/lib64/girepository-1.0", + "/usr/local/lib64/girepository-1.0", + "/app/lib64/girepository-1.0", + ] + + if typelib_path := os.environ.get("GI_TYPELIB_PATH"): + search_paths.append(typelib_path) + + filename = f"{namespace}-{version}.typelib" if filename not in _namespace_cache: - for search_path in _search_paths: + for search_path in search_paths: path = os.path.join(search_path, filename) if os.path.exists(path) and os.path.isfile(path): - xml = xml_reader.parse(path, xml_reader.PARSE_GIR) - repository = Repository(xml) + tl = typelib.load_typelib(path) + repository = Repository(tl) - _namespace_cache[filename] = repository.namespaces.get(namespace) + _namespace_cache[filename] = repository.namespace break if filename not in _namespace_cache: - raise CompileError(f"Namespace {namespace}-{version} could not be found") + raise CompileError( + f"Namespace {namespace}-{version} could not be found", + hints=["search path: " + os.pathsep.join(search_paths)], + ) return _namespace_cache[filename] +def get_xml(namespace, version): + from .main import VERSION + from 
xml.etree import ElementTree + search_paths = [] + + # Same fragile hack as before + if VERSION == "uninstalled": + search_paths += [ + "/usr/share/gir-1.0", + "/usr/local/share/gir-1.0", + "/app/share/gir-1.0", + ] + + if data_paths := os.environ.get("XDG_DATA_DIRS"): + search_paths += [os.path.join(path, "gir-1.0") for path in data_paths.split(os.pathsep)] + + filename = f"{namespace}-{version}.gir" + + if filename not in _xml_cache: + for search_path in search_paths: + path = os.path.join(search_path, filename) + + if os.path.exists(path) and os.path.isfile(path): + _xml_cache[filename] = xml_reader.parse(path) + break + + if filename not in _xml_cache: + raise CompileError( + f"GObject introspection file '{namespace}-{version}.gir' could not be found", + hints=["search path: " + os.pathsep.join(search_paths)], + ) + + return _xml_cache[filename] + + class GirType: @property def doc(self): @@ -115,9 +166,9 @@ _BASIC_TYPES = { } class GirNode: - def __init__(self, container, xml): + def __init__(self, container, tl): self.container = container - self.xml = xml + self.tl = tl def get_containing(self, container_type): if self.container is None: @@ -127,9 +178,15 @@ class GirNode: else: return self.container.get_containing(container_type) + @cached_property + def xml(self): + for el in self.container.xml.children: + if el.attrs.get("name") == self.name: + return el + @cached_property def glib_type_name(self): - return self.xml["glib:type-name"] + return self.tl.OBJ_GTYPE_NAME @cached_property def full_name(self): @@ -140,11 +197,11 @@ class GirNode: @cached_property def name(self) -> str: - return self.xml["name"] + return self.tl.BLOB_NAME @cached_property def cname(self) -> str: - return self.xml["c:type"] + return self.tl.OBJ_GTYPE_NAME @cached_property def available_in(self) -> str: @@ -169,7 +226,7 @@ class GirNode: @property def type_name(self): - return self.xml.get_elements('type')[0]['name'] + return self.type.name @property def type(self): @@ -177,76 
+234,164 @@ class GirNode: class Property(GirNode): - def __init__(self, klass, xml: xml_reader.Element): - super().__init__(klass, xml) + def __init__(self, klass, tl: typelib.Typelib): + super().__init__(klass, tl) - @property + @cached_property + def name(self): + return self.tl.PROP_NAME + + @cached_property + def type(self): + return self.get_containing(Repository)._resolve_type_id(self.tl.PROP_TYPE) + + @cached_property def signature(self): return f"{self.type_name} {self.container.name}.{self.name}" @property def writable(self): - return self.xml["writable"] == "1" + return self.tl.PROP_WRITABLE == 1 @property def construct_only(self): - return self.xml["construct-only"] == "1" + return self.tl.PROP_CONSTRUCT_ONLY == 1 class Parameter(GirNode): - def __init__(self, container: GirNode, xml: xml_reader.Element): - super().__init__(container, xml) + def __init__(self, container: GirNode, tl: typelib.Typelib): + super().__init__(container, tl) class Signal(GirNode): - def __init__(self, klass, xml: xml_reader.Element): - super().__init__(klass, xml) - if parameters := xml.get_elements('parameters'): - self.params = [Parameter(self, child) for child in parameters[0].get_elements('parameter')] - else: - self.params = [] + def __init__(self, klass, tl: typelib.Typelib): + super().__init__(klass, tl) + # if parameters := xml.get_elements('parameters'): + # self.params = [Parameter(self, child) for child in parameters[0].get_elements('parameter')] + # else: + # self.params = [] @property def signature(self): - args = ", ".join([f"{p.type_name} {p.name}" for p in self.params]) + # TODO: fix + # args = ", ".join([f"{p.type_name} {p.name}" for p in self.params]) + args = "" return f"signal {self.container.name}.{self.name} ({args})" class Interface(GirNode, GirType): - def __init__(self, ns, xml: xml_reader.Element): - super().__init__(ns, xml) - self.properties = {child["name"]: Property(self, child) for child in xml.get_elements("property")} - self.signals = 
{child["name"]: Signal(self, child) for child in xml.get_elements("glib:signal")} - self.prerequisites = [child["name"] for child in xml.get_elements("prerequisite")] + def __init__(self, ns, tl: typelib.Typelib): + super().__init__(ns, tl) + + @cached_property + def properties(self): + n_prerequisites = self.tl.INTERFACE_N_PREREQUISITES + offset = self.tl.header.HEADER_INTERFACE_BLOB_SIZE + offset += (n_prerequisites + n_prerequisites % 2) * 2 + n_properties = self.tl.INTERFACE_N_PROPERTIES + property_size = self.tl.header.HEADER_PROPERTY_BLOB_SIZE + result = {} + for i in range(n_properties): + property = Property(self, self.tl[offset + i * property_size]) + result[property.name] = property + return result + + @cached_property + def signals(self): + n_prerequisites = self.tl.INTERFACE_N_PREREQUISITES + offset = self.tl.header.HEADER_INTERFACE_BLOB_SIZE + offset += (n_prerequisites + n_prerequisites % 2) * 2 + offset += self.tl.INTERFACE_N_PROPERTIES * self.tl.header.HEADER_PROPERTY_BLOB_SIZE + offset += self.tl.INTERFACE_N_METHODS * self.tl.header.HEADER_FUNCTION_BLOB_SIZE + n_signals = self.tl.INTERFACE_N_SIGNALS + property_size = self.tl.header.HEADER_SIGNAL_BLOB_SIZE + result = {} + for i in range(n_signals): + signal = Signal(self, self.tl[offset + i * property_size]) + result[signal.name] = signal + return result + + @cached_property + def prerequisites(self): + n_prerequisites = self.tl.INTERFACE_N_PREREQUISITES + result = [] + for i in range(n_prerequisites): + entry = self.tl.INTERFACE_PREREQUISITES[i * 2].AS_DIR_ENTRY + result.append(self.get_containing(Repository)._resolve_dir_entry(entry)) + return result def assignable_to(self, other) -> bool: if self == other: return True for pre in self.prerequisites: - if self.get_containing(Namespace).lookup_type(pre).assignable_to(other): + if pre.assignable_to(other): return True return False class Class(GirNode, GirType): - def __init__(self, ns, xml: xml_reader.Element): - super().__init__(ns, xml) - 
self._parent = xml["parent"] - self.implements = [impl["name"] for impl in xml.get_elements("implements")] - self.own_properties = {child["name"]: Property(self, child) for child in xml.get_elements("property")} - self.own_signals = {child["name"]: Signal(self, child) for child in xml.get_elements("glib:signal")} + def __init__(self, ns, tl: typelib.Typelib): + super().__init__(ns, tl) @property def abstract(self): - return self.xml["abstract"] == "1" + return self.tl.OBJ_ABSTRACT == 1 - @property + @cached_property + def implements(self): + n_interfaces = self.tl.OBJ_N_INTERFACES + result = [] + for i in range(n_interfaces): + entry = self.tl[self.tl.header.HEADER_OBJECT_BLOB_SIZE + i * 2].AS_DIR_ENTRY + result.append(self.get_containing(Repository)._resolve_dir_entry(entry)) + return result + + @cached_property + def own_properties(self): + n_interfaces = self.tl.OBJ_N_INTERFACES + offset = self.tl.header.HEADER_OBJECT_BLOB_SIZE + offset += (n_interfaces + n_interfaces % 2) * 2 + offset += self.tl.OBJ_N_FIELDS * self.tl.header.HEADER_FIELD_BLOB_SIZE + offset += self.tl.OBJ_N_FIELD_CALLBACKS * self.tl.header.HEADER_CALLBACK_BLOB_SIZE + n_properties = self.tl.OBJ_N_PROPERTIES + property_size = self.tl.header.HEADER_PROPERTY_BLOB_SIZE + result = {} + for i in range(n_properties): + property = Property(self, self.tl[offset + i * property_size]) + result[property.name] = property + return result + + @cached_property + def own_signals(self): + n_interfaces = self.tl.OBJ_N_INTERFACES + offset = self.tl.header.HEADER_OBJECT_BLOB_SIZE + offset += (n_interfaces + n_interfaces % 2) * 2 + offset += self.tl.OBJ_N_FIELDS * self.tl.header.HEADER_FIELD_BLOB_SIZE + offset += self.tl.OBJ_N_FIELD_CALLBACKS * self.tl.header.HEADER_CALLBACK_BLOB_SIZE + offset += self.tl.OBJ_N_PROPERTIES * self.tl.header.HEADER_PROPERTY_BLOB_SIZE + offset += self.tl.OBJ_N_METHODS * self.tl.header.HEADER_FUNCTION_BLOB_SIZE + n_signals = self.tl.OBJ_N_SIGNALS + signal_size = 
self.tl.header.HEADER_SIGNAL_BLOB_SIZE + result = {} + for i in range(n_signals): + signal = Signal(self, self.tl[offset][i * signal_size]) + result[signal.name] = signal + return result + + @cached_property + def parent(self): + if entry := self.tl.OBJ_PARENT: + return self.get_containing(Repository)._resolve_dir_entry(entry) + else: + return None + + @cached_property def signature(self): result = f"class {self.container.name}.{self.name}" if self.parent is not None: result += f" : {self.parent.container.name}.{self.parent.name}" if len(self.implements): - result += " implements " + ", ".join(self.implements) + result += " implements " + ", ".join([impl.full_name for impl in self.implements]) return result @cached_property @@ -257,13 +402,6 @@ class Class(GirNode, GirType): def signals(self): return { s.name: s for s in self._enum_signals() } - @cached_property - def parent(self): - if self._parent is None: - return None - return self.get_containing(Namespace).lookup_type(self._parent) - - def assignable_to(self, other) -> bool: if self == other: return True @@ -271,12 +409,11 @@ class Class(GirNode, GirType): return True else: for iface in self.implements: - if self.get_containing(Namespace).lookup_type(iface).assignable_to(other): + if iface.assignable_to(other): return True return False - def _enum_properties(self): yield from self.own_properties.values() @@ -284,7 +421,7 @@ class Class(GirNode, GirType): yield from self.parent.properties.values() for impl in self.implements: - yield from self.get_containing(Namespace).lookup_type(impl).properties.values() + yield from impl.properties.values() def _enum_signals(self): yield from self.own_signals.values() @@ -293,25 +430,28 @@ class Class(GirNode, GirType): yield from self.parent.signals.values() for impl in self.implements: - yield from self.get_containing(Namespace).lookup_type(impl).signals.values() + yield from impl.signals.values() class EnumMember(GirNode): - def __init__(self, ns, xml: 
xml_reader.Element): - super().__init__(ns, xml) - self._value = xml["value"] + def __init__(self, ns, tl: typelib.Typelib): + super().__init__(ns, tl) @property def value(self): - return self._value + return self.tl.VALUE_VALUE - @property + @cached_property + def name(self): + return self.tl.VALUE_NAME + + @cached_property def nick(self): - return self.xml["glib:nick"] + return self.name.replace("_", "-") @property def c_ident(self): - return self.xml["c:identifier"] + return self.tl.attr("c:identifier") @property def signature(self): @@ -319,9 +459,19 @@ class EnumMember(GirNode): class Enumeration(GirNode, GirType): - def __init__(self, ns, xml: xml_reader.Element): - super().__init__(ns, xml) - self.members = { child["name"]: EnumMember(self, child) for child in xml.get_elements("member") } + def __init__(self, ns, tl: typelib.Typelib): + super().__init__(ns, tl) + + @cached_property + def members(self): + members = {} + n_values = self.tl.ENUM_N_VALUES + values = self.tl.ENUM_VALUES + value_size = self.tl.header.HEADER_VALUE_BLOB_SIZE + for i in range(n_values): + member = EnumMember(self, values[i * value_size]) + members[member.name] = member + return members @property def signature(self): @@ -331,64 +481,68 @@ class Enumeration(GirNode, GirType): return type == self -class BitfieldMember(GirNode): - def __init__(self, ns, xml: xml_reader.Element): - super().__init__(ns, xml) - self._value = xml["value"] - - @property - def value(self): - return self._value - - @property - def signature(self): - return f"bitfield member {self.full_name} = {bin(self.value)}" - - -class Bitfield(GirNode, GirType): - def __init__(self, ns, xml: xml_reader.Element): - super().__init__(ns, xml) - self.members = { child["name"]: EnumMember(self, child) for child in xml.get_elements("member") } - - @property - def signature(self): - return f"bitfield {self.full_name}" - - def assignable_to(self, type): - return type == self +class Bitfield(Enumeration): + def __init__(self, ns, 
tl: typelib.Typelib): + super().__init__(ns, tl) class Namespace(GirNode): - def __init__(self, repo, xml: xml_reader.Element): - super().__init__(repo, xml) - self.classes = { child["name"]: Class(self, child) for child in xml.get_elements("class") } - self.interfaces = { child["name"]: Interface(self, child) for child in xml.get_elements("interface") } - self.enumerations = { child["name"]: Enumeration(self, child) for child in xml.get_elements("enumeration") } - self.bitfields = { child["name"]: Bitfield(self, child) for child in xml.get_elements("bitfield") } - self.version = xml["version"] + def __init__(self, repo, tl: typelib.Typelib): + super().__init__(repo, tl) + + self.entries: T.Dict[str, GirNode] = {} + + n_local_entries = tl.HEADER_N_ENTRIES + directory = tl.HEADER_DIRECTORY + for i in range(n_local_entries): + entry = directory[i * tl.HEADER_ENTRY_BLOB_SIZE] + entry_name = entry.DIR_ENTRY_NAME + entry_type = entry.DIR_ENTRY_BLOB_TYPE + entry_blob = entry.DIR_ENTRY_OFFSET + + if entry_type == typelib.BLOB_TYPE_ENUM: + self.entries[entry_name] = Enumeration(self, entry_blob) + elif entry_type == typelib.BLOB_TYPE_FLAGS: + self.entries[entry_name] = Bitfield(self, entry_blob) + elif entry_type == typelib.BLOB_TYPE_OBJECT: + self.entries[entry_name] = Class(self, entry_blob) + elif entry_type == typelib.BLOB_TYPE_INTERFACE: + self.entries[entry_name] = Interface(self, entry_blob) + + @cached_property + def xml(self): + return get_xml(self.name, self.version).get_elements("namespace")[0] + + @cached_property + def name(self): + return self.tl.HEADER_NAMESPACE + + @cached_property + def version(self): + return self.tl.HEADER_NSVERSION @property def signature(self): return f"namespace {self.name} {self.version}" + @cached_property + def classes(self): + return { name: entry for name, entry in self.entries.items() if isinstance(entry, Class) } + + @cached_property + def interfaces(self): + return { name: entry for name, entry in self.entries.items() if 
isinstance(entry, Interface) } def get_type(self, name): """ Gets a type (class, interface, enum, etc.) from this namespace. """ - return ( - self.classes.get(name) - or self.interfaces.get(name) - or self.enumerations.get(name) - or self.bitfields.get(name) - ) - + return self.entries.get(name) def get_type_by_cname(self, cname: str): """ Gets a type from this namespace by its C name. """ - for item in [*self.classes.values(), *self.interfaces.values(), *self.enumerations.values()]: - if item.cname == cname: + for item in self.entries.values(): + if hasattr(item, "cname") and item.cname == cname: return item - def lookup_type(self, type_name: str): """ Looks up a type in the scope of this namespace (including in the namespace's dependencies). """ @@ -403,25 +557,26 @@ class Namespace(GirNode): class Repository(GirNode): - def __init__(self, xml: xml_reader.Element): - super().__init__(None, xml) - self.namespaces = { child["name"]: Namespace(self, child) for child in xml.get_elements("namespace") } + def __init__(self, tl: typelib.Typelib): + super().__init__(None, tl) - try: - self.includes = { include["name"]: get_namespace(include["name"], include["version"]) for include in xml.get_elements("include") } - except: - raise CompilerBugError(f"Failed to load dependencies.") + self.namespace = Namespace(self, tl) + if dependencies := tl[0x24].string: + deps = [tuple(dep.split("-", 1)) for dep in dependencies.split("|")] + try: + self.includes = { name: get_namespace(name, version) for name, version in deps } + except: + raise CompilerBugError(f"Failed to load dependencies.") + else: + self.includes = {} def get_type(self, name: str, ns: str) -> T.Optional[GirNode]: - if namespace := self.namespaces.get(ns): - return namespace.get_type(name) - else: - return self.lookup_namespace(ns).get_type(name) + return self.lookup_namespace(ns).get_type(name) def get_type_by_cname(self, name: str) -> T.Optional[GirNode]: - for ns in self.namespaces.values(): + for ns in 
[self.namespace, *self.includes.values()]: if type := ns.get_type_by_cname(name): return type return None @@ -429,13 +584,40 @@ class Repository(GirNode): def lookup_namespace(self, ns: str): """ Finds a namespace among this namespace's dependencies. """ - if namespace := self.namespaces.get(ns): - return namespace + if ns == self.namespace.name: + return self.namespace else: for include in self.includes.values(): if namespace := include.get_containing(Repository).lookup_namespace(ns): return namespace + def _resolve_dir_entry(self, dir_entry: typelib.Typelib): + if dir_entry.DIR_ENTRY_LOCAL: + return self.namespace.get_type(dir_entry.DIR_ENTRY_NAME) + else: + ns = dir_entry.DIR_ENTRY_NAMESPACE + return self.lookup_namespace(ns).get_type(dir_entry.DIR_ENTRY_NAME) + + def _resolve_type_id(self, type_id: int): + if type_id & 0xFFFFFF == 0: + type_id = (type_id >> 27) & 0x1F + # simple type + if type_id == typelib.TYPE_BOOLEAN: + return BoolType() + elif type_id in [typelib.TYPE_FLOAT, typelib.TYPE_DOUBLE]: + return FloatType() + elif type_id in [typelib.TYPE_INT8, typelib.TYPE_INT16, typelib.TYPE_INT32, typelib.TYPE_INT64]: + return IntType() + elif type_id in [typelib.TYPE_UINT8, typelib.TYPE_UINT16, typelib.TYPE_UINT32, typelib.TYPE_UINT64]: + return UIntType() + elif type_id == typelib.TYPE_UTF8: + return StringType() + else: + raise CompilerBugError("Unknown type ID", type_id) + else: + return self._resolve_dir_entry(self.tl.header[type_id].INTERFACE_TYPE_INTERFACE) + + class GirContext: def __init__(self): diff --git a/blueprintcompiler/language/imports.py b/blueprintcompiler/language/imports.py index cdef888..682f8cd 100644 --- a/blueprintcompiler/language/imports.py +++ b/blueprintcompiler/language/imports.py @@ -41,8 +41,14 @@ class GtkDirective(AstNode): try: gir.get_namespace("Gtk", self.tokens["version"]) - except: - raise CompileError("Could not find GTK 4 introspection files. 
Is gobject-introspection installed?", fatal=True) + except CompileError as e: + raise CompileError( + "Could not find GTK 4 introspection files. Is gobject-introspection installed?", + fatal=True, + # preserve the hints from the original error, because it contains + # useful debugging information + hints=e.hints, + ) @property diff --git a/blueprintcompiler/language/values.py b/blueprintcompiler/language/values.py index 29e5d98..f39db53 100644 --- a/blueprintcompiler/language/values.py +++ b/blueprintcompiler/language/values.py @@ -153,7 +153,7 @@ class IdentValue(Value): def validate_for_type(self): type = self.parent.value_type - if isinstance(type, gir.Enumeration) or isinstance(type, gir.Bitfield): + if isinstance(type, gir.Enumeration): if self.tokens["value"] not in type.members: raise CompileError( f"{self.tokens['value']} is not a member of {type.full_name}", @@ -183,7 +183,7 @@ class IdentValue(Value): @docs() def docs(self): type = self.parent.value_type - if isinstance(type, gir.Enumeration) or isinstance(type, gir.Bitfield): + if isinstance(type, gir.Enumeration): if member := type.members.get(self.tokens["value"]): return member.doc else: diff --git a/blueprintcompiler/main.py b/blueprintcompiler/main.py index 1a5e92a..d528e7e 100644 --- a/blueprintcompiler/main.py +++ b/blueprintcompiler/main.py @@ -28,6 +28,7 @@ from .utils import Colors from .xml_emitter import XmlEmitter VERSION = "uninstalled" +LIBDIR = None class BlueprintApp: def main(self): @@ -144,7 +145,7 @@ class BlueprintApp: return ast.generate(), warnings -def main(version): - global VERSION - VERSION = version +def main(version, libdir): + global VERSION, LIBDIR + VERSION, LIBDIR = version, libdir BlueprintApp().main() diff --git a/blueprintcompiler/typelib.py b/blueprintcompiler/typelib.py new file mode 100644 index 0000000..be1f366 --- /dev/null +++ b/blueprintcompiler/typelib.py @@ -0,0 +1,292 @@ +# typelib.py +# +# Copyright 2022 James Westman +# +# This file is free software; you 
can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 3 of the +# License, or (at your option) any later version. +# +# This file is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this program. If not, see . +# +# SPDX-License-Identifier: LGPL-3.0-or-later + + +import typing as T +import math +from ctypes import * +import mmap, os + +from .errors import CompilerBugError + + +BLOB_TYPE_ENUM = 5 +BLOB_TYPE_FLAGS = 6 +BLOB_TYPE_OBJECT = 7 +BLOB_TYPE_INTERFACE = 8 + +TYPE_VOID = 0 +TYPE_BOOLEAN = 1 +TYPE_INT8 = 2 +TYPE_UINT8 = 3 +TYPE_INT16 = 4 +TYPE_UINT16 = 5 +TYPE_INT32 = 6 +TYPE_UINT32 = 7 +TYPE_INT64 = 8 +TYPE_UINT64 = 9 +TYPE_FLOAT = 10 +TYPE_DOUBLE = 11 +TYPE_GTYPE = 12 +TYPE_UTF8 = 13 +TYPE_FILENAME = 14 +TYPE_ARRAY = 15 +TYPE_INTERFACE = 16 +TYPE_GLIST = 17 +TYPE_GSLIST = 18 +TYPE_GHASH = 19 +TYPE_ERROR = 20 +TYPE_UNICHAR = 21 + + +class Field: + def __init__(self, offset, type, shift=0, mask=None): + self._offset = offset + self._type = type + self._shift = shift + self._mask = (1 << mask) - 1 if mask else None + self._name = f"{offset}__{type}__{shift}__{mask}" + + def __get__(self, typelib, _objtype=None): + if typelib is None: + return self + + def shift_mask(n): + n = n >> self._shift + if self._mask: + n = n & self._mask + return n + + tl = typelib[self._offset] + if self._type == "u8": + return shift_mask(tl.u8) + elif self._type == "u16": + return shift_mask(tl.u16) + elif self._type == "u32": + return shift_mask(tl.u32) + elif self._type == "i8": + return shift_mask(tl.i8) + elif self._type == "i16": + return shift_mask(tl.i16) + elif self._type == "i32": + return 
shift_mask(tl.i32) + elif self._type == "pointer": + return tl.header[tl.u32] + elif self._type == "offset": + return tl + elif self._type == "string": + return tl.string + elif self._type == "dir_entry": + return tl.header.dir_entry(tl.u16) + else: + raise CompilerBugError(self._type) + + +class Typelib: + AS_DIR_ENTRY = Field(0, "dir_entry") + + HEADER_N_ENTRIES = Field(0x14, "u16") + HEADER_N_LOCAL_ENTRIES = Field(0x16, "u16") + HEADER_DIRECTORY = Field(0x18, "pointer") + HEADER_N_ATTRIBUTES = Field(0x1C, "u32") + HEADER_ATTRIBUTES = Field(0x20, "pointer") + + HEADER_DEPENDENCIES = Field(0x24, "pointer") + + HEADER_NAMESPACE = Field(0x2C, "string") + HEADER_NSVERSION = Field(0x30, "string") + + HEADER_ENTRY_BLOB_SIZE = Field(0x3C, "u16") + HEADER_FUNCTION_BLOB_SIZE = Field(0x3E, "u16") + HEADER_CALLBACK_BLOB_SIZE = Field(0x40, "u16") + HEADER_SIGNAL_BLOB_SIZE = Field(0x42, "u16") + HEADER_PROPERTY_BLOB_SIZE = Field(0x48, "u16") + HEADER_FIELD_BLOB_SIZE = Field(0x4A, "u16") + HEADER_VALUE_BLOB_SIZE = Field(0x4C, "u16") + HEADER_ATTRIBUTE_BLOB_SIZE = Field(0x4E, "u16") + HEADER_ENUM_BLOB_SIZE = Field(0x56, "u16") + HEADER_OBJECT_BLOB_SIZE = Field(0x5A, "u16") + HEADER_INTERFACE_BLOB_SIZE = Field(0x5C, "u16") + + DIR_ENTRY_BLOB_TYPE = Field(0x0, "u16") + DIR_ENTRY_LOCAL = Field(0x2, "u16", 0, 1) + DIR_ENTRY_NAME = Field(0x4, "string") + DIR_ENTRY_OFFSET = Field(0x8, "pointer") + DIR_ENTRY_NAMESPACE = Field(0x8, "string") + + ATTR_OFFSET = Field(0x0, "u32") + ATTR_NAME = Field(0x0, "string") + ATTR_VALUE = Field(0x0, "string") + + INTERFACE_TYPE_INTERFACE = Field(0x2, "dir_entry") + + BLOB_NAME = Field(0x4, "string") + + ENUM_GTYPE_NAME = Field(0x8, "string") + ENUM_N_VALUES = Field(0x10, "u16") + ENUM_N_METHODS = Field(0x12, "u16") + ENUM_VALUES = Field(0x18, "offset") + + INTERFACE_GTYPE_NAME = Field(0x8, "string") + INTERFACE_N_PREREQUISITES = Field(0x12, "u16") + INTERFACE_N_PROPERTIES = Field(0x14, "u16") + INTERFACE_N_METHODS = Field(0x16, "u16") + 
INTERFACE_N_SIGNALS = Field(0x18, "u16")
+    INTERFACE_N_VFUNCS = Field(0x1A, "u16")
+    INTERFACE_N_CONSTANTS = Field(0x1C, "u16")
+    INTERFACE_PREREQUISITES = Field(0x28, "offset")
+
+    OBJ_DEPRECATED = Field(0x02, "u16", 0, 1)
+    OBJ_ABSTRACT = Field(0x02, "u16", 1, 1)
+    OBJ_FUNDAMENTAL = Field(0x02, "u16", 2, 1)
+    OBJ_FINAL = Field(0x02, "u16", 3, 1)
+    OBJ_GTYPE_NAME = Field(0x08, "string")
+    OBJ_PARENT = Field(0x10, "dir_entry")
+    OBJ_GTYPE_STRUCT = Field(0x12, "dir_entry")  # u16 directory index; 0x14 is already OBJ_N_INTERFACES
+    OBJ_N_INTERFACES = Field(0x14, "u16")
+    OBJ_N_FIELDS = Field(0x16, "u16")
+    OBJ_N_PROPERTIES = Field(0x18, "u16")
+    OBJ_N_METHODS = Field(0x1A, "u16")
+    OBJ_N_SIGNALS = Field(0x1C, "u16")
+    OBJ_N_VFUNCS = Field(0x1E, "u16")
+    OBJ_N_CONSTANTS = Field(0x20, "u16")
+    OBJ_N_FIELD_CALLBACKS = Field(0x22, "u16")
+
+    PROP_NAME = Field(0x0, "string")
+    PROP_DEPRECATED = Field(0x4, "u32", 0, 1)
+    PROP_READABLE = Field(0x4, "u32", 1, 1)
+    PROP_WRITABLE = Field(0x4, "u32", 2, 1)
+    PROP_CONSTRUCT = Field(0x4, "u32", 3, 1)
+    PROP_CONSTRUCT_ONLY = Field(0x4, "u32", 4, 1)
+    PROP_TYPE = Field(0xC, "u32")
+
+    VALUE_NAME = Field(0x4, "string")
+    VALUE_VALUE = Field(0x8, "i32")
+
+    def __init__(self, typelib_file, offset):
+        self._typelib_file = typelib_file  # the raw bytes of the whole typelib
+        self._offset = offset  # absolute byte position this view points at
+
+    def __getitem__(self, index):
+        return Typelib(self._typelib_file, self._offset + index)  # index is a byte offset relative to this view
+
+    def attr(self, name):
+        return self.header.attr(self._offset, name)
+
+    @property
+    def header(self):
+        return TypelibHeader(self._typelib_file)  # a fresh view at byte 0 of the same file
+
+    @property
+    def u8(self):
+        """Gets the 8-bit unsigned int at this location."""
+        return self._int(1, False)
+
+    @property
+    def u16(self):
+        """Gets the 16-bit unsigned int at this location."""
+        return self._int(2, False)
+
+    @property
+    def u32(self):
+        """Gets the 32-bit unsigned int at this location."""
+        return self._int(4, False)
+
+    @property
+    def i8(self):
+        """Gets the 8-bit signed int at this location."""
+        return self._int(1, True)
+
+    @property
+    def i16(self):
+        """Gets the 16-bit signed int at this location."""
+        return self._int(2, True)
+
+    @property
+    def i32(self):
+        """Gets the 32-bit signed int at this location."""
+        return self._int(4, True)
+
+    @property
+    def string(self) -> T.Optional[str]:
+        """Interprets the 32-bit unsigned int at this location as a pointer
+        within the typelib file, and returns the null-terminated string at that
+        pointer."""
+
+        loc = self.u32
+        if loc == 0:
+            return None
+
+        end = loc
+        while self._typelib_file[end] != 0:  # scan forward to the NUL terminator
+            end += 1
+        return self._typelib_file[loc:end].decode("utf-8")
+
+    def _int(self, size, signed):
+        return int.from_bytes(self._typelib_file[self._offset:self._offset + size], 'little', signed=signed)
+
+
+class TypelibHeader(Typelib):
+    def __init__(self, typelib_file):
+        super().__init__(typelib_file, 0)
+
+    def dir_entry(self, index):
+        if index == 0:  # indices are 1-based here; 0 yields no entry
+            return None
+        else:
+            return self.HEADER_DIRECTORY[(index - 1) * self.HEADER_ENTRY_BLOB_SIZE]
+
+    def attr(self, offset, name):
+        """Returns the value of the attribute `name` attached to the blob at
+        `offset`, or None if there is no such attribute."""
+        n_attrs = self.HEADER_N_ATTRIBUTES
+        attr_size = self.HEADER_ATTRIBUTE_BLOB_SIZE
+        attrs = self.HEADER_ATTRIBUTES
+
+        # Lower-bound binary search over the half-open range [lower, upper).
+        # The lookup relies on the table being sorted by ATTR_OFFSET; every
+        # probe stays inside 0..n_attrs-1, including when the table is empty.
+        lower = 0
+        upper = n_attrs
+        while lower < upper:
+            mid = (lower + upper) // 2
+            if attrs[mid * attr_size].ATTR_OFFSET < offset:
+                lower = mid + 1
+            else:
+                upper = mid
+
+        # `lower` is now the first entry for `offset`, if there is one. Entries
+        # for the same blob are contiguous, so scan forward for the name.
+        while lower < n_attrs and attrs[lower * attr_size].ATTR_OFFSET == offset:
+            if attrs[lower * attr_size].ATTR_NAME == name:
+                return attrs[lower * attr_size].ATTR_VALUE
+            lower += 1
+        return None
+
+    def attr_by_index(self, index):
+        pass  # TODO: not implemented yet; currently always returns None
+
+    @property
+    def dir_entries(self):
+        return [self.dir_entry(i) for i in range(self[0x16].u16)]  # NOTE(review): i starts at 0, so element 0 is always None - confirm intended
+
+
+def load_typelib(path: str) -> Typelib:
+    with open(path, "rb") as f:
+        return Typelib(f.read(), 0)  # the whole file is read into memory; the view starts at byte 0
diff --git a/blueprintcompiler/xml_reader.py b/blueprintcompiler/xml_reader.py
index 877d20a..24ae5ff
100644
--- a/blueprintcompiler/xml_reader.py
+++ b/blueprintcompiler/xml_reader.py
@@ -36,7 +36,7 @@ class Element:
     def __init__(self, tag, attrs: T.Dict[str, str]):
         self.tag = tag
         self.attrs = attrs
-        self.children: T.Dict[str, T.List["Element"]] = defaultdict(list)
+        self.children: T.List["Element"] = []  # flat list of direct children, in the order they were parsed
         self.cdata_chunks: T.List[str] = []
 
     @cached_property
@@ -44,50 +44,42 @@ class Element:
         return ''.join(self.cdata_chunks)
 
     def get_elements(self, name) -> T.List["Element"]:
-        return self.children.get(name, [])
+        return [  # children are no longer grouped by tag, so filter on each call
+            child
+            for child in self.children
+            if child.tag == name
+        ]
 
     def __getitem__(self, key):
         return self.attrs.get(key)
 
 
 class Handler(sax.handler.ContentHandler):
-    def __init__(self, parse_type):
+    def __init__(self):  # every element is kept; there is no tag filter any more
         self.root = None
         self.stack = []
-        self.skipping = 0
-        self._interesting_elements = parse_type
 
     def startElement(self, name, attrs):
-        if self._interesting_elements is not None and name not in self._interesting_elements:
-            self.skipping += 1
-        if self.skipping > 0:
-            return
-
         element = Element(name, attrs.copy())
 
         if len(self.stack):
             last = self.stack[-1]
-            last.children[name].append(element)
+            last.children.append(element)  # attach to the element that is currently open
         else:
             self.root = element
 
         self.stack.append(element)
 
-
     def endElement(self, name):
-        if self.skipping == 0:
-            self.stack.pop()
-        if self._interesting_elements is not None and name not in self._interesting_elements:
-            self.skipping -= 1
+        self.stack.pop()  # startElement always pushes, so this pops the element being closed
 
     def characters(self, content):
-        if not self.skipping:
-            self.stack[-1].cdata_chunks.append(content)
+        self.stack[-1].cdata_chunks.append(content)  # text belongs to the innermost open element
 
 
-def parse(filename, parse_type=None):
+def parse(filename):  # parse_type is gone: the whole document tree is always built
     parser = sax.make_parser()
-    handler = Handler(parse_type)
+    handler = Handler()
     parser.setContentHandler(handler)
     parser.parse(filename)
     return handler.root
diff --git a/tests/fuzz.py b/tests/fuzz.py
index 17f2eeb..4f7c879 100644
--- a/tests/fuzz.py
+++ b/tests/fuzz.py
@@ -3,7 +3,7 @@ from pythonfuzz.main import PythonFuzz
 
 sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
 
-from blueprintcompiler import tokenizer, parser, decompiler
+from blueprintcompiler import tokenizer, parser, decompiler, gir
 from blueprintcompiler.completions import complete
 from blueprintcompiler.errors import PrintableError, MultipleErrors, CompileError, CompilerBugError
 from blueprintcompiler.tokenizer import Token, TokenType, tokenize
@@ -27,4 +27,8 @@ def fuzz(buf):
         pass
 
 if __name__ == "__main__":
+    # Make sure Gtk 4.0 is accessible, otherwise every test will fail on that
+    # and nothing interesting will be tested
+    gir.get_namespace("Gtk", "4.0")
+
     fuzz()