blueprint-compiler/docs/collect-sections.py
Sertonix 394014429e Sort keys in collect-sections.py
This makes sure that the reference_docs.json file
is build reproducible.

Ref https://reproducible-builds.org/
2025-03-24 22:58:43 +00:00

139 lines
3.8 KiB
Python
Executable file

#!/usr/bin/env python3
import json
import os
import re
import sys
from dataclasses import dataclass
from pathlib import Path
__all__ = ["get_docs_section"]
DOCS_ROOT = "https://jwestman.pages.gitlab.gnome.org/blueprint-compiler"
sections: dict[str, "Section"] = {}
@dataclass
class Section:
link: str
lines: str
def to_json(self):
return {
"content": rst_to_md(self.lines),
"link": self.link,
}
def load_reference_docs():
for filename in Path(os.path.dirname(__file__), "reference").glob("*.rst"):
with open(filename) as f:
section_name = None
lines = []
def close_section():
if section_name:
html_file = re.sub(r"\.rst$", ".html", filename.name)
anchor = re.sub(r"[^a-z0-9]+", "-", section_name.lower())
link = f"{DOCS_ROOT}/reference/{html_file}#{anchor}"
sections[section_name] = Section(link, lines)
for line in f:
if m := re.match(r"\.\.\s+_(.*):", line):
close_section()
section_name = m.group(1)
lines = []
else:
lines.append(line)
close_section()
# This isn't a comprehensive rST to markdown converter, it just needs to handle the
# small subset of rST used in the reference docs.
def rst_to_md(lines: list[str]) -> str:
result = ""
def rst_to_md_inline(line):
line = re.sub(r"``(.*?)``", r"`\1`", line)
line = re.sub(
r":ref:`(.*?)<(.*?)>`",
lambda m: f"[{m.group(1)}]({sections[m.group(2)].link})",
line,
)
line = re.sub(r"`([^`]*?) <([^`>]*?)>`_", r"[\1](\2)", line)
return line
i = 0
n = len(lines)
heading_levels = {}
def print_block(lang: str = "", code: bool = True, strip_links: bool = False):
nonlocal result, i
block = ""
while i < n:
line = lines[i].rstrip()
if line.startswith(" "):
line = line[3:]
elif line != "":
break
if strip_links:
line = re.sub(r":ref:`(.*?)<(.*?)>`", r"\1", line)
if not code:
line = rst_to_md_inline(line)
block += line + "\n"
i += 1
if code:
result += f"```{lang}\n{block.strip()}\n```\n\n"
else:
result += block
while i < n:
line = lines[i].rstrip()
i += 1
if line == ".. rst-class:: grammar-block":
print_block("text", strip_links=True)
elif line == ".. code-block:: blueprint":
print_block("blueprint")
elif line == ".. note::":
result += "#### Note\n"
print_block(code=False)
elif m := re.match(r"\.\. image:: (.*)", line):
result += f"![{m.group(1)}]({DOCS_ROOT}/_images/{m.group(1)})\n"
elif i < n and re.match(r"^((-+)|(~+)|(\++))$", lines[i]):
level_char = lines[i][0]
if level_char not in heading_levels:
heading_levels[level_char] = max(heading_levels.values(), default=1) + 1
result += (
"#" * heading_levels[level_char] + " " + rst_to_md_inline(line) + "\n"
)
i += 1
else:
result += rst_to_md_inline(line) + "\n"
return result
if __name__ == "__main__":
if len(sys.argv) != 2:
print("Usage: collect_sections.py <output_file>")
sys.exit(1)
outfile = sys.argv[1]
load_reference_docs()
# print the sections to a json file
with open(outfile, "w") as f:
json.dump(
{name: section.to_json() for name, section in sections.items()},
f,
indent=2,
sort_keys=True,
)