Files
hacker-laws/.github/website/generate.py
2025-03-10 15:08:18 +00:00

165 lines
5.2 KiB
Python

"""Generate the Hacker Laws website from the Hacker Laws README"""
# TODO:
# - favicon
# - test share links
# - rubbish at bottom of pate
# - crosslink ES/HL/TAI
# - 'back to top' better text
import argparse
import os
import shutil
from jinja2 import Environment, FileSystemLoader
import markdown
from bs4 import BeautifulSoup
def bisect_text(content: str, bisect_line: str) -> tuple[str, str]:
lines = content.splitlines()
head = []
tail = []
found = False
for line in lines:
if found is False and line == bisect_line:
found = True
continue
if found:
tail.append(line)
else:
head.append(line)
return ("\n".join(head), "\n".join(tail))
def load_template():
"""Load Jinja2 template from the specified directory."""
env = Environment(loader=FileSystemLoader(TEMPLATE_DIR))
return env.get_template(TEMPLATE_FILE)
def prepare_markdown(path: str) -> str:
"""
Pre-process the README markdown by removing content we will not show in
the final website.
"""
# Load the markdown content.
with open(path, "r", encoding="utf-8") as f:
content = f.read()
return content
def parse_markdown(markdown_content: str):
(_, remains) = bisect_text(markdown_content, "<!-- vim-markdown-toc GFM -->")
(_, content) = bisect_text(remains, "<!-- vim-markdown-toc -->")
md = markdown.Markdown(extensions=['toc'])
md.convert(content)
toc = md.toc
markdown_sections = content.split("\n#") # Split by Markdown headings
sections = []
laws = []
for markdown_section in markdown_sections:
if markdown_section.strip():
lines = markdown_section.split("\n", 1)
title = lines[0].strip("# ").strip()
content = md.convert(lines[1] if len(lines) > 1 else "")
full_content = md.convert(markdown_section)
id = title.lower().replace(" ", "-")
laws.append({"title": title, "content": content, "id": id})
sections.append({
"title": title,
"content": content,
"id": id,
"full_content": full_content
})
return (sections, toc)
def extract_static_files(html_content, output_dir):
"""
Extract linked CSS, JS, and image files and copy them to the output
directory.
"""
soup = BeautifulSoup(html_content, "html.parser")
files_to_copy = []
# Extract <link> stylesheets
for link in soup.find_all("link", href=True):
href = link["href"]
if not href.startswith(("http", "//")): # Ignore external links
files_to_copy.append(href)
# Extract <script> files
for script in soup.find_all("script", src=True):
src = script["src"]
if not src.startswith(("http", "//")):
files_to_copy.append(src)
# Extract <img> files
for img in soup.find_all("img", src=True):
src = img["src"]
if not src.startswith(("http", "//")):
files_to_copy.append(src)
# Copy files to the output directory
for file_path in files_to_copy:
src_path = os.path.join(TEMPLATE_DIR, file_path)
dest_path = os.path.join(output_dir, file_path)
if os.path.exists(src_path): # Ensure file exists before copying
os.makedirs(os.path.dirname(dest_path), exist_ok=True)
shutil.copy2(src_path, dest_path)
print(f"📂 Copied: {src_path}{dest_path}")
else:
print(f"⚠️ Warning: Missing file {src_path} (skipping)")
return files_to_copy
def generate_site(markdown_content: str, output_dir: str):
"""Generate the static HTML file from Markdown and Jinja2 template."""
template = load_template()
(sections, toc) = parse_markdown(markdown_content)
# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)
# Render HTML
html_output = template.render(toc=toc, sections=sections)
# Save HTML to output directory
output_file = os.path.join(output_dir, "index.html")
with open(output_file, "w", encoding="utf-8") as f:
f.write(html_output)
print(f"✅ Static site generated: {output_file}")
# Copy static files (CSS, JS, images)
extract_static_files(html_output, output_dir)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Generate a static site from Markdown.")
parser.add_argument("-o", "--output-dir", default="build", help="Directory to save the generated site.")
args = parser.parse_args()
# Read environment variables with defaults
TEMPLATE_FILE = os.getenv("TEMPLATE_FILE", "template.html")
TEMPLATE_DIR = os.getenv("TEMPLATE_DIR", ".")
template_path = f"{TEMPLATE_DIR}/{TEMPLATE_FILE}"
markdown_path = os.getenv("MARKDOWN_FILE", "laws.md")
output_dir = args.output_dir
print(f"📝 Loading template from: {template_path}")
print(f"📖 Loading markdown from: {markdown_path}")
print(f"💾 Outputting files to: {output_dir}")
# First, extract that markdown that we want to process.
markdown_content = prepare_markdown(markdown_path)
# Generate the site from the markdown.
generate_site(markdown_content, args.output_dir)