import argparse
import markdown
import os
import shutil
from jinja2 import Environment, FileSystemLoader
from bs4 import BeautifulSoup
# Read environment variables with defaults.
# These control where the generator finds its inputs; all are overridable
# via the environment so the script can run against different content sets.
TEMPLATE_FILE = os.getenv("TEMPLATE_FILE", "template.html")  # Jinja2 template filename (relative to TEMPLATE_DIR)
MARKDOWN_FILE = os.getenv("MARKDOWN_FILE", "laws.md")  # Markdown source parsed by parse_markdown()
TEMPLATE_DIR = os.getenv("TEMPLATE_DIR", ".") # Directory where template is stored
def load_template():
    """Return the configured Jinja2 template.

    Builds a Jinja2 ``Environment`` rooted at ``TEMPLATE_DIR`` and loads
    the template named by ``TEMPLATE_FILE`` from it.
    """
    environment = Environment(loader=FileSystemLoader(TEMPLATE_DIR))
    return environment.get_template(TEMPLATE_FILE)
def parse_markdown(md_file):
    """Parse *md_file* into a list of law-section dicts.

    The file is split on second-level Markdown headings (``## ``); each
    resulting section becomes a dict with:
      - "title":   the heading text, with leading/trailing '#' and spaces removed
      - "content": the section body rendered to HTML via python-markdown
      - "id":      a slug built by lowercasing the title and replacing spaces with hyphens
    Blank sections are skipped.
    """
    with open(md_file, "r", encoding="utf-8") as handle:
        raw = handle.read()

    laws = []
    # "\n## " delimits individual laws; the delimiter itself is consumed,
    # so each chunk starts with the heading text.
    for chunk in raw.split("\n## "):
        if not chunk.strip():
            continue
        # Heading is everything up to the first newline; body may be empty
        # when a heading has no text under it.
        head, _, body = chunk.partition("\n")
        title = head.strip("# ").strip()
        laws.append(
            {
                "title": title,
                "content": markdown.markdown(body),
                "id": title.lower().replace(" ", "-"),
            }
        )
    return laws
def extract_static_files(html_content, output_dir):
"""Extract linked CSS, JS, and image files and copy them to the output directory."""
soup = BeautifulSoup(html_content, "html.parser")
files_to_copy = []
# Extract stylesheets
for link in soup.find_all("link", href=True):
href = link["href"]
if not href.startswith(("http", "//")): # Ignore external links
files_to_copy.append(href)
# Extract