import argparse
import markdown
import os
import shutil
from jinja2 import Environment, FileSystemLoader
from bs4 import BeautifulSoup
# Read environment variables with defaults.
# These control where the generator finds its inputs; all are overridable
# via the environment so the script can run against different content sets.
TEMPLATE_FILE = os.getenv("TEMPLATE_FILE", "template.html")  # Jinja2 template filename (relative to TEMPLATE_DIR)
MARKDOWN_FILE = os.getenv("MARKDOWN_FILE", "laws.md")  # Markdown source parsed by parse_markdown()
TEMPLATE_DIR = os.getenv("TEMPLATE_DIR", ".") # Directory where template is stored
def load_template():
    """Return the configured Jinja2 template.

    Builds a Jinja2 ``Environment`` rooted at ``TEMPLATE_DIR`` and loads
    the template named by ``TEMPLATE_FILE`` from it.
    """
    environment = Environment(loader=FileSystemLoader(TEMPLATE_DIR))
    return environment.get_template(TEMPLATE_FILE)
def parse_markdown(md_file):
    """Parse *md_file* into a list of law-section dicts.

    The file is split on second-level Markdown headings (``## ``); each
    resulting section becomes a dict with:
      - "title":   the heading text, with leading/trailing '#' and spaces removed
      - "content": the section body rendered to HTML via python-markdown
      - "id":      a slug built by lowercasing the title and replacing spaces with hyphens
    Blank sections are skipped.
    """
    with open(md_file, "r", encoding="utf-8") as handle:
        raw = handle.read()

    laws = []
    # "\n## " delimits individual laws; the delimiter itself is consumed,
    # so each chunk starts with the heading text.
    for chunk in raw.split("\n## "):
        if not chunk.strip():
            continue
        # Heading is everything up to the first newline; body may be empty
        # when a heading has no text under it.
        head, _, body = chunk.partition("\n")
        title = head.strip("# ").strip()
        laws.append(
            {
                "title": title,
                "content": markdown.markdown(body),
                "id": title.lower().replace(" ", "-"),
            }
        )
    return laws
def extract_static_files(html_content, output_dir):
"""Extract linked CSS, JS, and image files and copy them to the output directory."""
soup = BeautifulSoup(html_content, "html.parser")
files_to_copy = []
# Extract stylesheets
for link in soup.find_all("link", href=True):
href = link["href"]
if not href.startswith(("http", "//")): # Ignore external links
files_to_copy.append(href)
# Extract