# Mirror of https://github.com/aljazceru/Tutorial-Codebase-Knowledge.git
# Synced 2025-12-18 15:04:20 +01:00
# 70 lines, 2.8 KiB, Python
import argparse
import os

import dotenv

# Import the function that creates the flow
from flow import create_tutorial_flow

# Load variables from a local .env file (if one exists) into the process
# environment before main() runs; main() falls back to the GITHUB_TOKEN
# environment variable when no --token argument is given.
dotenv.load_dotenv()

# Default file patterns
# Glob-style patterns used as "include_patterns" when the user does not pass
# -i/--include on the command line.
DEFAULT_INCLUDE_PATTERNS = {
    "*.py", "*.js", "*.ts", "*.go", "*.java", "*.pyi", "*.pyx",
    "*.c", "*.cc", "*.cpp", "*.h", "*.md", "*.rst", "Dockerfile",
    "Makefile", "*.yaml", "*.yml",
}

# Glob-style patterns used as "exclude_patterns" when the user does not pass
# -e/--exclude on the command line.
DEFAULT_EXCLUDE_PATTERNS = {
    "*test*", "tests/*", "docs/*", "examples/*", "v1/*",
    "dist/*", "build/*", "experimental/*", "deprecated/*",
    "legacy/*", ".git/*", ".github/*",
}

# --- Main Function ---
def main():
    """Parse the command line and run the tutorial-generation flow.

    Reads the repository URL and the optional settings (project name,
    GitHub token, output directory, include/exclude patterns, maximum file
    size) from ``sys.argv``, assembles the ``shared`` dictionary, and passes
    it to the flow returned by ``create_tutorial_flow()``.

    The GitHub token is taken from ``--token`` or, failing that, from the
    ``GITHUB_TOKEN`` environment variable; a warning is printed when neither
    is set. Returns ``None``.
    """
    parser = argparse.ArgumentParser(description="Generate a tutorial for a GitHub codebase.")
    parser.add_argument("repo_url", help="URL of the public GitHub repository.")
    parser.add_argument("-n", "--name", help="Project name (optional, derived from URL if omitted).")
    parser.add_argument("-t", "--token", help="GitHub personal access token (optional, reads from GITHUB_TOKEN env var if not provided).")
    parser.add_argument("-o", "--output", default="output", help="Base directory for output (default: ./output).")
    parser.add_argument("-i", "--include", nargs="+", help="Include file patterns (e.g. '*.py' '*.js'). Defaults to common code files if not specified.")
    parser.add_argument("-e", "--exclude", nargs="+", help="Exclude file patterns (e.g. 'tests/*' 'docs/*'). Defaults to test/build directories if not specified.")
    parser.add_argument("-s", "--max-size", type=int, default=100000, help="Maximum file size in bytes (default: 100000, about 100KB).")

    args = parser.parse_args()

    # Get GitHub token from argument or environment variable
    github_token = args.token or os.environ.get('GITHUB_TOKEN')
    if not github_token:
        print("Warning: No GitHub token provided. You might hit rate limits for public repositories.")

    # Copy the defaults so nothing downstream can mutate the module-level
    # constants through `shared`.
    include_patterns = set(args.include) if args.include else set(DEFAULT_INCLUDE_PATTERNS)
    exclude_patterns = set(args.exclude) if args.exclude else set(DEFAULT_EXCLUDE_PATTERNS)

    # Initialize the shared dictionary with inputs
    shared = {
        "repo_url": args.repo_url,
        "project_name": args.name,  # Can be None, FetchRepo will derive it
        "github_token": github_token,
        "output_dir": args.output,  # Base directory for CombineTutorial output

        # Add include/exclude patterns and max file size
        "include_patterns": include_patterns,
        "exclude_patterns": exclude_patterns,
        "max_file_size": args.max_size,

        # Outputs will be populated by the nodes
        "files": [],
        "abstractions": [],
        "relationships": {},
        "chapter_order": [],
        "chapters": [],
        "final_output_dir": None,
    }

    print(f"Starting tutorial generation for: {args.repo_url}")

    # Create the flow instance
    tutorial_flow = create_tutorial_flow()

    # Run the flow
    tutorial_flow.run(shared)


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()