mirror of
https://github.com/aljazceru/Tutorial-Codebase-Knowledge.git
synced 2025-12-19 07:24:20 +01:00
Added a new crawl_local_files() function that mimics the interface of crawl_github_files(). Modified the FetchRepo node to handle both cases. The project name is now derived from either the repository name (from the GitHub URL) or the directory name (from the local path), or it can be specified manually with -n/--name. The tool uses the same file pattern matching and size limits for both sources. All other functionality (generating abstractions, relationships, chapters, etc.) remains unchanged, since it works with the same file list format.
78 lines
3.2 KiB
Python
78 lines
3.2 KiB
Python
import dotenv
|
|
import os
|
|
import argparse
|
|
# Import the function that creates the flow
|
|
from flow import create_tutorial_flow
|
|
|
|
# Load variables from a .env file (python-dotenv) at import time, before
# main() looks up GITHUB_TOKEN in os.environ.
dotenv.load_dotenv()
|
|
|
|
# Default file patterns
|
|
# File patterns included when the user does not pass -i/--include.
DEFAULT_INCLUDE_PATTERNS = {
    # Source code
    "*.py", "*.pyi", "*.pyx",
    "*.js", "*.jsx", "*.ts", "*.tsx",
    "*.go", "*.java",
    "*.c", "*.cc", "*.cpp", "*.h",
    # Documentation
    "*.md", "*.rst",
    # Build and configuration files
    "Dockerfile", "Makefile", "*.yaml", "*.yml",
}
|
|
|
|
# File patterns excluded when the user does not pass -e/--exclude.
# NOTE(review): if these are matched fnmatch-style, "*test*" also hits names
# such as "latest.py" or "contest.js" — confirm that breadth is intended.
DEFAULT_EXCLUDE_PATTERNS = {
    # Tests, docs and samples
    "*test*", "tests/*", "docs/*", "examples/*",
    # Old or unstable code
    "v1/*", "experimental/*", "deprecated/*", "legacy/*",
    # Build output and dependencies
    "dist/*", "build/*", "obj/*", "bin/*", "node_modules/*",
    # VCS, CI and editor metadata
    ".git/*", ".github/*", ".next/*", ".vscode/*",
    # Logs
    "*.log",
}
|
|
|
|
# --- Main Function ---
|
|
def main(argv=None) -> None:
    """Parse the command line and run the tutorial-generation flow.

    Exactly one source must be supplied: ``--repo`` (a GitHub URL) or
    ``--dir`` (a local directory). The parsed options are packed into the
    ``shared`` dictionary, which is passed to the flow returned by
    ``create_tutorial_flow()``.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, so command-line behaviour is
            unchanged; passing a list allows programmatic invocation.

    Raises:
        SystemExit: Raised by argparse on ``--help`` or invalid arguments.
    """
    args = _build_arg_parser().parse_args(argv)

    # A token is only looked up for GitHub sources; for --dir it stays None.
    github_token = None
    if args.repo:
        github_token = args.token or os.environ.get('GITHUB_TOKEN')
        if not github_token:
            print("Warning: No GitHub token provided. You might hit rate limits for public repositories.")

    # Copy the module-level defaults so that anything mutating the shared
    # state cannot alter DEFAULT_INCLUDE_PATTERNS / DEFAULT_EXCLUDE_PATTERNS.
    include_patterns = set(args.include) if args.include else set(DEFAULT_INCLUDE_PATTERNS)
    exclude_patterns = set(args.exclude) if args.exclude else set(DEFAULT_EXCLUDE_PATTERNS)

    # Shared dictionary handed to the flow: inputs first, then output slots.
    shared = {
        # Inputs (exactly one of repo_url / local_dir is set)
        "repo_url": args.repo,
        "local_dir": args.dir,
        "project_name": args.name,  # Can be None, FetchRepo will derive it
        "github_token": github_token,
        "output_dir": args.output,  # Base directory for CombineTutorial output

        # File selection: include/exclude patterns and max file size
        "include_patterns": include_patterns,
        "exclude_patterns": exclude_patterns,
        "max_file_size": args.max_size,

        # Outputs will be populated by the nodes
        "files": [],
        "abstractions": [],
        "relationships": {},
        "chapter_order": [],
        "chapters": [],
        "final_output_dir": None,
    }

    print(f"Starting tutorial generation for: {args.repo or args.dir}")

    # Create the flow instance and run it against the shared state.
    tutorial_flow = create_tutorial_flow()
    tutorial_flow.run(shared)


def _build_arg_parser() -> argparse.ArgumentParser:
    """Return the argument parser describing the command-line interface."""
    parser = argparse.ArgumentParser(description="Generate a tutorial for a GitHub codebase or local directory.")

    # The source is mandatory, and --repo / --dir are mutually exclusive.
    source_group = parser.add_mutually_exclusive_group(required=True)
    source_group.add_argument("--repo", help="URL of the public GitHub repository.")
    source_group.add_argument("--dir", help="Path to local directory.")

    parser.add_argument("-n", "--name", help="Project name (optional, derived from repo/directory if omitted).")
    parser.add_argument("-t", "--token", help="GitHub personal access token (optional, reads from GITHUB_TOKEN env var if not provided).")
    parser.add_argument("-o", "--output", default="output", help="Base directory for output (default: ./output).")
    parser.add_argument("-i", "--include", nargs="+", help="Include file patterns (e.g. '*.py' '*.js'). Defaults to common code files if not specified.")
    parser.add_argument("-e", "--exclude", nargs="+", help="Exclude file patterns (e.g. 'tests/*' 'docs/*'). Defaults to test/build directories if not specified.")
    parser.add_argument("-s", "--max-size", type=int, default=100000, help="Maximum file size in bytes (default: 100000, about 100KB).")
    return parser
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()