mirror of
https://github.com/aljazceru/Tutorial-Codebase-Knowledge.git
synced 2025-12-20 16:04:20 +01:00
remove translation from the CombineTutorial
This commit is contained in:
301
nodes.py
301
nodes.py
@@ -76,6 +76,7 @@ class IdentifyAbstractions(Node):
|
|||||||
def prep(self, shared):
|
def prep(self, shared):
|
||||||
files_data = shared["files"]
|
files_data = shared["files"]
|
||||||
project_name = shared["project_name"] # Get project name
|
project_name = shared["project_name"] # Get project name
|
||||||
|
language = shared.get("language", "english") # Get language
|
||||||
|
|
||||||
# Helper to create context from files, respecting limits (basic example)
|
# Helper to create context from files, respecting limits (basic example)
|
||||||
def create_llm_context(files_data):
|
def create_llm_context(files_data):
|
||||||
@@ -91,23 +92,33 @@ class IdentifyAbstractions(Node):
|
|||||||
context, file_info = create_llm_context(files_data)
|
context, file_info = create_llm_context(files_data)
|
||||||
# Format file info for the prompt (comment is just a hint for LLM)
|
# Format file info for the prompt (comment is just a hint for LLM)
|
||||||
file_listing_for_prompt = "\n".join([f"- {idx} # {path}" for idx, path in file_info])
|
file_listing_for_prompt = "\n".join([f"- {idx} # {path}" for idx, path in file_info])
|
||||||
return context, file_listing_for_prompt, len(files_data), project_name # Return project name
|
return context, file_listing_for_prompt, len(files_data), project_name, language # Return language
|
||||||
|
|
||||||
def exec(self, prep_res):
|
def exec(self, prep_res):
|
||||||
context, file_listing_for_prompt, file_count, project_name = prep_res # Unpack project name
|
context, file_listing_for_prompt, file_count, project_name, language = prep_res # Unpack project name and language
|
||||||
print("Identifying abstractions using LLM...")
|
print(f"Identifying abstractions in {language.capitalize()} using LLM...")
|
||||||
|
|
||||||
|
# Add language instruction and hints if not English
|
||||||
|
language_instruction = ""
|
||||||
|
name_lang_hint = ""
|
||||||
|
desc_lang_hint = ""
|
||||||
|
if language.lower() != "english":
|
||||||
|
language_instruction = f"IMPORTANT: Generate the `name` and `description` for each abstraction in **{language.capitalize()}** language. Do NOT use English for these fields.\n\n"
|
||||||
|
name_lang_hint = f" # (value in {language.capitalize()})"
|
||||||
|
desc_lang_hint = f" # (value in {language.capitalize()})"
|
||||||
|
|
||||||
prompt = f"""
|
prompt = f"""
|
||||||
For the project `{project_name}`:
|
For the project `{project_name}`:
|
||||||
|
|
||||||
Codebase Context:
|
Codebase Context:
|
||||||
{context}
|
{context}
|
||||||
|
|
||||||
Analyze the codebase context.
|
{language_instruction}Analyze the codebase context.
|
||||||
Identify the top 5-10 core most important abstractions to help those new to the codebase.
|
Identify the top 5-10 core most important abstractions to help those new to the codebase.
|
||||||
|
|
||||||
For each abstraction, provide:
|
For each abstraction, provide:
|
||||||
1. A concise `name`.
|
1. A concise `name`{name_lang_hint}.
|
||||||
2. A beginner-friendly `description` explaining what it is with a simple analogy, in around 100 words.
|
2. A beginner-friendly `description` explaining what it is with a simple analogy, in around 100 words{desc_lang_hint}.
|
||||||
3. A list of relevant `file_indices` (integers) using the format `idx # path/comment`.
|
3. A list of relevant `file_indices` (integers) using the format `idx # path/comment`.
|
||||||
|
|
||||||
List of file indices and paths present in the context:
|
List of file indices and paths present in the context:
|
||||||
@@ -116,16 +127,16 @@ List of file indices and paths present in the context:
|
|||||||
Format the output as a YAML list of dictionaries:
|
Format the output as a YAML list of dictionaries:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
- name: Query Processing
|
- name: Query Processing{name_lang_hint}
|
||||||
description: |
|
description: |
|
||||||
Explains what the abstraction does.
|
Explains what the abstraction does.
|
||||||
It's like a central dispatcher routing requests.
|
It's like a central dispatcher routing requests.{desc_lang_hint}
|
||||||
file_indices:
|
file_indices:
|
||||||
- 0 # path/to/file1.py
|
- 0 # path/to/file1.py
|
||||||
- 3 # path/to/related.py
|
- 3 # path/to/related.py
|
||||||
- name: Query Optimization
|
- name: Query Optimization{name_lang_hint}
|
||||||
description: |
|
description: |
|
||||||
Another core concept, similar to a blueprint for objects.
|
Another core concept, similar to a blueprint for objects.{desc_lang_hint}
|
||||||
file_indices:
|
file_indices:
|
||||||
- 5 # path/to/another.js
|
- 5 # path/to/another.js
|
||||||
# ... up to 10 abstractions
|
# ... up to 10 abstractions
|
||||||
@@ -143,8 +154,10 @@ Format the output as a YAML list of dictionaries:
|
|||||||
for item in abstractions:
|
for item in abstractions:
|
||||||
if not isinstance(item, dict) or not all(k in item for k in ["name", "description", "file_indices"]):
|
if not isinstance(item, dict) or not all(k in item for k in ["name", "description", "file_indices"]):
|
||||||
raise ValueError(f"Missing keys in abstraction item: {item}")
|
raise ValueError(f"Missing keys in abstraction item: {item}")
|
||||||
|
if not isinstance(item["name"], str):
|
||||||
|
raise ValueError(f"Name is not a string in item: {item}")
|
||||||
if not isinstance(item["description"], str):
|
if not isinstance(item["description"], str):
|
||||||
raise ValueError(f"description is not a string in item: {item}")
|
raise ValueError(f"Description is not a string in item: {item}")
|
||||||
if not isinstance(item["file_indices"], list):
|
if not isinstance(item["file_indices"], list):
|
||||||
raise ValueError(f"file_indices is not a list in item: {item}")
|
raise ValueError(f"file_indices is not a list in item: {item}")
|
||||||
|
|
||||||
@@ -168,12 +181,12 @@ Format the output as a YAML list of dictionaries:
|
|||||||
item["files"] = sorted(list(set(validated_indices)))
|
item["files"] = sorted(list(set(validated_indices)))
|
||||||
# Store only the required fields
|
# Store only the required fields
|
||||||
validated_abstractions.append({
|
validated_abstractions.append({
|
||||||
"name": item["name"],
|
"name": item["name"], # Potentially translated name
|
||||||
"description": item["description"],
|
"description": item["description"], # Potentially translated description
|
||||||
"files": item["files"]
|
"files": item["files"]
|
||||||
})
|
})
|
||||||
|
|
||||||
print(f"Identified {len(validated_abstractions)} abstractions.")
|
print(f"Identified {len(validated_abstractions)} abstractions (in {language.capitalize()}).")
|
||||||
return validated_abstractions
|
return validated_abstractions
|
||||||
|
|
||||||
def post(self, shared, prep_res, exec_res):
|
def post(self, shared, prep_res, exec_res):
|
||||||
@@ -181,9 +194,10 @@ Format the output as a YAML list of dictionaries:
|
|||||||
|
|
||||||
class AnalyzeRelationships(Node):
|
class AnalyzeRelationships(Node):
|
||||||
def prep(self, shared):
|
def prep(self, shared):
|
||||||
abstractions = shared["abstractions"] # Now contains 'files' list of indices
|
abstractions = shared["abstractions"] # Now contains 'files' list of indices, name/description potentially translated
|
||||||
files_data = shared["files"]
|
files_data = shared["files"]
|
||||||
project_name = shared["project_name"] # Get project name
|
project_name = shared["project_name"] # Get project name
|
||||||
|
language = shared.get("language", "english") # Get language
|
||||||
|
|
||||||
# Create context with abstraction names, indices, descriptions, and relevant file snippets
|
# Create context with abstraction names, indices, descriptions, and relevant file snippets
|
||||||
context = "Identified Abstractions:\n"
|
context = "Identified Abstractions:\n"
|
||||||
@@ -192,9 +206,10 @@ class AnalyzeRelationships(Node):
|
|||||||
for i, abstr in enumerate(abstractions):
|
for i, abstr in enumerate(abstractions):
|
||||||
# Use 'files' which contains indices directly
|
# Use 'files' which contains indices directly
|
||||||
file_indices_str = ", ".join(map(str, abstr['files']))
|
file_indices_str = ", ".join(map(str, abstr['files']))
|
||||||
|
# Abstraction name and description might be translated already
|
||||||
info_line = f"- Index {i}: {abstr['name']} (Relevant file indices: [{file_indices_str}])\n Description: {abstr['description']}"
|
info_line = f"- Index {i}: {abstr['name']} (Relevant file indices: [{file_indices_str}])\n Description: {abstr['description']}"
|
||||||
context += info_line + "\n"
|
context += info_line + "\n"
|
||||||
abstraction_info_for_prompt.append(f"{i} # {abstr['name']}")
|
abstraction_info_for_prompt.append(f"{i} # {abstr['name']}") # Use potentially translated name here too
|
||||||
all_relevant_indices.update(abstr['files'])
|
all_relevant_indices.update(abstr['files'])
|
||||||
|
|
||||||
context += "\nRelevant File Snippets (Referenced by Index and Path):\n"
|
context += "\nRelevant File Snippets (Referenced by Index and Path):\n"
|
||||||
@@ -210,26 +225,37 @@ class AnalyzeRelationships(Node):
|
|||||||
)
|
)
|
||||||
context += file_context_str
|
context += file_context_str
|
||||||
|
|
||||||
return context, "\n".join(abstraction_info_for_prompt), project_name # Return project name
|
return context, "\n".join(abstraction_info_for_prompt), project_name, language # Return language
|
||||||
|
|
||||||
def exec(self, prep_res):
|
def exec(self, prep_res):
|
||||||
context, abstraction_listing, project_name = prep_res # Unpack project name
|
context, abstraction_listing, project_name, language = prep_res # Unpack project name and language
|
||||||
print("Analyzing relationships using LLM...")
|
print(f"Analyzing relationships in {language.capitalize()} using LLM...")
|
||||||
|
|
||||||
|
# Add language instruction and hints if not English
|
||||||
|
language_instruction = ""
|
||||||
|
summary_lang_hint = ""
|
||||||
|
label_lang_hint = ""
|
||||||
|
if language.lower() != "english":
|
||||||
|
language_instruction = f"IMPORTANT: Generate the `summary` and relationship `label` fields in **{language.capitalize()}** language. Do NOT use English for these fields.\n\n"
|
||||||
|
summary_lang_hint = f" (in {language.capitalize()})"
|
||||||
|
label_lang_hint = f" # (value in {language.capitalize()})"
|
||||||
|
|
||||||
|
|
||||||
prompt = f"""
|
prompt = f"""
|
||||||
Based on the following abstractions and relevant code snippets from the project `{project_name}`:
|
Based on the following abstractions and relevant code snippets from the project `{project_name}`:
|
||||||
|
|
||||||
List of Abstraction Indices and Names:
|
List of Abstraction Indices and Names (Names might be in {language.capitalize()}):
|
||||||
{abstraction_listing}
|
{abstraction_listing}
|
||||||
|
|
||||||
Context (Abstractions, Descriptions, Code):
|
Context (Abstractions, Descriptions, Code):
|
||||||
{context}
|
{context}
|
||||||
|
|
||||||
Please provide:
|
{language_instruction}Please provide:
|
||||||
1. A high-level `summary` of the project's main purpose and functionality in a few beginner-friendly sentences. Use markdown formatting with **bold** and *italic* text to highlight important concepts.
|
1. A high-level `summary` of the project's main purpose and functionality in a few beginner-friendly sentences{summary_lang_hint}. Use markdown formatting with **bold** and *italic* text to highlight important concepts.
|
||||||
2. A list (`relationships`) describing the key interactions between these abstractions. For each relationship, specify:
|
2. A list (`relationships`) describing the key interactions between these abstractions. For each relationship, specify:
|
||||||
- `from_abstraction`: Index of the source abstraction (e.g., `0 # AbstractionName1`)
|
- `from_abstraction`: Index of the source abstraction (e.g., `0 # AbstractionName1`)
|
||||||
- `to_abstraction`: Index of the target abstraction (e.g., `1 # AbstractionName2`)
|
- `to_abstraction`: Index of the target abstraction (e.g., `1 # AbstractionName2`)
|
||||||
- `label`: A brief label for the interaction **in just a few words** (e.g., "Manages", "Inherits", "Uses").
|
- `label`: A brief label for the interaction **in just a few words**{label_lang_hint} (e.g., "Manages", "Inherits", "Uses").
|
||||||
Ideally the relationship should be backed by one abstraction calling or passing parameters to another.
|
Ideally the relationship should be backed by one abstraction calling or passing parameters to another.
|
||||||
Simplify the relationship and exclude those non-important ones.
|
Simplify the relationship and exclude those non-important ones.
|
||||||
|
|
||||||
@@ -239,15 +265,15 @@ Format the output as YAML:
|
|||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
summary: |
|
summary: |
|
||||||
A brief, simple explanation of the project.
|
A brief, simple explanation of the project{summary_lang_hint}.
|
||||||
Can span multiple lines with **bold** and *italic* for emphasis.
|
Can span multiple lines with **bold** and *italic* for emphasis.
|
||||||
relationships:
|
relationships:
|
||||||
- from_abstraction: 0 # AbstractionName1
|
- from_abstraction: 0 # AbstractionName1
|
||||||
to_abstraction: 1 # AbstractionName2
|
to_abstraction: 1 # AbstractionName2
|
||||||
label: "Manages"
|
label: "Manages"{label_lang_hint}
|
||||||
- from_abstraction: 2 # AbstractionName3
|
- from_abstraction: 2 # AbstractionName3
|
||||||
to_abstraction: 0 # AbstractionName1
|
to_abstraction: 0 # AbstractionName1
|
||||||
label: "Provides config"
|
label: "Provides config"{label_lang_hint}
|
||||||
# ... other relationships
|
# ... other relationships
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -286,40 +312,42 @@ Now, provide the YAML output:
|
|||||||
validated_relationships.append({
|
validated_relationships.append({
|
||||||
"from": from_idx,
|
"from": from_idx,
|
||||||
"to": to_idx,
|
"to": to_idx,
|
||||||
"label": rel["label"]
|
"label": rel["label"] # Potentially translated label
|
||||||
})
|
})
|
||||||
except (ValueError, TypeError):
|
except (ValueError, TypeError):
|
||||||
raise ValueError(f"Could not parse indices from relationship: {rel}")
|
raise ValueError(f"Could not parse indices from relationship: {rel}")
|
||||||
|
|
||||||
print("Generated project summary and relationship details.")
|
print(f"Generated project summary and relationship details (in {language.capitalize()}).")
|
||||||
return {
|
return {
|
||||||
"summary": relationships_data["summary"],
|
"summary": relationships_data["summary"], # Potentially translated summary
|
||||||
"details": validated_relationships # Store validated, index-based relationships
|
"details": validated_relationships # Store validated, index-based relationships with potentially translated labels
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def post(self, shared, prep_res, exec_res):
|
def post(self, shared, prep_res, exec_res):
|
||||||
# Structure is now {"summary": str, "details": [{"from": int, "to": int, "label": str}]}
|
# Structure is now {"summary": str, "details": [{"from": int, "to": int, "label": str}]}
|
||||||
|
# Summary and label might be translated
|
||||||
shared["relationships"] = exec_res
|
shared["relationships"] = exec_res
|
||||||
|
|
||||||
class OrderChapters(Node):
|
class OrderChapters(Node):
|
||||||
def prep(self, shared):
|
def prep(self, shared):
|
||||||
abstractions = shared["abstractions"]
|
abstractions = shared["abstractions"] # Name/description might be translated
|
||||||
relationships = shared["relationships"]
|
relationships = shared["relationships"] # Summary/label might be translated
|
||||||
project_name = shared["project_name"] # Get project name
|
project_name = shared["project_name"] # Get project name
|
||||||
|
|
||||||
# Prepare context for the LLM
|
# Prepare context for the LLM
|
||||||
abstraction_info_for_prompt = []
|
abstraction_info_for_prompt = []
|
||||||
for i, a in enumerate(abstractions):
|
for i, a in enumerate(abstractions):
|
||||||
abstraction_info_for_prompt.append(f"- {i} # {a['name']}")
|
abstraction_info_for_prompt.append(f"- {i} # {a['name']}") # Use potentially translated name
|
||||||
abstraction_listing = "\n".join(abstraction_info_for_prompt)
|
abstraction_listing = "\n".join(abstraction_info_for_prompt)
|
||||||
|
|
||||||
|
# Use potentially translated summary and labels
|
||||||
context = f"Project Summary:\n{relationships['summary']}\n\n"
|
context = f"Project Summary:\n{relationships['summary']}\n\n"
|
||||||
context += "Relationships (Indices refer to abstractions above):\n"
|
context += "Relationships (Indices refer to abstractions above):\n"
|
||||||
for rel in relationships['details']:
|
for rel in relationships['details']:
|
||||||
from_name = abstractions[rel['from']]['name']
|
from_name = abstractions[rel['from']]['name']
|
||||||
to_name = abstractions[rel['to']]['name']
|
to_name = abstractions[rel['to']]['name']
|
||||||
# Use 'label' instead of 'desc'
|
# Use potentially translated 'label'
|
||||||
context += f"- From {rel['from']} ({from_name}) to {rel['to']} ({to_name}): {rel['label']}\n"
|
context += f"- From {rel['from']} ({from_name}) to {rel['to']} ({to_name}): {rel['label']}\n"
|
||||||
|
|
||||||
return abstraction_listing, context, len(abstractions), project_name
|
return abstraction_listing, context, len(abstractions), project_name
|
||||||
@@ -327,6 +355,7 @@ class OrderChapters(Node):
|
|||||||
def exec(self, prep_res):
|
def exec(self, prep_res):
|
||||||
abstraction_listing, context, num_abstractions, project_name = prep_res
|
abstraction_listing, context, num_abstractions, project_name = prep_res
|
||||||
print("Determining chapter order using LLM...")
|
print("Determining chapter order using LLM...")
|
||||||
|
# No language variation needed here, just ordering based on structure
|
||||||
prompt = f"""
|
prompt = f"""
|
||||||
Given the following project abstractions and their relationships for the project ```` {project_name} ````:
|
Given the following project abstractions and their relationships for the project ```` {project_name} ````:
|
||||||
|
|
||||||
@@ -353,7 +382,6 @@ Now, provide the YAML output:
|
|||||||
response = call_llm(prompt)
|
response = call_llm(prompt)
|
||||||
|
|
||||||
# --- Validation ---
|
# --- Validation ---
|
||||||
# Rely on Node's built-in retry/fallback
|
|
||||||
yaml_str = response.strip().split("```yaml")[1].split("```")[0].strip()
|
yaml_str = response.strip().split("```yaml")[1].split("```")[0].strip()
|
||||||
ordered_indices_raw = yaml.safe_load(yaml_str)
|
ordered_indices_raw = yaml.safe_load(yaml_str)
|
||||||
|
|
||||||
@@ -395,8 +423,10 @@ Now, provide the YAML output:
|
|||||||
class WriteChapters(BatchNode):
|
class WriteChapters(BatchNode):
|
||||||
def prep(self, shared):
|
def prep(self, shared):
|
||||||
chapter_order = shared["chapter_order"] # List of indices
|
chapter_order = shared["chapter_order"] # List of indices
|
||||||
abstractions = shared["abstractions"] # List of dicts, now using 'files' with indices
|
abstractions = shared["abstractions"] # List of dicts, name/desc potentially translated
|
||||||
files_data = shared["files"]
|
files_data = shared["files"]
|
||||||
|
language = shared.get("language", "english") # Get language
|
||||||
|
|
||||||
# Get already written chapters to provide context
|
# Get already written chapters to provide context
|
||||||
# We store them temporarily during the batch run, not in shared memory yet
|
# We store them temporarily during the batch run, not in shared memory yet
|
||||||
# The 'previous_chapters_summary' will be built progressively in the exec context
|
# The 'previous_chapters_summary' will be built progressively in the exec context
|
||||||
@@ -408,11 +438,11 @@ class WriteChapters(BatchNode):
|
|||||||
for i, abstraction_index in enumerate(chapter_order):
|
for i, abstraction_index in enumerate(chapter_order):
|
||||||
if 0 <= abstraction_index < len(abstractions):
|
if 0 <= abstraction_index < len(abstractions):
|
||||||
chapter_num = i + 1
|
chapter_num = i + 1
|
||||||
chapter_name = abstractions[abstraction_index]["name"]
|
chapter_name = abstractions[abstraction_index]["name"] # Potentially translated name
|
||||||
# Create safe filename
|
# Create safe filename (from potentially translated name)
|
||||||
safe_name = "".join(c if c.isalnum() else '_' for c in chapter_name).lower()
|
safe_name = "".join(c if c.isalnum() else '_' for c in chapter_name).lower()
|
||||||
filename = f"{i+1:02d}_{safe_name}.md"
|
filename = f"{i+1:02d}_{safe_name}.md"
|
||||||
# Format with link
|
# Format with link (using potentially translated name)
|
||||||
all_chapters.append(f"{chapter_num}. [{chapter_name}]({filename})")
|
all_chapters.append(f"{chapter_num}. [{chapter_name}]({filename})")
|
||||||
# Store mapping of chapter index to filename for linking
|
# Store mapping of chapter index to filename for linking
|
||||||
chapter_filenames[abstraction_index] = {"num": chapter_num, "name": chapter_name, "filename": filename}
|
chapter_filenames[abstraction_index] = {"num": chapter_num, "name": chapter_name, "filename": filename}
|
||||||
@@ -423,52 +453,51 @@ class WriteChapters(BatchNode):
|
|||||||
items_to_process = []
|
items_to_process = []
|
||||||
for i, abstraction_index in enumerate(chapter_order):
|
for i, abstraction_index in enumerate(chapter_order):
|
||||||
if 0 <= abstraction_index < len(abstractions):
|
if 0 <= abstraction_index < len(abstractions):
|
||||||
abstraction_details = abstractions[abstraction_index]
|
abstraction_details = abstractions[abstraction_index] # Contains potentially translated name/desc
|
||||||
# Use 'files' (list of indices) directly
|
# Use 'files' (list of indices) directly
|
||||||
related_file_indices = abstraction_details.get("files", [])
|
related_file_indices = abstraction_details.get("files", [])
|
||||||
# Get content using helper, passing indices
|
# Get content using helper, passing indices
|
||||||
related_files_content_map = get_content_for_indices(files_data, related_file_indices)
|
related_files_content_map = get_content_for_indices(files_data, related_file_indices)
|
||||||
|
|
||||||
# Get previous chapter info for transitions
|
# Get previous chapter info for transitions (uses potentially translated name)
|
||||||
prev_chapter = None
|
prev_chapter = None
|
||||||
if i > 0:
|
if i > 0:
|
||||||
prev_idx = chapter_order[i-1]
|
prev_idx = chapter_order[i-1]
|
||||||
prev_chapter = chapter_filenames[prev_idx]
|
prev_chapter = chapter_filenames[prev_idx]
|
||||||
|
|
||||||
# Get next chapter info for transitions
|
# Get next chapter info for transitions (uses potentially translated name)
|
||||||
next_chapter = None
|
next_chapter = None
|
||||||
if i < len(chapter_order) - 1:
|
if i < len(chapter_order) - 1:
|
||||||
next_idx = chapter_order[i+1]
|
next_idx = chapter_order[i+1]
|
||||||
next_chapter = chapter_filenames[next_idx]
|
next_chapter = chapter_filenames[next_idx]
|
||||||
|
|
||||||
# Get language from shared store, default to English
|
|
||||||
language = shared.get("language", "english")
|
|
||||||
|
|
||||||
items_to_process.append({
|
items_to_process.append({
|
||||||
"chapter_num": i + 1,
|
"chapter_num": i + 1,
|
||||||
"abstraction_index": abstraction_index,
|
"abstraction_index": abstraction_index,
|
||||||
"abstraction_details": abstraction_details,
|
"abstraction_details": abstraction_details, # Has potentially translated name/desc
|
||||||
"related_files_content_map": related_files_content_map,
|
"related_files_content_map": related_files_content_map,
|
||||||
"project_name": shared["project_name"], # Add project name
|
"project_name": shared["project_name"], # Add project name
|
||||||
"full_chapter_listing": full_chapter_listing, # Add the full chapter listing
|
"full_chapter_listing": full_chapter_listing, # Add the full chapter listing (uses potentially translated names)
|
||||||
"chapter_filenames": chapter_filenames, # Add chapter filenames mapping
|
"chapter_filenames": chapter_filenames, # Add chapter filenames mapping (uses potentially translated names)
|
||||||
"prev_chapter": prev_chapter, # Add previous chapter info
|
"prev_chapter": prev_chapter, # Add previous chapter info (uses potentially translated name)
|
||||||
"next_chapter": next_chapter, # Add next chapter info
|
"next_chapter": next_chapter, # Add next chapter info (uses potentially translated name)
|
||||||
"language": language, # Add language for multi-language support
|
"language": language, # Add language for multi-language support
|
||||||
# previous_chapters_summary will be added dynamically in exec
|
# previous_chapters_summary will be added dynamically in exec
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
print(f"Warning: Invalid abstraction index {abstraction_index} in chapter_order. Skipping.")
|
print(f"Warning: Invalid abstraction index {abstraction_index} in chapter_order. Skipping.")
|
||||||
|
|
||||||
print(f"Preparing to write {len(items_to_process)} chapters...")
|
print(f"Preparing to write {len(items_to_process)} chapters in {language.capitalize()}...")
|
||||||
return items_to_process # Iterable for BatchNode
|
return items_to_process # Iterable for BatchNode
|
||||||
|
|
||||||
def exec(self, item):
|
def exec(self, item):
|
||||||
# This runs for each item prepared above
|
# This runs for each item prepared above
|
||||||
abstraction_name = item["abstraction_details"]["name"]
|
abstraction_name = item["abstraction_details"]["name"] # Potentially translated name
|
||||||
|
abstraction_description = item["abstraction_details"]["description"] # Potentially translated description
|
||||||
chapter_num = item["chapter_num"]
|
chapter_num = item["chapter_num"]
|
||||||
project_name = item.get("project_name") # Get from item
|
project_name = item.get("project_name")
|
||||||
print(f"Writing chapter {chapter_num} for: {abstraction_name} using LLM...")
|
language = item.get("language", "english")
|
||||||
|
print(f"Writing chapter {chapter_num} for: {abstraction_name} (in {language.capitalize()}) using LLM...")
|
||||||
|
|
||||||
# Prepare file context string from the map
|
# Prepare file context string from the map
|
||||||
file_context_str = "\n\n".join(
|
file_context_str = "\n\n".join(
|
||||||
@@ -480,58 +509,54 @@ class WriteChapters(BatchNode):
|
|||||||
# Use the temporary instance variable
|
# Use the temporary instance variable
|
||||||
previous_chapters_summary = "\n---\n".join(self.chapters_written_so_far)
|
previous_chapters_summary = "\n---\n".join(self.chapters_written_so_far)
|
||||||
|
|
||||||
# Get language from item, default to English
|
# Add language instruction if not English - the chapter content itself needs translation
|
||||||
language = item.get("language", "english")
|
|
||||||
|
|
||||||
# Add language instruction if not English
|
|
||||||
language_instruction = ""
|
language_instruction = ""
|
||||||
if language.lower() != "english":
|
if language.lower() != "english":
|
||||||
language_instruction = f"IMPORTANT: Write this ENTIRE tutorial chapter in {language} language. You MUST translate ALL content including explanations, examples, code comments, and technical terms into {language}. DO NOT use English anywhere except in code syntax and proper nouns. The entire output should be in {language} only.\n\n"
|
language_instruction = f"IMPORTANT: Write this ENTIRE tutorial chapter in **{language.capitalize()}** language. The concept name '{abstraction_name}' and its description are already provided in {language.capitalize()}. You MUST translate ALL other content including explanations, examples, code comments (unless essential for syntax), and technical terms into {language.capitalize()}. DO NOT use English anywhere except in code syntax, required proper nouns or where specified. The entire output MUST be in {language.capitalize()} only.\n\n"
|
||||||
|
|
||||||
prompt = f"""
|
prompt = f"""
|
||||||
{language_instruction}
|
{language_instruction}Write a very beginner-friendly tutorial chapter (in Markdown format) for the project `{project_name}` about the concept: "{abstraction_name}". This is Chapter {chapter_num}.
|
||||||
Write a very beginner-friendly tutorial chapter (in Markdown format) for the project `{project_name}` about the concept: "{abstraction_name}". This is Chapter {chapter_num}.
|
|
||||||
|
|
||||||
Concept Details:
|
Concept Details (already in {language.capitalize()}):
|
||||||
|
- Name: {abstraction_name}
|
||||||
- Description:
|
- Description:
|
||||||
{item["abstraction_details"]["description"]}
|
{abstraction_description}
|
||||||
|
|
||||||
Complete Tutorial Structure:
|
Complete Tutorial Structure (Chapter names might be in {language.capitalize()}):
|
||||||
{item["full_chapter_listing"]}
|
{item["full_chapter_listing"]}
|
||||||
|
|
||||||
Context from previous chapters (summary):
|
Context from previous chapters (summary, also in {language.capitalize()}):
|
||||||
{previous_chapters_summary if previous_chapters_summary else "This is the first chapter."}
|
{previous_chapters_summary if previous_chapters_summary else "This is the first chapter."}
|
||||||
|
|
||||||
Relevant Code Snippets:
|
Relevant Code Snippets (Code itself remains unchanged):
|
||||||
{file_context_str if file_context_str else "No specific code snippets provided for this abstraction."}
|
{file_context_str if file_context_str else "No specific code snippets provided for this abstraction."}
|
||||||
|
|
||||||
Instructions for the chapter:
|
Instructions for the chapter (Translate explanations into {language.capitalize()}):
|
||||||
- Start with a clear heading (e.g., `# Chapter {chapter_num}: {abstraction_name}`).
|
- Start with a clear heading (e.g., `# Chapter {chapter_num}: {abstraction_name}`). Use the provided {language.capitalize()} name.
|
||||||
|
|
||||||
- If this is not the first chapter, begin with a brief transition from the previous chapter, referencing it with a proper Markdown link.
|
- If this is not the first chapter, begin with a brief transition from the previous chapter (in {language.capitalize()}), referencing it with a proper Markdown link using its {language.capitalize()} name.
|
||||||
|
|
||||||
- Begin with a high-level motivation explaining what problem this abstraction solves. Start with a central use case as a concrete example. The whole chapter should guide the reader to understand how to solve this use case. Make it very minimal and friendly to beginners.
|
- Begin with a high-level motivation explaining what problem this abstraction solves (in {language.capitalize()}). Start with a central use case as a concrete example. The whole chapter should guide the reader to understand how to solve this use case. Make it very minimal and friendly to beginners.
|
||||||
|
|
||||||
- If the abstraction is complex, break it down into key concepts. Explain each concept one-by-one in a very beginner-friendly way.
|
- If the abstraction is complex, break it down into key concepts. Explain each concept one-by-one in a very beginner-friendly way (in {language.capitalize()}).
|
||||||
|
|
||||||
- Explain how to use this abstraction to solve the use case. Give example inputs and outputs for code snippets (if the output isn't values, describe at a high level what will happen).
|
- Explain how to use this abstraction to solve the use case (in {language.capitalize()}). Give example inputs and outputs for code snippets (if the output isn't values, describe at a high level what will happen in {language.capitalize()}).
|
||||||
|
|
||||||
- Each code block should be BELOW 20 lines! If longer code blocks are needed, break them down into smaller pieces and walk through them one-by-one. Aggresively simplify the code to make it minimal. Use comments to skip non-important implementation details. Each code block should have a beginner friendly explanation right after it.
|
- Each code block should be BELOW 20 lines! If longer code blocks are needed, break them down into smaller pieces and walk through them one-by-one. Aggresively simplify the code to make it minimal. Use comments (translate to {language.capitalize()} if possible, otherwise keep minimal English for clarity) to skip non-important implementation details. Each code block should have a beginner friendly explanation right after it (in {language.capitalize()}).
|
||||||
|
|
||||||
- Describe the internal implementation to help understand what's under the hood. First provide a non-code or code-light walkthrough on what happens step-by-step when the abstraction is called. It's recommended to use a simple sequenceDiagram with a dummy example - keep it minimal with at most 5 participants to ensure clarity. If participant name has space, use:
|
- Describe the internal implementation to help understand what's under the hood (in {language.capitalize()}). First provide a non-code or code-light walkthrough on what happens step-by-step when the abstraction is called (in {language.capitalize()}). It's recommended to use a simple sequenceDiagram with a dummy example - keep it minimal with at most 5 participants to ensure clarity. If participant name has space, use: `participant QP as Query Processing` (Use the {language.capitalize()} name if appropriate for participant labels).
|
||||||
`participant QP as Query Processing`
|
|
||||||
|
|
||||||
- Then dive deeper into code for the internal implementation with references to files. Provide example code blocks, but make them similarly simple and beginner-friendly.
|
- Then dive deeper into code for the internal implementation with references to files. Provide example code blocks, but make them similarly simple and beginner-friendly. Explain in {language.capitalize()}.
|
||||||
|
|
||||||
- IMPORTANT: When you need to refer to other core abstractions covered in other chapters, ALWAYS use proper Markdown links like this: [Chapter Title](filename.md). Use the Complete Tutorial Structure above to find the correct filename. Example: "we will talk about [Query Processing](03_query_processing.md) in Chapter 3".
|
- IMPORTANT: When you need to refer to other core abstractions covered in other chapters, ALWAYS use proper Markdown links like this: [Chapter Title](filename.md). Use the Complete Tutorial Structure above to find the correct filename and the (potentially {language.capitalize()}) chapter title. Example: "we will talk about [Query Processing](03_query_processing.md) in Chapter 3". Translate the surrounding text.
|
||||||
|
|
||||||
- Use mermaid diagrams to illustrate complex concepts (```mermaid``` format).
|
- Use mermaid diagrams to illustrate complex concepts (```mermaid``` format). Translate labels/text within diagrams where appropriate.
|
||||||
|
|
||||||
- Heavily use analogies and examples throughout to help beginners understand.
|
- Heavily use analogies and examples throughout (in {language.capitalize()}) to help beginners understand.
|
||||||
|
|
||||||
- End the chapter with a brief conclusion that summarizes what was learned and provides a transition to the next chapter. If there is a next chapter, use a proper Markdown link: [Next Chapter Title](next_chapter_filename).
|
- End the chapter with a brief conclusion that summarizes what was learned (in {language.capitalize()}) and provides a transition to the next chapter (in {language.capitalize()}). If there is a next chapter, use a proper Markdown link: [Next Chapter Title](next_chapter_filename). Use the {language.capitalize()} title.
|
||||||
|
|
||||||
- Ensure the tone is welcoming and easy for a newcomer to understand.
|
- Ensure the tone is welcoming and easy for a newcomer to understand (appropriate for {language.capitalize()} readers).
|
||||||
|
|
||||||
- Output *only* the Markdown content for this chapter.
|
- Output *only* the Markdown content for this chapter.
|
||||||
|
|
||||||
@@ -539,7 +564,7 @@ Now, directly provide a super beginner-friendly Markdown output (DON'T need ```m
|
|||||||
"""
|
"""
|
||||||
chapter_content = call_llm(prompt)
|
chapter_content = call_llm(prompt)
|
||||||
# Basic validation/cleanup
|
# Basic validation/cleanup
|
||||||
actual_heading = f"# Chapter {chapter_num}: {abstraction_name}"
|
actual_heading = f"# Chapter {chapter_num}: {abstraction_name}" # Use potentially translated name
|
||||||
if not chapter_content.strip().startswith(f"# Chapter {chapter_num}"):
|
if not chapter_content.strip().startswith(f"# Chapter {chapter_num}"):
|
||||||
# Add heading if missing or incorrect, trying to preserve content
|
# Add heading if missing or incorrect, trying to preserve content
|
||||||
lines = chapter_content.strip().split('\n')
|
lines = chapter_content.strip().split('\n')
|
||||||
@@ -552,7 +577,7 @@ Now, directly provide a super beginner-friendly Markdown output (DON'T need ```m
|
|||||||
# Add the generated content to our temporary list for the next iteration's context
|
# Add the generated content to our temporary list for the next iteration's context
|
||||||
self.chapters_written_so_far.append(chapter_content)
|
self.chapters_written_so_far.append(chapter_content)
|
||||||
|
|
||||||
return chapter_content # Return the Markdown string
|
return chapter_content # Return the Markdown string (potentially translated)
|
||||||
|
|
||||||
def post(self, shared, prep_res, exec_res_list):
|
def post(self, shared, prep_res, exec_res_list):
|
||||||
# exec_res_list contains the generated Markdown for each chapter, in order
|
# exec_res_list contains the generated Markdown for each chapter, in order
|
||||||
@@ -566,31 +591,31 @@ class CombineTutorial(Node):
|
|||||||
project_name = shared["project_name"]
|
project_name = shared["project_name"]
|
||||||
output_base_dir = shared.get("output_dir", "output") # Default output dir
|
output_base_dir = shared.get("output_dir", "output") # Default output dir
|
||||||
output_path = os.path.join(output_base_dir, project_name)
|
output_path = os.path.join(output_base_dir, project_name)
|
||||||
repo_url = shared["repo_url"] # Get the repository URL
|
repo_url = shared.get("repo_url") # Get the repository URL
|
||||||
|
# language = shared.get("language", "english") # No longer needed for fixed strings
|
||||||
|
|
||||||
# Use 'label' from relationships_data['details']
|
# Get potentially translated data
|
||||||
relationships_data = shared["relationships"] # {"summary": str, "details": [{"from": int, "to": int, "label": str}]}
|
relationships_data = shared["relationships"] # {"summary": str, "details": [{"from": int, "to": int, "label": str}]} -> summary/label potentially translated
|
||||||
chapter_order = shared["chapter_order"] # indices
|
chapter_order = shared["chapter_order"] # indices
|
||||||
abstractions = shared["abstractions"] # list of dicts
|
abstractions = shared["abstractions"] # list of dicts -> name/description potentially translated
|
||||||
chapters_content = shared["chapters"] # list of strings
|
chapters_content = shared["chapters"] # list of strings -> content potentially translated
|
||||||
|
|
||||||
# --- Generate Mermaid Diagram ---
|
# --- Generate Mermaid Diagram ---
|
||||||
mermaid_lines = ["flowchart TD"]
|
mermaid_lines = ["flowchart TD"]
|
||||||
# Add nodes for each abstraction
|
# Add nodes for each abstraction using potentially translated names
|
||||||
for i, abstr in enumerate(abstractions):
|
for i, abstr in enumerate(abstractions):
|
||||||
# Sanitize name for Mermaid ID and label
|
|
||||||
node_id = f"A{i}"
|
node_id = f"A{i}"
|
||||||
|
# Use potentially translated name, sanitize for Mermaid ID and label
|
||||||
sanitized_name = abstr['name'].replace('"', '')
|
sanitized_name = abstr['name'].replace('"', '')
|
||||||
node_label = sanitized_name # Using sanitized name only, no index
|
node_label = sanitized_name # Using sanitized name only
|
||||||
mermaid_lines.append(f' {node_id}["{node_label}"]')
|
mermaid_lines.append(f' {node_id}["{node_label}"]')
|
||||||
# Add edges for relationships using 'label'
|
# Add edges for relationships using potentially translated labels
|
||||||
for rel in relationships_data['details']:
|
for rel in relationships_data['details']:
|
||||||
from_node_id = f"A{rel['from']}"
|
from_node_id = f"A{rel['from']}"
|
||||||
to_node_id = f"A{rel['to']}"
|
to_node_id = f"A{rel['to']}"
|
||||||
# Sanitize 'label' for edge label
|
# Use potentially translated label, sanitize
|
||||||
edge_label = rel['label'].replace('"', '').replace('\n', ' ') # Basic sanitization
|
edge_label = rel['label'].replace('"', '').replace('\n', ' ') # Basic sanitization
|
||||||
# Limit edge label length for readability (optional, but good for diagrams)
|
max_label_len = 30
|
||||||
max_label_len = 30 # Make it shorter for labels
|
|
||||||
if len(edge_label) > max_label_len:
|
if len(edge_label) > max_label_len:
|
||||||
edge_label = edge_label[:max_label_len-3] + "..."
|
edge_label = edge_label[:max_label_len-3] + "..."
|
||||||
mermaid_lines.append(f' {from_node_id} -- "{edge_label}" --> {to_node_id}')
|
mermaid_lines.append(f' {from_node_id} -- "{edge_label}" --> {to_node_id}')
|
||||||
@@ -598,93 +623,43 @@ class CombineTutorial(Node):
|
|||||||
mermaid_diagram = "\n".join(mermaid_lines)
|
mermaid_diagram = "\n".join(mermaid_lines)
|
||||||
# --- End Mermaid ---
|
# --- End Mermaid ---
|
||||||
|
|
||||||
|
# --- Prepare index.md content ---
|
||||||
|
index_content = f"# Tutorial: {project_name}\n\n"
|
||||||
|
index_content += f"{relationships_data['summary']}\n\n" # Use the potentially translated summary directly
|
||||||
|
index_content += f"**Source Repository:** [{repo_url}]({repo_url})\n\n" # English "Source Repository"
|
||||||
|
|
||||||
# Get language from shared store, default to English
|
# Add Mermaid diagram for relationships (diagram itself uses potentially translated names/labels)
|
||||||
language = shared.get("language", "english")
|
index_content += "```mermaid\n"
|
||||||
|
index_content += mermaid_diagram + "\n"
|
||||||
|
index_content += "```\n\n"
|
||||||
|
|
||||||
# Prepare index.md content with language-specific titles
|
index_content += f"## Chapters\n\n" # English "Chapters"
|
||||||
if language.lower() != "english":
|
|
||||||
# For non-English languages, translate the content using LLM
|
|
||||||
|
|
||||||
# 1. Translate the title
|
|
||||||
title_prompt = f"Translate only the word 'Tutorial' to {language} language. Respond with just the translated word, nothing else."
|
|
||||||
translated_title = call_llm(title_prompt).strip()
|
|
||||||
index_content = f"# {translated_title}: {project_name}\n\n"
|
|
||||||
|
|
||||||
# 2. Translate the relationship summary
|
|
||||||
summary_prompt = f"Translate the following text to {language} language:\n\n{relationships_data['summary']}"
|
|
||||||
translated_summary = call_llm(summary_prompt)
|
|
||||||
index_content += f"{translated_summary}\n\n"
|
|
||||||
|
|
||||||
# 3. Translate "Source Repository"
|
|
||||||
repo_prompt = f"Translate only the phrase 'Source Repository' to {language} language. Respond with just the translated phrase, nothing else."
|
|
||||||
translated_repo = call_llm(repo_prompt).strip()
|
|
||||||
index_content += f"**{translated_repo}:** [{repo_url}]({repo_url})\n\n"
|
|
||||||
|
|
||||||
# Add Mermaid diagram for relationships
|
|
||||||
index_content += "```mermaid\n"
|
|
||||||
index_content += mermaid_diagram + "\n"
|
|
||||||
index_content += "```\n\n"
|
|
||||||
|
|
||||||
# 4. Translate "Chapters"
|
|
||||||
chapters_prompt = f"Translate only the word 'Chapters' to {language} language. Respond with just the translated word, nothing else."
|
|
||||||
translated_chapters = call_llm(chapters_prompt).strip()
|
|
||||||
index_content += f"## {translated_chapters}\n\n"
|
|
||||||
else:
|
|
||||||
# Original English content
|
|
||||||
index_content = f"# Tutorial: {project_name}\n\n"
|
|
||||||
index_content += f"{relationships_data['summary']}\n\n"
|
|
||||||
index_content += f"**Source Repository:** [{repo_url}]({repo_url})\n\n"
|
|
||||||
|
|
||||||
# Add Mermaid diagram for relationships
|
|
||||||
index_content += "```mermaid\n"
|
|
||||||
index_content += mermaid_diagram + "\n"
|
|
||||||
index_content += "```\n\n"
|
|
||||||
|
|
||||||
index_content += "## Chapters\n\n"
|
|
||||||
|
|
||||||
chapter_files = []
|
chapter_files = []
|
||||||
# Generate chapter links based on the determined order
|
# Generate chapter links based on the determined order, using potentially translated names
|
||||||
for i, abstraction_index in enumerate(chapter_order):
|
for i, abstraction_index in enumerate(chapter_order):
|
||||||
# Ensure index is valid and we have content for it
|
# Ensure index is valid and we have content for it
|
||||||
if 0 <= abstraction_index < len(abstractions) and i < len(chapters_content):
|
if 0 <= abstraction_index < len(abstractions) and i < len(chapters_content):
|
||||||
abstraction_name = abstractions[abstraction_index]["name"]
|
abstraction_name = abstractions[abstraction_index]["name"] # Potentially translated name
|
||||||
# Sanitize name for filename
|
# Sanitize potentially translated name for filename
|
||||||
safe_name = "".join(c if c.isalnum() else '_' for c in abstraction_name).lower()
|
safe_name = "".join(c if c.isalnum() else '_' for c in abstraction_name).lower()
|
||||||
# Use chapter number (i+1) for ordering filename
|
|
||||||
filename = f"{i+1:02d}_{safe_name}.md"
|
filename = f"{i+1:02d}_{safe_name}.md"
|
||||||
index_content += f"{i+1}. [{abstraction_name}]({filename})\n"
|
index_content += f"{i+1}. [{abstraction_name}]({filename})\n" # Use potentially translated name in link text
|
||||||
|
|
||||||
# Add attribution to chapter content
|
# Add attribution to chapter content (using English fixed string)
|
||||||
chapter_content = chapters_content[i]
|
chapter_content = chapters_content[i] # Potentially translated content
|
||||||
if not chapter_content.endswith("\n\n"):
|
if not chapter_content.endswith("\n\n"):
|
||||||
chapter_content += "\n\n"
|
chapter_content += "\n\n"
|
||||||
|
|
||||||
# Add attribution with language-specific text
|
chapter_content += f"---\n\nGenerated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)" # English "Generated by"
|
||||||
if language.lower() != "english":
|
|
||||||
# Translate "Generated by" to the target language
|
|
||||||
gen_prompt = f"Translate only the phrase 'Generated by' to {language} language. Respond with just the translated phrase, nothing else."
|
|
||||||
translated_gen = call_llm(gen_prompt).strip()
|
|
||||||
chapter_content += f"---\n\n{translated_gen} [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)"
|
|
||||||
else:
|
|
||||||
chapter_content += "---\n\nGenerated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)"
|
|
||||||
|
|
||||||
# Store filename and corresponding content
|
# Store filename and corresponding content
|
||||||
chapter_files.append({"filename": filename, "content": chapter_content})
|
chapter_files.append({"filename": filename, "content": chapter_content})
|
||||||
else:
|
else:
|
||||||
print(f"Warning: Mismatch between chapter order, abstractions, or content at index {i} (abstraction index {abstraction_index}). Skipping file generation for this entry.")
|
print(f"Warning: Mismatch between chapter order, abstractions, or content at index {i} (abstraction index {abstraction_index}). Skipping file generation for this entry.")
|
||||||
|
|
||||||
# Add attribution to index content with language-specific text
|
# Add attribution to index content (using English fixed string)
|
||||||
if language.lower() != "english":
|
index_content += f"\n\n---\n\nGenerated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)" # English "Generated by"
|
||||||
# We already have the translated "Generated by" phrase from above, reuse it
|
|
||||||
# If not available (which shouldn't happen), translate it again
|
|
||||||
if 'translated_gen' not in locals():
|
|
||||||
gen_prompt = f"Translate only the phrase 'Generated by' to {language} language. Respond with just the translated phrase, nothing else."
|
|
||||||
translated_gen = call_llm(gen_prompt).strip()
|
|
||||||
|
|
||||||
index_content += f"\n\n---\n\n{translated_gen} [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)"
|
|
||||||
else:
|
|
||||||
index_content += "\n\n---\n\nGenerated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)"
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"output_path": output_path,
|
"output_path": output_path,
|
||||||
|
|||||||
Reference in New Issue
Block a user