mirror of
https://github.com/aljazceru/gpt-engineer.git
synced 2025-12-17 20:55:09 +01:00
Merge branch 'main' of github.com:AntonOsika/gpt-engineer
* 'main' of github.com:AntonOsika/gpt-engineer:
  Mark test as failed because it requires OpenAI API access currently
  `black`
  Create test_ai.py
  fix to_files
  execute_workspace -> gen_entrypoint; execute_entrypoint
  Ignore my-new-project/
  Added CODE_OF_CONDUCT.md to the .github directory (#147)
  make pre commit pass in the whole codebase (#149)
  Create ci.yaml
  Fix linting
  Add support for directory paths in filenames and improve code splitting
    - Enforce an explicit markdown code block format
    - Add a token to split the output to clearly detect when the code blocks start
    - Save all non-code output to a `README.md` file
    - Update RegEx to extract and strip text more reliably and clean up the output
    - Update the identify prompts appropriately
  Enhance philosophy to include supporting documents
    - Create instructions for running/compiling the project
    - Create any package manager files
  Generate instructions for all platforms
    - Update prompt to create instructions for all 3 major OS platforms
    - Fix small typo
  Add support for directory creation and binary files
    - Use the `Path` module instead of `os`
    - Add ability to create any amount of missing directories for a given file
    - Add ability to save both text and binary files to save images (or other file types) later
  Add cleanup & move `projects` to their own directory
    - Add optional argument to clean and delete the working directories of the project before running the prompt
    - Add `.gitignore` entry to ignore all possible projects
    - Update readme
This commit is contained in:
@@ -8,10 +8,12 @@ class AI:
|
||||
try:
|
||||
openai.Model.retrieve("gpt-4")
|
||||
except openai.error.InvalidRequestError:
|
||||
print("Model gpt-4 not available for provided api key reverting "
|
||||
"to gpt-3.5.turbo. Sign up for the gpt-4 wait list here: "
|
||||
"https://openai.com/waitlist/gpt-4-api")
|
||||
self.kwargs['model'] = "gpt-3.5-turbo"
|
||||
print(
|
||||
"Model gpt-4 not available for provided api key reverting "
|
||||
"to gpt-3.5.turbo. Sign up for the gpt-4 wait list here: "
|
||||
"https://openai.com/waitlist/gpt-4-api"
|
||||
)
|
||||
self.kwargs["model"] = "gpt-3.5-turbo"
|
||||
|
||||
def start(self, system, user):
|
||||
messages = [
|
||||
@@ -26,10 +28,10 @@ class AI:
|
||||
|
||||
def fuser(self, msg):
|
||||
return {"role": "user", "content": msg}
|
||||
|
||||
def fassistant(self, msg):
|
||||
return {"role": "assistant", "content": msg}
|
||||
|
||||
|
||||
def next(self, messages: list[dict[str, str]], prompt=None):
|
||||
if prompt:
|
||||
messages = messages + [{"role": "user", "content": prompt}]
|
||||
|
||||
@@ -1,27 +1,42 @@
|
||||
import re
|
||||
from typing import List, Tuple
|
||||
from gpt_engineer.db import DB
|
||||
|
||||
|
||||
def parse_chat(chat) -> List[Tuple[str, str]]:
|
||||
# Get all ``` blocks
|
||||
regex = r"```(.*?)```"
|
||||
def parse_chat(chat): # -> List[Tuple[str, str]]:
|
||||
# Split the chat into sections by the "*CODEBLOCKSBELOW*" token
|
||||
split_chat = chat.split("*CODEBLOCKSBELOW*")
|
||||
|
||||
matches = re.finditer(regex, chat, re.DOTALL)
|
||||
# Check if the "*CODEBLOCKSBELOW*" token was found
|
||||
is_token_found = len(split_chat) > 1
|
||||
|
||||
# If the "*CODEBLOCKSBELOW*" token is found, use the first part as README
|
||||
# and second part as code blocks. Otherwise, treat README as optional and
|
||||
# proceed with empty README and the entire chat as code blocks
|
||||
readme = split_chat[0].strip() if is_token_found else "No readme"
|
||||
code_blocks = split_chat[1] if is_token_found else chat
|
||||
|
||||
# Get all ``` blocks and preceding filenames
|
||||
regex = r"(\S+?)\n```\S+\n(.+?)```"
|
||||
matches = re.finditer(regex, code_blocks, re.DOTALL)
|
||||
|
||||
files = []
|
||||
for match in matches:
|
||||
path = match.group(1).split("\n")[0]
|
||||
# Strip the filename of any non-allowed characters and convert / to \
|
||||
path = re.sub(r'[<>"|?*]', "", match.group(1))
|
||||
|
||||
# Get the code
|
||||
code = match.group(1).split("\n")[1:]
|
||||
code = "\n".join(code)
|
||||
code = match.group(2)
|
||||
|
||||
# Add the file to the list
|
||||
files.append((path, code))
|
||||
|
||||
# Add README to the list
|
||||
files.append(("README.txt", readme))
|
||||
|
||||
# Return the files
|
||||
return files
|
||||
|
||||
|
||||
def to_files(chat: str, workspace: DB):
|
||||
def to_files(chat, workspace):
|
||||
workspace["all_output.txt"] = chat
|
||||
|
||||
files = parse_chat(chat)
|
||||
|
||||
@@ -1,33 +1,51 @@
|
||||
from dataclasses import dataclass
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# This class represents a simple database that stores its data as files in a directory.
|
||||
# It supports both text and binary files, and can handle directory structures.
|
||||
class DB:
|
||||
"""A simple key-value store, where keys are filenames and values are file contents."""
|
||||
|
||||
def __init__(self, path):
|
||||
# Convert the path string to a Path object and get its absolute path.
|
||||
self.path = Path(path).absolute()
|
||||
os.makedirs(self.path, exist_ok=True)
|
||||
|
||||
# Create the directory if it doesn't exist.
|
||||
self.path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def __getitem__(self, key):
|
||||
with open(self.path / key, encoding='utf-8') as f:
|
||||
return f.read()
|
||||
# Combine the database directory with the provided file path.
|
||||
full_path = self.path / key
|
||||
|
||||
# Check if the file exists before trying to open it.
|
||||
if full_path.is_file():
|
||||
# Open the file in text mode and return its content.
|
||||
with full_path.open("r") as f:
|
||||
return f.read()
|
||||
else:
|
||||
# If the file doesn't exist, raise an error.
|
||||
raise FileNotFoundError(f"No such file: '{full_path}'")
|
||||
|
||||
def __setitem__(self, key, val):
|
||||
Path(self.path / key).absolute().parent.mkdir(parents=True, exist_ok=True)
|
||||
# Combine the database directory with the provided file path.
|
||||
full_path = self.path / key
|
||||
|
||||
with open(self.path / key, 'w', encoding='utf-8') as f:
|
||||
f.write(val)
|
||||
# Create the directory tree if it doesn't exist.
|
||||
full_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def __contains__(self, key):
|
||||
return (self.path / key).exists()
|
||||
# Write the data to the file. If val is a string, it's written as text.
|
||||
# If val is bytes, it's written as binary data.
|
||||
if isinstance(val, str):
|
||||
full_path.write_text(val)
|
||||
elif isinstance(val, bytes):
|
||||
full_path.write_bytes(val)
|
||||
else:
|
||||
# If val is neither a string nor bytes, raise an error.
|
||||
raise TypeError("val must be either a str or bytes")
|
||||
|
||||
|
||||
# dataclass for all dbs:
|
||||
@dataclass
|
||||
class DBs:
|
||||
"""A dataclass for all dbs"""
|
||||
|
||||
memory: DB
|
||||
logs: DB
|
||||
identity: DB
|
||||
|
||||
@@ -1,32 +1,41 @@
|
||||
import os
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
import shutil
|
||||
|
||||
import typer
|
||||
|
||||
from gpt_engineer.chat_to_files import to_files
|
||||
from gpt_engineer.ai import AI
|
||||
from gpt_engineer.steps import STEPS
|
||||
from gpt_engineer.db import DB, DBs
|
||||
|
||||
from gpt_engineer.steps import STEPS
|
||||
|
||||
app = typer.Typer()
|
||||
|
||||
|
||||
@app.command()
|
||||
def chat(
|
||||
project_path: str = typer.Argument(str(pathlib.Path(os.path.curdir) / "example"), help="path"),
|
||||
project_path: str = typer.Argument("example", help="path"),
|
||||
delete_existing: str = typer.Argument(None, help="delete existing files"),
|
||||
run_prefix: str = typer.Option(
|
||||
"",
|
||||
help="run prefix, if you want to run multiple variants of the same project and later compare them",
|
||||
help=(
|
||||
"run prefix, if you want to run multiple variants of the same project and "
|
||||
"later compare them",
|
||||
),
|
||||
),
|
||||
model: str = "gpt-4",
|
||||
temperature: float = 0.1,
|
||||
steps_config: str = "default",
|
||||
):
|
||||
app_dir = pathlib.Path(os.path.curdir)
|
||||
input_path = project_path
|
||||
memory_path = pathlib.Path(project_path) / (run_prefix + "memory")
|
||||
workspace_path = pathlib.Path(project_path) / (run_prefix + "workspace")
|
||||
input_path = pathlib.Path(app_dir / "projects" / project_path)
|
||||
memory_path = input_path / (run_prefix + "memory")
|
||||
workspace_path = input_path / (run_prefix + "workspace")
|
||||
|
||||
if delete_existing == "true":
|
||||
# Delete files and subdirectories in paths
|
||||
shutil.rmtree(memory_path, ignore_errors=True)
|
||||
shutil.rmtree(workspace_path, ignore_errors=True)
|
||||
|
||||
ai = AI(
|
||||
model=model,
|
||||
@@ -45,5 +54,6 @@ def chat(
|
||||
messages = step(ai, dbs)
|
||||
dbs.logs[step.__name__] = json.dumps(messages)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app()
|
||||
|
||||
@@ -2,9 +2,8 @@ import json
|
||||
import subprocess
|
||||
|
||||
from gpt_engineer.ai import AI
|
||||
from gpt_engineer.chat_to_files import to_files
|
||||
from gpt_engineer.chat_to_files import parse_chat, to_files
|
||||
from gpt_engineer.db import DBs
|
||||
from gpt_engineer.chat_to_files import parse_chat
|
||||
|
||||
|
||||
def setup_sys_prompt(dbs):
|
||||
@@ -54,7 +53,8 @@ def clarify(ai: AI, dbs: DBs):
|
||||
|
||||
def gen_spec(ai: AI, dbs: DBs):
|
||||
"""
|
||||
Generate a spec from the main prompt + clarifications and save the results to the workspace
|
||||
Generate a spec from the main prompt + clarifications and save the results to
|
||||
the workspace
|
||||
"""
|
||||
messages = [
|
||||
ai.fsystem(setup_sys_prompt(dbs)),
|
||||
@@ -67,6 +67,7 @@ def gen_spec(ai: AI, dbs: DBs):
|
||||
|
||||
return messages
|
||||
|
||||
|
||||
def respec(ai: AI, dbs: DBs):
|
||||
messages = dbs.logs[gen_spec.__name__]
|
||||
messages += [ai.fsystem(dbs.identity["respec"])]
|
||||
@@ -75,10 +76,13 @@ def respec(ai: AI, dbs: DBs):
|
||||
messages = ai.next(
|
||||
messages,
|
||||
(
|
||||
'Based on the conversation so far, please reiterate the specification for the program. '
|
||||
'If there are things that can be improved, please incorporate the improvements. '
|
||||
"If you are satisfied with the specification, just write out the specification word by word again."
|
||||
)
|
||||
"Based on the conversation so far, please reiterate the specification for "
|
||||
"the program. "
|
||||
"If there are things that can be improved, please incorporate the "
|
||||
"improvements. "
|
||||
"If you are satisfied with the specification, just write out the "
|
||||
"specification word by word again."
|
||||
),
|
||||
)
|
||||
|
||||
dbs.memory["specification"] = messages[-1]["content"]
|
||||
@@ -116,6 +120,7 @@ def gen_clarified_code(ai: AI, dbs: DBs):
|
||||
to_files(messages[-1]["content"], dbs.workspace)
|
||||
return messages
|
||||
|
||||
|
||||
def gen_code(ai: AI, dbs: DBs):
|
||||
# get the messages from previous step
|
||||
|
||||
@@ -130,12 +135,6 @@ def gen_code(ai: AI, dbs: DBs):
|
||||
return messages
|
||||
|
||||
|
||||
def execute_workspace(ai: AI, dbs: DBs):
|
||||
messages = gen_entrypoint(ai, dbs)
|
||||
execute_entrypoint(ai, dbs)
|
||||
return messages
|
||||
|
||||
|
||||
def execute_entrypoint(ai, dbs):
|
||||
command = dbs.workspace["run.sh"]
|
||||
|
||||
@@ -157,8 +156,10 @@ def execute_entrypoint(ai, dbs):
|
||||
def gen_entrypoint(ai, dbs):
|
||||
messages = ai.start(
|
||||
system=(
|
||||
f"You will get information about a codebase that is currently on disk in the current folder.\n"
|
||||
"From this you will answer with one code block that includes all the necessary macos terminal commands to "
|
||||
"You will get information about a codebase that is currently on disk in "
|
||||
f"the folder {dbs.workspace.path}.\n"
|
||||
"From this you will answer with code blocks that includes all the necessary "
|
||||
"Windows, MacOS, and Linux terminal commands to "
|
||||
"a) install dependencies "
|
||||
"b) run all necessary parts of the codebase (in parallell if necessary).\n"
|
||||
"Do not install globally. Do not use sudo.\n"
|
||||
@@ -170,11 +171,16 @@ def gen_entrypoint(ai, dbs):
|
||||
|
||||
blocks = parse_chat(messages[-1]["content"])
|
||||
for lang, _ in blocks:
|
||||
assert lang in ["", "bash", "sh"], "Generated entrypoint command that was not bash"
|
||||
assert lang in [
|
||||
"",
|
||||
"bash",
|
||||
"sh",
|
||||
], "Generated entrypoint command that was not bash"
|
||||
|
||||
dbs.workspace["run.sh"] = "\n".join(block for lang, block in blocks)
|
||||
return messages
|
||||
|
||||
|
||||
def use_feedback(ai: AI, dbs: DBs):
|
||||
messages = [
|
||||
ai.fsystem(setup_sys_prompt(dbs)),
|
||||
@@ -182,18 +188,25 @@ def use_feedback(ai: AI, dbs: DBs):
|
||||
ai.fassistant(dbs.workspace["all_output.txt"]),
|
||||
ai.fsystem(dbs.identity["use_feedback"]),
|
||||
]
|
||||
messages = ai.next(messages, dbs.memory['feedback'])
|
||||
messages = ai.next(messages, dbs.memory["feedback"])
|
||||
to_files(messages[-1]["content"], dbs.workspace)
|
||||
return messages
|
||||
|
||||
|
||||
# Different configs of what steps to run
|
||||
STEPS = {
|
||||
"default": [gen_spec, gen_unit_tests, gen_code, execute_workspace],
|
||||
"default": [gen_spec, gen_unit_tests, gen_code, gen_entrypoint, execute_entrypoint],
|
||||
"benchmark": [gen_spec, gen_unit_tests, gen_code, gen_entrypoint],
|
||||
"simple": [simple_gen, execute_workspace],
|
||||
"clarify": [clarify, gen_clarified_code, execute_workspace],
|
||||
"respec": [gen_spec, respec, gen_unit_tests, gen_code, execute_workspace],
|
||||
"simple": [simple_gen, gen_entrypoint, execute_entrypoint],
|
||||
"clarify": [clarify, gen_clarified_code, gen_entrypoint, execute_entrypoint],
|
||||
"respec": [
|
||||
gen_spec,
|
||||
respec,
|
||||
gen_unit_tests,
|
||||
gen_code,
|
||||
gen_entrypoint,
|
||||
execute_entrypoint,
|
||||
],
|
||||
"execute_only": [execute_entrypoint],
|
||||
"use_feedback": [use_feedback],
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user