Merge branch 'main' of github.com:AntonOsika/gpt-engineer

* 'main' of github.com:AntonOsika/gpt-engineer:
  Mark test as failed because it requires OpenAI API access currently
  `black`
  Create test_ai.py
  fix to_files
  execute_workspace -> gen_entrypoint; execute_entrypoint
  Ignore my-new-project/
  Added CODE_OF_CONDUCT.md to the .github directory (#147)
  make pre commit pass in the whole codebase (#149)
  Create ci.yaml
  Fix linting
  Add support for directory paths in filenames and improve code splitting
    - Enforce an explicit markdown code block format
    - Add a token to split the output to clearly detect when the code blocks start
    - Save all non-code output to a `README.md` file
    - Update RegEx to extract and strip text more reliably and clean up the output
    - Update the identity prompts appropriately
  Enhance philosophy to include supporting documents
    - Create instructions for running/compiling the project
    - Create any package manager files
  Generate instructions for all platforms
    - Update prompt to create instructions for all 3 major OS platforms
    - Fix small typo
  Add support for directory creation and binary files
    - Use `pathlib.Path` instead of `os`
    - Add ability to create any number of missing directories for a given file
    - Add ability to save both text and binary files to save images (or other file types) later
  Add cleanup & move `projects` to their own directory
    - Add optional argument to clean and delete the working directories of the project before running the prompt
    - Add `.gitignore` entry to ignore all possible projects
    - Update readme
This commit is contained in:
Anton Osika
2023-06-18 15:13:34 +02:00
17 changed files with 355 additions and 117 deletions

View File

@@ -8,10 +8,12 @@ class AI:
try:
openai.Model.retrieve("gpt-4")
except openai.error.InvalidRequestError:
print("Model gpt-4 not available for provided api key reverting "
"to gpt-3.5.turbo. Sign up for the gpt-4 wait list here: "
"https://openai.com/waitlist/gpt-4-api")
self.kwargs['model'] = "gpt-3.5-turbo"
print(
"Model gpt-4 not available for provided api key reverting "
"to gpt-3.5.turbo. Sign up for the gpt-4 wait list here: "
"https://openai.com/waitlist/gpt-4-api"
)
self.kwargs["model"] = "gpt-3.5-turbo"
def start(self, system, user):
messages = [
@@ -26,10 +28,10 @@ class AI:
def fuser(self, msg):
return {"role": "user", "content": msg}
def fassistant(self, msg):
return {"role": "assistant", "content": msg}
def next(self, messages: list[dict[str, str]], prompt=None):
if prompt:
messages = messages + [{"role": "user", "content": prompt}]

View File

@@ -1,27 +1,42 @@
import re
from typing import List, Tuple
from gpt_engineer.db import DB
def parse_chat(chat) -> List[Tuple[str, str]]:
# Get all ``` blocks
regex = r"```(.*?)```"
def parse_chat(chat): # -> List[Tuple[str, str]]:
# Split the chat into sections by the "*CODEBLOCKSBELOW*" token
split_chat = chat.split("*CODEBLOCKSBELOW*")
matches = re.finditer(regex, chat, re.DOTALL)
# Check if the "*CODEBLOCKSBELOW*" token was found
is_token_found = len(split_chat) > 1
# If the "*CODEBLOCKSBELOW*" token is found, use the first part as README
# and second part as code blocks. Otherwise, treat README as optional and
# proceed with a placeholder README ("No readme") and the entire chat as code blocks
readme = split_chat[0].strip() if is_token_found else "No readme"
code_blocks = split_chat[1] if is_token_found else chat
# Get all ``` blocks and preceding filenames
regex = r"(\S+?)\n```\S+\n(.+?)```"
matches = re.finditer(regex, code_blocks, re.DOTALL)
files = []
for match in matches:
path = match.group(1).split("\n")[0]
# Strip characters that are not allowed in filenames (< > " | ? *) from the path
path = re.sub(r'[<>"|?*]', "", match.group(1))
# Get the code
code = match.group(1).split("\n")[1:]
code = "\n".join(code)
code = match.group(2)
# Add the file to the list
files.append((path, code))
# Add README to the list
files.append(("README.txt", readme))
# Return the files
return files
def to_files(chat: str, workspace: DB):
def to_files(chat, workspace):
workspace["all_output.txt"] = chat
files = parse_chat(chat)

View File

@@ -1,33 +1,51 @@
from dataclasses import dataclass
import os
from pathlib import Path
# This class represents a simple database that stores its data as files in a directory.
# It supports both text and binary files, and can handle directory structures.
class DB:
"""A simple key-value store, where keys are filenames and values are file contents."""
def __init__(self, path):
# Convert the path string to a Path object and get its absolute path.
self.path = Path(path).absolute()
os.makedirs(self.path, exist_ok=True)
# Create the directory if it doesn't exist.
self.path.mkdir(parents=True, exist_ok=True)
def __getitem__(self, key):
with open(self.path / key, encoding='utf-8') as f:
return f.read()
# Combine the database directory with the provided file path.
full_path = self.path / key
# Check if the file exists before trying to open it.
if full_path.is_file():
# Open the file in text mode and return its content.
with full_path.open("r") as f:
return f.read()
else:
# If the file doesn't exist, raise an error.
raise FileNotFoundError(f"No such file: '{full_path}'")
def __setitem__(self, key, val):
Path(self.path / key).absolute().parent.mkdir(parents=True, exist_ok=True)
# Combine the database directory with the provided file path.
full_path = self.path / key
with open(self.path / key, 'w', encoding='utf-8') as f:
f.write(val)
# Create the directory tree if it doesn't exist.
full_path.parent.mkdir(parents=True, exist_ok=True)
def __contains__(self, key):
return (self.path / key).exists()
# Write the data to the file. If val is a string, it's written as text.
# If val is bytes, it's written as binary data.
if isinstance(val, str):
full_path.write_text(val)
elif isinstance(val, bytes):
full_path.write_bytes(val)
else:
# If val is neither a string nor bytes, raise an error.
raise TypeError("val must be either a str or bytes")
# dataclass for all dbs:
@dataclass
class DBs:
"""A dataclass for all dbs"""
memory: DB
logs: DB
identity: DB

View File

@@ -1,32 +1,41 @@
import os
import json
import os
import pathlib
import shutil
import typer
from gpt_engineer.chat_to_files import to_files
from gpt_engineer.ai import AI
from gpt_engineer.steps import STEPS
from gpt_engineer.db import DB, DBs
from gpt_engineer.steps import STEPS
app = typer.Typer()
@app.command()
def chat(
project_path: str = typer.Argument(str(pathlib.Path(os.path.curdir) / "example"), help="path"),
project_path: str = typer.Argument("example", help="path"),
delete_existing: str = typer.Argument(None, help="delete existing files"),
run_prefix: str = typer.Option(
"",
help="run prefix, if you want to run multiple variants of the same project and later compare them",
help=(
"run prefix, if you want to run multiple variants of the same project and "
"later compare them",
),
),
model: str = "gpt-4",
temperature: float = 0.1,
steps_config: str = "default",
):
app_dir = pathlib.Path(os.path.curdir)
input_path = project_path
memory_path = pathlib.Path(project_path) / (run_prefix + "memory")
workspace_path = pathlib.Path(project_path) / (run_prefix + "workspace")
input_path = pathlib.Path(app_dir / "projects" / project_path)
memory_path = input_path / (run_prefix + "memory")
workspace_path = input_path / (run_prefix + "workspace")
if delete_existing == "true":
# Delete files and subdirectories in paths
shutil.rmtree(memory_path, ignore_errors=True)
shutil.rmtree(workspace_path, ignore_errors=True)
ai = AI(
model=model,
@@ -45,5 +54,6 @@ def chat(
messages = step(ai, dbs)
dbs.logs[step.__name__] = json.dumps(messages)
if __name__ == "__main__":
app()

View File

@@ -2,9 +2,8 @@ import json
import subprocess
from gpt_engineer.ai import AI
from gpt_engineer.chat_to_files import to_files
from gpt_engineer.chat_to_files import parse_chat, to_files
from gpt_engineer.db import DBs
from gpt_engineer.chat_to_files import parse_chat
def setup_sys_prompt(dbs):
@@ -54,7 +53,8 @@ def clarify(ai: AI, dbs: DBs):
def gen_spec(ai: AI, dbs: DBs):
"""
Generate a spec from the main prompt + clarifications and save the results to the workspace
Generate a spec from the main prompt + clarifications and save the results to
the workspace
"""
messages = [
ai.fsystem(setup_sys_prompt(dbs)),
@@ -67,6 +67,7 @@ def gen_spec(ai: AI, dbs: DBs):
return messages
def respec(ai: AI, dbs: DBs):
messages = dbs.logs[gen_spec.__name__]
messages += [ai.fsystem(dbs.identity["respec"])]
@@ -75,10 +76,13 @@ def respec(ai: AI, dbs: DBs):
messages = ai.next(
messages,
(
'Based on the conversation so far, please reiterate the specification for the program. '
'If there are things that can be improved, please incorporate the improvements. '
"If you are satisfied with the specification, just write out the specification word by word again."
)
"Based on the conversation so far, please reiterate the specification for "
"the program. "
"If there are things that can be improved, please incorporate the "
"improvements. "
"If you are satisfied with the specification, just write out the "
"specification word by word again."
),
)
dbs.memory["specification"] = messages[-1]["content"]
@@ -116,6 +120,7 @@ def gen_clarified_code(ai: AI, dbs: DBs):
to_files(messages[-1]["content"], dbs.workspace)
return messages
def gen_code(ai: AI, dbs: DBs):
# get the messages from previous step
@@ -130,12 +135,6 @@ def gen_code(ai: AI, dbs: DBs):
return messages
def execute_workspace(ai: AI, dbs: DBs):
messages = gen_entrypoint(ai, dbs)
execute_entrypoint(ai, dbs)
return messages
def execute_entrypoint(ai, dbs):
command = dbs.workspace["run.sh"]
@@ -157,8 +156,10 @@ def execute_entrypoint(ai, dbs):
def gen_entrypoint(ai, dbs):
messages = ai.start(
system=(
f"You will get information about a codebase that is currently on disk in the current folder.\n"
"From this you will answer with one code block that includes all the necessary macos terminal commands to "
"You will get information about a codebase that is currently on disk in "
f"the folder {dbs.workspace.path}.\n"
"From this you will answer with code blocks that includes all the necessary "
"Windows, MacOS, and Linux terminal commands to "
"a) install dependencies "
"b) run all necessary parts of the codebase (in parallell if necessary).\n"
"Do not install globally. Do not use sudo.\n"
@@ -170,11 +171,16 @@ def gen_entrypoint(ai, dbs):
blocks = parse_chat(messages[-1]["content"])
for lang, _ in blocks:
assert lang in ["", "bash", "sh"], "Generated entrypoint command that was not bash"
assert lang in [
"",
"bash",
"sh",
], "Generated entrypoint command that was not bash"
dbs.workspace["run.sh"] = "\n".join(block for lang, block in blocks)
return messages
def use_feedback(ai: AI, dbs: DBs):
messages = [
ai.fsystem(setup_sys_prompt(dbs)),
@@ -182,18 +188,25 @@ def use_feedback(ai: AI, dbs: DBs):
ai.fassistant(dbs.workspace["all_output.txt"]),
ai.fsystem(dbs.identity["use_feedback"]),
]
messages = ai.next(messages, dbs.memory['feedback'])
messages = ai.next(messages, dbs.memory["feedback"])
to_files(messages[-1]["content"], dbs.workspace)
return messages
# Different configs of what steps to run
STEPS = {
"default": [gen_spec, gen_unit_tests, gen_code, execute_workspace],
"default": [gen_spec, gen_unit_tests, gen_code, gen_entrypoint, execute_entrypoint],
"benchmark": [gen_spec, gen_unit_tests, gen_code, gen_entrypoint],
"simple": [simple_gen, execute_workspace],
"clarify": [clarify, gen_clarified_code, execute_workspace],
"respec": [gen_spec, respec, gen_unit_tests, gen_code, execute_workspace],
"simple": [simple_gen, gen_entrypoint, execute_entrypoint],
"clarify": [clarify, gen_clarified_code, gen_entrypoint, execute_entrypoint],
"respec": [
gen_spec,
respec,
gen_unit_tests,
gen_code,
gen_entrypoint,
execute_entrypoint,
],
"execute_only": [execute_entrypoint],
"use_feedback": [use_feedback],
}