Merge branch 'main' into directory-creation

# Conflicts: # .gitignore # gpt_engineer/steps.py # identity/generate # identity/use_qa
2025-12-17 12:45:26 +01:00 · 2023-06-18 10:18:27 +02:00
parent e7df947b98 4b093cb29b
commit 2f2fef50fe
12 changed files with 182 additions and 52 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,7 @@ build/

 # Virtual environments
 .env
+.env.sh
 venv/
 ENV/

@@ -36,6 +37,7 @@ archive

 # any log file
 *log.txt
+todo

 # ignore all project files
 projects
--- a/53
+++ b/53
@@ -0,0 +1,53 @@
+SHELL := /bin/bash
+
+# Color codes
+COLOR_RESET=\033[0m
+COLOR_CYAN=\033[1;36m
+COLOR_GREEN=\033[1;32m
+
+.PHONY: help install dev-install run
+
+.DEFAULT_GOAL := help
+
+.SILENT:
+
+name := $(word 2,$(MAKECMDGOALS))
+
+help:
+	@echo "Please use 'make <target>' where <target> is one of the following:"
+	@echo "  help           	Return this message with usage instructions."
+	@echo "  install        	Will install the dependencies and create a virtual environment."
+	@echo "  dev-install    	Will install the dev dependencies too."
+	@echo "  run <folder_name>  Runs GPT Engineer on the folder with the given name."
+
+dev-install: create-venv upgrade-pip install-dependencies install-pre-commit farewell
+
+install: create-venv upgrade-pip install-dependencies farewell
+
+create-venv:
+	@echo -e "$(COLOR_CYAN)Creating virtual environment...$(COLOR_RESET)" && \
+	python -m venv venv
+
+upgrade-pip:
+	@echo -e "$(COLOR_CYAN)Upgrading pip...$(COLOR_RESET)" && \
+	source venv/bin/activate && \
+	pip install --upgrade pip >> /dev/null
+
+install-dependencies:
+	@echo -e "$(COLOR_CYAN)Installing dependencies...$(COLOR_RESET)" && \
+	source venv/bin/activate && \
+	pip install -r requirements.txt >> /dev/null
+
+install-pre-commit:
+	@echo -e "$(COLOR_CYAN)Installing pre-commit hooks...$(COLOR_RESET)" && \
+	source venv/bin/activate && \
+	pre-commit install
+
+farewell:
+	@echo -e "$(COLOR_GREEN)All done!$(COLOR_RESET)"
+
+# Runs GPT Engineer inside the virtual environment.
+# $(name) is the second word of MAKECMDGOALS (see the `name :=` assignment
+# above), so `make run my_folder` passes `my_folder` to gpt_engineer.main.
+# NOTE(review): make also treats <folder_name> as a goal of its own; presumably
+# this relies on a file or directory of that name existing — confirm.
+run:
+	@echo -e "$(COLOR_CYAN)Running GPT Engineer on $(COLOR_GREEN)$(name)$(COLOR_CYAN) folder...$(COLOR_RESET)" && \
+	source venv/bin/activate && \
+	python -m gpt_engineer.main $(name)
+
--- a/README.md
+++ b/README.md
@@ -3,6 +3,8 @@

 GPT Engineer is made to be easy to adapt, extend, and make your agent learn how you want your code to look. It generates an entire codebase based on a prompt.

+[Demo](https://twitter.com/antonosika/status/1667641038104674306) 👶🤖
+
 ## Project philosophy
 - Simple to get value
 - Flexible and easy to add new own "AI steps". See `steps.py`.
@@ -13,10 +15,6 @@ GPT Engineer is made to be easy to adapt, extend, and make your agent learn how
 - Simplicity, all computation is "resumable" and persisted to the filesystem


-
-![output](https://github.com/AntonOsika/gpt-engineer/assets/4467025/a6938d43-2ac1-4cf1-98d1-93eea1bdfce4)
-
-
 ## Usage

 **Setup**:
--- a/gpt_engineer/ai.py
+++ b/gpt_engineer/ai.py
@@ -26,6 +26,9 @@ class AI:

    def fuser(self, msg):
        return {"role": "user", "content": msg}
+    def fassistant(self, msg):
+        """Wrap ``msg`` as a chat message dict carrying the "assistant" role."""
+        role = "assistant"
+        return {"role": role, "content": msg}
+

    def next(self, messages: list[dict[str, str]], prompt=None):
        if prompt:
--- a/gpt_engineer/steps.py
+++ b/gpt_engineer/steps.py
@@ -8,10 +8,10 @@ from gpt_engineer.chat_to_files import parse_chat


 def setup_sys_prompt(dbs):
-    return dbs.identity["setup"] + "\nUseful to know:\n" + dbs.identity["philosophy"]
+    return dbs.identity["generate"] + "\nUseful to know:\n" + dbs.identity["philosophy"]


-def run(ai: AI, dbs: DBs):
+def simple_gen(ai: AI, dbs: DBs):
    """Run the AI on the main prompt and save the results"""
    messages = ai.start(
        setup_sys_prompt(dbs),
@@ -30,7 +30,7 @@ def clarify(ai: AI, dbs: DBs):
    while True:
        messages = ai.next(messages, user)

-        if messages[-1]['content'].strip().lower().startswith("no"):
+        if messages[-1]["content"].strip().lower().startswith("no"):
            break

        print()
@@ -53,34 +53,70 @@ def clarify(ai: AI, dbs: DBs):


 def gen_spec(ai: AI, dbs: DBs):
-    '''
+    """
    Generate a spec from the main prompt + clarifications and save the results to the workspace
-    '''
-    messages = [ai.fsystem(setup_sys_prompt(dbs)), ai.fsystem(f"Main prompt: {dbs.input['main_prompt']}")]
+    """
+    messages = [
+        ai.fsystem(setup_sys_prompt(dbs)),
+        ai.fsystem(f"Instructions: {dbs.input['main_prompt']}"),
+    ]

-    messages = ai.next(messages, dbs.identity['spec'])
-    messages = ai.next(messages, dbs.identity['respec'])
-    messages = ai.next(messages, dbs.identity['spec'])
+    messages = ai.next(messages, dbs.identity["spec"])

-    dbs.memory['specification'] = messages[-1]['content']
+    dbs.memory["specification"] = messages[-1]["content"]

    return messages

-def pre_unit_tests(ai: AI, dbs: DBs):
-    '''
+def respec(ai: AI, dbs: DBs):
+    """
+    Review the specification produced by ``gen_spec`` and store a revised version.
+
+    Loads the logged ``gen_spec`` conversation, appends the ``respec`` identity as a
+    system message, asks the AI for its review, and then asks it to restate the
+    specification. The final reply overwrites ``dbs.memory["specification"]``.
+
+    Returns the full message list of the conversation.
+    """
+    # Decode the log the same way gen_clarified_code decodes the clarify log.
+    # The stored value is presumably a JSON string, so appending a list to it
+    # without decoding would fail.
+    messages = json.loads(dbs.logs[gen_spec.__name__])
+    messages += [ai.fsystem(dbs.identity["respec"])]
+
+    # First call: let the model answer the review prompt appended above.
+    messages = ai.next(messages)
+    # Second call: have it write out the (possibly improved) specification.
+    messages = ai.next(
+        messages,
+        (
+            "Based on the conversation so far, please reiterate the specification for the program. "
+            "If there are things that can be improved, please incorporate the improvements. "
+            "If you are satisfied with the specification, just write out the specification word by word again."
+        ),
+    )
+
+    dbs.memory["specification"] = messages[-1]["content"]
+    return messages
+
+
+def gen_unit_tests(ai: AI, dbs: DBs):
+    """
    Generate unit tests based on the specification, that should work.
-    '''
-    messages = [ai.fsystem(setup_sys_prompt(dbs)), ai.fuser(f"Instructions: {dbs.input['main_prompt']}"), ai.fuser(f"Specification:\n\n{dbs.memory['specification']}")]
+    """
+    messages = [
+        ai.fsystem(setup_sys_prompt(dbs)),
+        ai.fuser(f"Instructions: {dbs.input['main_prompt']}"),
+        ai.fuser(f"Specification:\n\n{dbs.memory['specification']}"),
+    ]

-    messages = ai.next(messages, dbs.identity['unit_tests'])
+    messages = ai.next(messages, dbs.identity["unit_tests"])

-    dbs.memory['unit_tests'] = messages[-1]['content']
-    to_files(dbs.memory['unit_tests'], dbs.workspace)
+    dbs.memory["unit_tests"] = messages[-1]["content"]
+    to_files(dbs.memory["unit_tests"], dbs.workspace)

    return messages


-def run_clarified(ai: AI, dbs: DBs):
+def gen_clarified_code(ai: AI, dbs: DBs):
+    """
+    Generate code from the logged clarification dialogue and write it to the workspace.
+
+    Returns the full message list, ending with the AI's generated answer.
+    """
+    # Reuse the clarify conversation, but replace its first message with the
+    # setup system prompt.
+    clarify_log = json.loads(dbs.logs[clarify.__name__])
+    conversation = [ai.fsystem(setup_sys_prompt(dbs)), *clarify_log[1:]]
+
+    conversation = ai.next(conversation, dbs.identity["use_qa"])
+    answer = conversation[-1]["content"]
+
+    to_files(answer, dbs.workspace)
+    return conversation
+
+def gen_code(ai: AI, dbs: DBs):
    # get the messages from previous step

    messages = [
@@ -95,39 +131,70 @@ def run_clarified(ai: AI, dbs: DBs):


 def execute_workspace(ai: AI, dbs: DBs):
+    """
+    Generate the ``run.sh`` entrypoint, then offer to execute it.
+
+    Returns the messages produced while generating the entrypoint.
+    """
+    entrypoint_messages = gen_entrypoint(ai, dbs)
+    execute_entrypoint(ai, dbs)
+    return entrypoint_messages
+
+
+def execute_entrypoint(ai, dbs):
+    """
+    Show the workspace's ``run.sh`` to the user and run it unless they decline.
+
+    ``ai`` is not used here; it is accepted so the function can be listed in
+    ``STEPS`` next to the other ``(ai, dbs)`` steps.
+
+    Returns an empty list, since this step produces no AI messages.
+    """
+    command = dbs.workspace["run.sh"]
+
+    print("Do you want to execute this code?")
+    print()
+    print(command)
+    print()
+    print('If yes, press enter. If no, type "no"')
+    print()
+    if input() == "no":
+        print("Ok, not executing the code.")
+        # Stop here: without this return the script ran even after the user declined.
+        return []
+    print("Executing the code...")
+    print()
+    # Run from the workspace directory so "run.sh" resolves to the file shown above.
+    subprocess.run("bash run.sh", shell=True, cwd=dbs.workspace.path)
+    return []
+
+
+def gen_entrypoint(ai, dbs):
    messages = ai.start(
        system=(
            f"You will get information about a codebase that is currently on disk in the folder {dbs.workspace.path}.\n"
            "From this you will answer with code blocks that includes all the necessary Windows, MacOS, and Linux terminal commands to "
            "a) install dependencies "
-            "b) run the necessary parts of the codebase to try it.\n"
+            "b) run all necessary parts of the codebase (in parallell if necessary).\n"
+            "Do not install globally. Do not use sudo.\n"
            "Do not explain the code, just give the commands.\n"
        ),
        user="Information about the codebase:\n\n" + dbs.workspace["all_output.txt"],
    )
+    print()

-    [[lang, command]] = parse_chat(messages[-1]['content'])
-    assert lang in ['', 'bash', 'sh']
+    blocks = parse_chat(messages[-1]["content"])
+    for lang, _ in blocks:
+        assert lang in ["", "bash", "sh"], "Generated entrypoint command that was not bash"

-    print('Do you want to execute this code?')
-    print(command)
-    print()
-    print('If yes, press enter. If no, type "no"')
-    print()
-    if input() == 'no':
-        print('Ok, not executing the code.')
-        return messages
-    print('Executing the code...')
-    print()
-    subprocess.run(command, shell=True)
+    dbs.workspace["run.sh"] = "\n".join(block for lang, block in blocks)
+    return messages
+
+def use_feedback(ai: AI, dbs: DBs):
+    """
+    Ask the AI to revise the previously generated output using stored feedback.
+
+    The conversation is built from the setup system prompt, the main prompt, the
+    earlier ``all_output.txt`` (as an assistant message) and the ``use_feedback``
+    identity; the text in ``dbs.memory["feedback"]`` is sent as the next prompt.
+    The reply is written to the workspace via ``to_files``.
+    """
+    system_prompt = ai.fsystem(setup_sys_prompt(dbs))
+    instructions = ai.fuser(f"Instructions: {dbs.input['main_prompt']}")
+    previous_output = ai.fassistant(dbs.workspace["all_output.txt"])
+    feedback_prompt = ai.fsystem(dbs.identity["use_feedback"])
+
+    messages = ai.next(
+        [system_prompt, instructions, previous_output, feedback_prompt],
+        dbs.memory["feedback"],
+    )
+    revised = messages[-1]["content"]
+    to_files(revised, dbs.workspace)
     return messages


 # Different configs of what steps to run
 STEPS = {
-    'default': [gen_spec, pre_unit_tests, run_clarified, execute_workspace],
-    'simple': [run, execute_workspace],
-    'clarify': [clarify, run_clarified],
+    "default": [gen_spec, gen_unit_tests, gen_code, execute_workspace],
+    "benchmark": [gen_spec, gen_unit_tests, gen_code, gen_entrypoint],
+    "simple": [simple_gen, execute_workspace],
+    "clarify": [clarify, gen_clarified_code, execute_workspace],
+    "respec": [gen_spec, respec, gen_unit_tests, gen_code, execute_workspace],
+    "execute_only": [execute_entrypoint],
+    "use_feedback": [use_feedback],
 }

 # Future steps that can be added:
--- a/identity/generate
+++ b/identity/generate
--- a/identity/respec
+++ b/identity/respec
@@ -1,8 +1,12 @@
-You are a pragmatic principal engineer at Google. You have been asked to review a specification for a new feature.
+You are a pragmatic principal engineer at Google.
+You have been asked to review a specification for a new feature by a previous version of yourself.

 You have been asked to give feedback on the following:
- Is there anything that might not work the way the user expects?
- Is there anything missing for the program to fully work?
- Is there anything that can be simplified without decreasing quality?
+- Is there anything that might not work the way intended by the instructions?
+- Is there anything in the specification missing for the program to work as expected?
+- Is there anything that can be simplified without significant drawback?

-You are asked to make educated assumptions for each unclear item. For each of these, communicate which assumptions you'll make when implementing the feature.
+You are asked to make educated assumptions for each unclear item.
+For each of these, communicate which assumptions you'll make when implementing the feature.
+
+Think step by step to make sure we don't miss anything.
--- a/identity/spec
+++ b/identity/spec
@@ -1,8 +1,11 @@
-You are a super smart developer and an AI developed to write programs. You have been asked to make a specification for a program.
+You are a super smart developer. You have been asked to make a specification for a program.

-Please generate a specification based on the given input. First, be super explicit about what the program should do, which features it should have and give details about anything that might be unclear. **Don't leave anything unclear or undefined.**
+Think step by step to make sure we get a high quality specification and we don't miss anything.
+First, be super explicit about what the program should do, which features it should have
+and give details about anything that might be unclear. **Don't leave anything unclear or undefined.**

-Second, lay out the names of the core classes, functions, methods that will be necessary, As well as a quick comment on their purpose.
-Then write out which non-standard dependencies you'll have to use.
+Second, lay out the names of the core classes, functions, methods that will be necessary,
+as well as a quick comment on their purpose.
+Finally, list all non-standard dependencies you'll have to use.

-This specification will be used later as the basis for your implementation.
+This specification will be used later as the basis for the implementation.
--- a/identity/unit_tests
+++ b/identity/unit_tests
@@ -1,3 +1,3 @@
-You are a super smart developer and an AI developed to use Test Driven Development to write tests according to a specification.
+You are a super smart developer using Test Driven Development to write tests according to a specification.

 Please generate tests based on the above specification. The tests should be as simple as possible, but still cover all the functionality.
--- a/identity/use_feedback
+++ b/identity/use_feedback
--- a/tests/test_db.py
+++ b/tests/test_db.py
@@ -1,4 +1,4 @@
-from ..db import DB
+from gpt_engineer.db import DB


 def test_db():