mirror of https://github.com/aljazceru/gpt-engineer.git (synced 2025-12-17 12:45:26 +01:00)

Clean up clarifying question part
@@ -1 +1,9 @@
 # gpt-engineer
+
+How to use:
+
+- Install the requirements: `pip install -r requirements.txt`
+- Copy the example folder: `cp -r example my-new-project`
+- Edit the file main_prompt in my-new-project
+- Run `python main.py my-new-project`
+- Check the results in my-new-project/workspace

example/main_prompt (new file, 86 lines)
@@ -0,0 +1,86 @@
Instructions:

We are writing a feature computation framework.

It will mainly consist of FeatureBuilder classes.

Each FeatureBuilder will have the method:
- get(key, config, context, cache): Call the feature builder's dependencies and then compute the feature. Returns the value and a hash of the value.
  - key: tuple of arguments that are used to compute the feature
  - config: the configuration for the feature
  - context: dataclass that contains dependencies and general configuration (see below)
  - controller: object that can be used to get other features (see below)
  - value: object that can be pickled

It will have the class attrs:
- deps: list of FeatureBuilder classes
- default_config: function that accepts a context and returns a config

The Controller will have the method:
- get(feature_builder, key, config): Check the cache, decide whether to call the feature builder, and return the output and the timestamp it was computed at
  - feature_builder: FeatureBuilder class
  - key: tuple of arguments that are used to compute the feature
  - configs: dict of configs that are used to compute features

and the attributes:
- context: dataclass that contains dependencies and general configuration (see below)
- cache: cache for the features
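
A minimal sketch of one plausible reading of this spec (the `compute` hook, the dict-backed cache, and the hashing choice are assumptions the spec leaves open):

```python
import hashlib
import pickle
import time
from dataclasses import dataclass, field
from typing import Any, Callable


@dataclass
class Context:
    # Dependencies and general configuration, as described above.
    interaction_data_db: Any = None
    product_db: Any = None
    merchant_config: dict = field(default_factory=dict)


class FeatureBuilder:
    deps: list = []  # list of FeatureBuilder classes this one depends on
    default_config: Callable = staticmethod(lambda context: {})

    @classmethod
    def get(cls, key: tuple, config: dict, context: Context, cache: "Controller"):
        # Resolve dependencies through the controller, then compute.
        dep_values = {dep: cache.get(dep, key) for dep in cls.deps}
        value = cls.compute(key, config, context, dep_values)
        return value, hashlib.sha256(pickle.dumps(value)).hexdigest()

    @classmethod
    def compute(cls, key, config, context, dep_values):
        raise NotImplementedError  # assumed hook for subclasses


class Controller:
    def __init__(self, context: Context, cache: dict):
        self.context = context
        self.cache = cache  # a plain dict stands in for the real feature cache

    def get(self, feature_builder: type, key: tuple, config: dict | None = None):
        config = config or feature_builder.default_config(self.context)
        cache_key = (feature_builder.__name__, key)
        if cache_key not in self.cache:
            value, _hash = feature_builder.get(key, config, self.context, self)
            self.cache[cache_key] = (value, time.time())
        return self.cache[cache_key]  # (output, timestamp it was computed)
```

Note that the example file below calls `cache(ProductIds).get(...)`, i.e. the cache object is callable with a builder class; how cache and controller relate is itself one of the points to clarify, and this sketch picks just one reading.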

Where it is unclear, please make assumptions and add a comment in the code about it.

Here is an example of Builders we want (one of them is sketched after the list):

- ProductEmbeddingString: takes a product_id, queries the product_db, and gets the title as a string
- ProductEmbedding: takes a string and returns an embedding
- ProductEmbeddingDB: takes just the `merchant` name, uses all product_ids, and returns the blob that is a database of embeddings
- ProductEmbeddingSearcher: takes a string, constructs the embeddingDB feature (note: all features are cached), embeds the string, and searches the db
- LLMProductPrompt: queries the ProductEmbeddingString and formats a template that says "get recommendations for {title}"
- LLMSuggestions: takes a product_id, looks up prompts, and gets a list of suggested product descriptions
- LLMLogic: takes the product_id, gets the LLM suggestions, embeds the suggestions, does a search, and returns a list of product_ids
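
As a concrete illustration, the first builder on the list could look like this on top of the sketch above (the `get_title` accessor is an assumption; the real product_db API appears further below):

```python
class ProductEmbeddingString(FeatureBuilder):
    deps = []  # reads straight from the product DB, no upstream features

    @classmethod
    def compute(cls, key, config, context, dep_values):
        (product_id,) = key  # key is the tuple of arguments, per the spec
        return context.product_db.get_title(product_id)  # assumed accessor
```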

The LLMLogic is the logic_builder in a file such as this one:

```python
def main(merchant, market):
    cache = get_feature_cache()
    interaction_data_db = get_interaction_data_db()
    product_db = get_product_db()
    merchant_config = get_merchant_config(merchant)

    context = Context(
        interaction_data_db=interaction_data_db,
        product_db=product_db,
        merchant_config=merchant_config,
    )

    product_ids = cache(ProductIds).get(
        key=(merchant, market),
        context=context,
        cache=cache,
    )

    for logic_builder in merchant_config['logic_builders']:
        for product_id in product_ids:
            key = (merchant, market, product_id)
            p2p_recs = cache(logic_builder).get(key=key, context=context, cache=cache)
            redis.set(key, p2p_recs)
```

API to product_db:

```python
async def get_product_attribute_dimensions(
    self,
) -> dict[AttributeId, Dimension]:
    pass


async def get_products(
    self,
    attribute_ids: set[AttributeId],
    product_ids: set[ProductId] | None = None,
) -> dict[ProductId, dict[AttributeId, dict[IngestionDimensionKey, Any]]]:
    pass
```

(Note: dimensions are not so important. They relate to information that varies by locale, warehouse, pricelist, etc.)
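
For orientation, a call against this API might look like the following (the `db` handle, the "title" attribute id, and the dimension handling are illustrative assumptions):

```python
async def fetch_titles(db, product_ids: set) -> dict:
    products = await db.get_products(
        attribute_ids={"title"},  # assumed attribute id
        product_ids=product_ids,
    )
    # products[pid]["title"] maps IngestionDimensionKey -> value; since
    # dimensions are "not so important" here, just take the first one.
    return {
        pid: next(iter(attrs["title"].values()))
        for pid, attrs in products.items()
    }
```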

---

You will focus on writing the integration test file test_all.py.
This file will mock a lot of the necessary interfaces, run LLMLogic, and print the results from it.
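
A skeleton for test_all.py could start like this (the module layout and every imported name are assumptions about code that does not exist yet):

```python
# test_all.py -- integration-style smoke test, sketched
from unittest import mock

from features import Context, Controller, LLMLogic  # assumed module layout


def test_llm_logic_end_to_end():
    # Stub the external services the framework depends on.
    product_db = mock.Mock()
    product_db.get_title.return_value = "Blue running shoes"

    context = Context(product_db=product_db, merchant_config={})
    controller = Controller(context=context, cache={})

    # Controller.get returns (output, timestamp) in the sketch above.
    recs, _timestamp = controller.get(LLMLogic, key=("merchant", "market", "p-1"))
    print(recs)
```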

@@ -1,5 +0,0 @@
-You will improve instructions by reading:
-1. ORIGINAL INSTRUCTIONS
-2. CLARIFYING QUESTIONS AND ANSWERS
-
-As output you will give a new version of the original instruction but where the answers to the clarifying questions have been incorporated to make it completely clear.

@@ -1,4 +1,3 @@
-You will read instructions and NOT carry them out, only seek to CLARIFY them.
-You will carry out the steps:
-1. Write a list of super short bullets of areas that are unclear
-2. Ask for only one clarifying questions and wait for a reply
+You will read instructions and not carry them out, only seek to clarify them.
+Specifically you will first summarise a list of super short bullets of areas that need clarification.
+Then you will pick one clarifying question, and wait for an answer from the user.

@@ -10,6 +10,6 @@ Before you finish, double check that all parts of the architecture is present in

 File syntax:

-```main_file.py
+```file.py
 [ADD YOUR CODE HERE]
 ```

identity/use_qa (new file, 13 lines)
@@ -0,0 +1,13 @@
Please now remember the steps:

First lay out the names of the core classes, functions, and methods that will be necessary, as well as a quick comment on their purpose.
Then output the content of each file, with the syntax below.
(You will start with the "entrypoint" file, then go to the ones that are imported by that file, and so on.)
Make sure that files contain all imports, types, etc. The code should be fully functional. Make sure that code in different files is compatible.
Before you finish, double check that all parts of the architecture are present in the files.

File syntax:

```main_file.py
[ADD YOUR CODE HERE]
```

main.py
@@ -13,30 +13,23 @@ import typer
 app = typer.Typer()


 @app.command()
 def chat(
+    project_path: str = typer.Argument(None, help="path"),
+    run_prefix: str = typer.Option("", help="run prefix"),
     model: str = "gpt-4",
     temperature: float = 0.1,
     max_tokens: int = 4096,
     n: int = 1,
     stream: bool = True,
-    input_path: str = typer.Argument(
-        None, help="input path"
-    ),
-    memory_path: str = typer.Argument(
-        None, help="memory path"
-    ),
-    workspace_path: Optional[str] = typer.Option(
-        None, "--out", "-c", help="Code to file path"
-    ),
 ):

-    if memory_path is None:
-        memory_path = pathlib.Path(__file__).parent / 'memory'
+    if project_path is None:
+        project_path = str(pathlib.Path(__file__).parent / "example")

-    if input_path is None:
-        input_path = pathlib.Path(__file__).parent / 'input'
+    input_path = project_path
+    memory_path = pathlib.Path(project_path) / "memory"
+    workspace_path = pathlib.Path(project_path) / (run_prefix + "workspace")

     ai = AI(
         model=model,
@@ -49,13 +42,12 @@ def chat(

     dbs = DBs(
         memory=DB(memory_path),
-        logs=DB(pathlib.Path(memory_path) / 'logs'),
+        logs=DB(pathlib.Path(memory_path) / "logs"),
         input=DB(input_path),
         workspace=DB(workspace_path),
-        identity=DB(pathlib.Path(__file__).parent / 'identity'),
+        identity=DB(pathlib.Path(__file__).parent / "identity"),
     )

-    run_prefix= workspace_path.split('/')[-1] + '_' if workspace_path is not None else ''

     for step in STEPS:
         messages = step(ai, dbs)

requirements.txt (new file, 2 lines)
@@ -0,0 +1,2 @@
openai
typer

scripts/rerun_edited_message_logs.py (new file, 48 lines)
@@ -0,0 +1,48 @@
import json
import os
import pathlib
from typing import Optional

import openai

from chat_to_files import to_files
from ai import AI
from steps import STEPS
from db import DB, DBs
import typer


app = typer.Typer()


@app.command()
def chat(
    messages_path: str,
    out_path: str | None = None,
    model: str = "gpt-4",
    temperature: float = 0.1,
    max_tokens: int = 4096,
    n: int = 1,
    stream: bool = True,
):
    ai = AI(
        model=model, temperature=temperature,
        max_tokens=max_tokens,
        n=n,
        stream=stream,
        stop=None,
    )

    with open(messages_path) as f:
        messages = json.load(f)

    messages = ai.next(messages)

    if out_path:
        to_files(messages[-1]['content'], out_path)
        with open(pathlib.Path(out_path) / 'all_output.txt', 'w') as f:
            json.dump(messages[-1]['content'], f)


if __name__ == "__main__":
    app()

steps.py
@@ -3,6 +3,7 @@ from dataclasses import dataclass
 from typing import Callable
 from ai import AI
 from chat_to_files import to_files
+import json

 from db import DBs
 from db import DB
@@ -15,36 +16,44 @@ def setup(ai: AI, dbs: DBs):
     to_files(messages[-1]['content'], dbs.workspace)
     return messages


-def run_clarified(ai: AI, dbs: DBs):
-    messages = ai.start(setup_sys_prompt(dbs), dbs.input['main_prompt'])
-    to_files(messages[-1]['content'], DB(str(dbs.workspace.path)+'_clarified'))
-    return messages
-
-
 def clarify(ai: AI, dbs: DBs):
     messages = [ai.fsystem(dbs.identity['qa'])]
     user = dbs.input['main_prompt']
     while True:
         messages = ai.next(messages, user)
+
+        if messages[-1]['content'].strip().lower() == 'no':
+            break
+
         print()
         user = input('Answer: ')
         if not user or user == 'q':
             break

-        user += '\nIs anything else unclear? Please ask more questions until instructions are sufficient to write the code.'
+        user += '\n\nIs anything else unclear? If everything is sufficiently clear to write the code, just answer "no".'

-    # TOOD: Stop using clarify prompt. Just append questions and answers to the main prompt.
-    prompt = dbs.identity['clarify']
-    messages = ai.next([ai.fsystem(prompt)] + messages[1:], prompt)
-    dbs.memory['clarified_prompt'] = messages[-1]['content']
+    return messages
+
+
+def run_clarified(ai: AI, dbs: DBs):
+    # get the messages from previous step
+    messages = json.loads(dbs.logs[clarify.__name__])
+
+    messages = (
+        [
+            ai.fsystem(setup_sys_prompt(dbs)),
+        ] +
+        messages[1:]
+    )
+    messages = ai.next(messages, dbs.identity['use_qa'])
+    to_files(messages[-1]['content'], DB(str(dbs.workspace.path)+'_clarified'))
     return messages


 # STEPS: List[Callable[[AI, DBs], List]] = [
 STEPS=[
-    setup,
-    # clarify,
-    # run_clarified
-    # to_files,
+    # setup,
+    clarify,
+    run_clarified
     # improve_files,
     # run_tests,
     # ImproveBasedOnHumanComments