mirror of https://github.com/aljazceru/gpt-engineer.git (synced 2025-12-17 12:45:26 +01:00)
Separate into steps and wrap filesystem access
.gitignore (vendored, new file)
@@ -0,0 +1,3 @@
input
memory
TODO.md
ai.py (new file)
@@ -0,0 +1,38 @@
import openai


class AI:
    def __init__(self, **kwargs):
        self.kwargs = kwargs

    def start(self, system, user):
        messages = [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ]

        return self.next(messages)

    def fsystem(self, msg):
        return {"role": "system", "content": msg}

    def fuser(self, msg):
        return {"role": "user", "content": msg}

    def next(self, messages, prompt=None):
        if prompt:
            messages = messages + [{"role": "user", "content": prompt}]

        response = openai.ChatCompletion.create(
            messages=messages,
            **self.kwargs
        )

        chat = []
        for chunk in response:
            delta = chunk['choices'][0]['delta']
            msg = delta.get('content', '')
            print(msg, end="")
            chat.append(msg)
        return messages + [{"role": "assistant", "content": "".join(chat)}]
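For orientation, a minimal sketch of how this wrapper is meant to be driven (the model name and prompts are illustrative; it assumes a valid OpenAI API key and the pre-1.0 `openai` client that still exposes `ChatCompletion`):

```
# Illustrative usage of the AI class above.
from ai import AI

# Extra kwargs are passed straight through to ChatCompletion.create;
# stream=True is required because next() iterates over response chunks.
ai = AI(model="gpt-4", temperature=0.1, stream=True)

messages = ai.start("You are a helpful engineer.", "Write hello world in Python.")
messages = ai.next(messages, prompt="Now add a unit test.")
```

Each call streams the reply to stdout as it arrives and returns the extended message history, with the assistant's full answer joined on at the end.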
chat_to_files.py (modified)
@@ -21,13 +21,9 @@ def parse_chat(chat):  # -> List[Tuple[str, str]]:
     return files


-def to_files(chat, path):
-    os.makedirs(path, exist_ok=True)
+def to_files(chat, workspace):
+    workspace['all_output.txt'] = chat

-    with open(os.path.join(path, 'all_output.txt'), "w") as f:
-        f.write(chat)
-
     files = parse_chat(chat)
     for file_name, file_content in files:
-        with open(os.path.join(path, file_name), "w") as f:
-            f.write(file_content)
+        workspace[file_name] = file_content
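Since `to_files` now only assigns into `workspace`, anything supporting `__setitem__` satisfies it — a plain dict can stand in for a `DB` when experimenting. A sketch, assuming `parse_chat` tolerates input without code blocks:

```
# Illustrative: a plain dict satisfies the interface to_files relies on.
from chat_to_files import to_files

fake_workspace = {}
to_files("a chat transcript without any code blocks", fake_workspace)

# The full transcript is always stored, plus one entry per parsed file.
print(list(fake_workspace))  # ['all_output.txt']
```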
db.py (new file)
@@ -0,0 +1,28 @@
from dataclasses import dataclass
import os
from pathlib import Path


class DB:
    def __init__(self, path):
        self.path = Path(path).absolute()
        os.makedirs(self.path, exist_ok=True)

    def __getitem__(self, key):
        with open(self.path / key) as f:
            return f.read()

    def __setitem__(self, key, val):
        with open(self.path / key, 'w') as f:
            f.write(val)


# dataclass for all dbs:
@dataclass
class DBs:
    memory: DB
    logs: DB
    identity: DB
    input: DB
    workspace: DB
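A quick round-trip to make the `DB` semantics concrete — each key is simply a file name under the store's directory (the path below is illustrative):

```
from db import DB

db = DB('/tmp/scratch_db')         # creates the directory if needed
db['notes.txt'] = 'hello'          # writes /tmp/scratch_db/notes.txt
assert db['notes.txt'] == 'hello'  # reads the file back
```

`DBs` then just bundles the five stores a run needs, so steps receive `dbs.memory`, `dbs.workspace`, etc. instead of juggling raw paths — which is what the commit title means by wrapping filesystem access.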
identity/clarify (new file)
@@ -0,0 +1,5 @@
You will improve instructions by reading:
1. ORIGINAL INSTRUCTIONS
2. CLARIFYING QUESTIONS AND ANSWERS

As output you will give a new version of the original instructions in which the answers to the clarifying questions have been incorporated to make them completely clear.
identity/philosophy (new file)
@@ -0,0 +1,5 @@
You almost always put different classes in different files.

Python toolbelt preferences:
- pytest
- dataclasses
identity/qa (new file)
@@ -0,0 +1,4 @@
You will read instructions and NOT carry them out, only seek to CLARIFY them.
You will carry out the steps:
1. Write a list of super short bullets of areas that are unclear
2. Ask for only one clarifying question and wait for a reply
main.py (modified)
@@ -1,47 +1,45 @@
+import json
 import os
 import pathlib
 from typing import Optional
 import openai
 from chat_to_files import to_files
+from ai import AI
+from steps import STEPS
+from db import DB, DBs
 import typer


 app = typer.Typer()



 @app.command()
 def chat(
-    engine: str = "gpt-4",
-    temperature: float = 0.0,
+    model: str = "gpt-4",
+    temperature: float = 0.1,
     max_tokens: int = 4096,
     n: int = 1,
     stream: bool = True,
-    system_prompt: str = typer.Argument("system", help="System prompt file"),
-    user_prompt: str = typer.Argument("user", help="User prompt file"),
-    code_to_file_path: Optional[str] = typer.Option(
+    input_path: str = typer.Argument(
+        None, help="input path"
+    ),
+    memory_path: str = typer.Argument(
+        None, help="memory path"
+    ),
+    workspace_path: Optional[str] = typer.Option(
         None, "--out", "-c", help="Code to file path"
     ),
 ):

-    # ensure file path corresponds to file in the same file as this script, using __file__
-    if system_prompt == "system":
-        # get folder of script
-        system_prompt = pathlib.Path(__file__).parent / system_prompt
+    if memory_path is None:
+        memory_path = pathlib.Path(__file__).parent / 'memory'

-    if user_prompt == "user":
-        user_prompt = pathlib.Path(__file__).parent / user_prompt
+    if input_path is None:
+        input_path = pathlib.Path(__file__).parent / 'input'

-    with open(system_prompt, "r") as f:
-        system_prompt = f.read()
-    with open(user_prompt, "r") as f:
-        user_prompt = f.read()
-    response = openai.ChatCompletion.create(
-        model=engine,
-        messages=[
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_prompt},
-        ],
+    ai = AI(
+        model=model,
         temperature=temperature,
         max_tokens=max_tokens,
         n=n,
@@ -49,15 +47,19 @@ def chat(
         stop=None,
     )

-    chat = []
-    for chunk in response:
-        delta = chunk['choices'][0]['delta']
-        msg = delta.get('content', '')
-        print(msg, end="")
-        chat.append(msg)
+    dbs = DBs(
+        memory=DB(memory_path),
+        logs=DB(pathlib.Path(memory_path) / 'logs'),
+        input=DB(input_path),
+        workspace=DB(workspace_path),
+        identity=DB(pathlib.Path(__file__).parent / 'identity'),
+    )

-    if code_to_file_path is not None:
-        to_files("".join(chat), code_to_file_path)
+    run_prefix = workspace_path.split('/')[-1] + '_' if workspace_path is not None else ''
+
+    for step in STEPS:
+        messages = step(ai, dbs)
+        dbs.logs[run_prefix + step.__name__] = json.dumps(messages)


 if __name__ == "__main__":
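Taken together, the rework turns `chat` into a thin dispatcher: resolve the storage paths, build one `AI` and one `DBs`, then run each step and log its transcript. A condensed sketch of that flow with the CLI layer stripped away (the workspace path is illustrative):

```
# Hypothetical driver mirroring the new main.py flow, minus Typer.
import json
import pathlib

from ai import AI
from db import DB, DBs
from steps import STEPS

here = pathlib.Path(__file__).parent

ai = AI(model='gpt-4', temperature=0.1, stream=True)
dbs = DBs(
    memory=DB(here / 'memory'),
    logs=DB(here / 'memory' / 'logs'),
    input=DB(here / 'input'),        # must contain a main_prompt file
    workspace=DB('/tmp/workspace'),  # illustrative output directory
    identity=DB(here / 'identity'),
)

for step in STEPS:
    messages = step(ai, dbs)
    dbs.logs[step.__name__] = json.dumps(messages)  # one log file per step
```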
steps.py (new file)
@@ -0,0 +1,51 @@
from typing import List
from dataclasses import dataclass
from typing import Callable
from ai import AI
from chat_to_files import to_files

from db import DBs
from db import DB


def setup_sys_prompt(dbs):
    return dbs.identity['setup'] + '\nUseful to know:\n' + dbs.identity['philosophy']

def setup(ai: AI, dbs: DBs):
    messages = ai.start(setup_sys_prompt(dbs), dbs.input['main_prompt'])
    to_files(messages[-1]['content'], dbs.workspace)
    return messages

def run_clarified(ai: AI, dbs: DBs):
    messages = ai.start(setup_sys_prompt(dbs), dbs.input['main_prompt'])
    to_files(messages[-1]['content'], DB(str(dbs.workspace.path) + '_clarified'))
    return messages

def clarify(ai: AI, dbs: DBs):
    messages = [ai.fsystem(dbs.identity['qa'])]
    user = dbs.input['main_prompt']
    while True:
        messages = ai.next(messages, user)
        print()
        user = input('Answer: ')
        if not user or user == 'q':
            break

        user += '\nIs anything else unclear? Please ask more questions until instructions are sufficient to write the code.'

    # TODO: Stop using the clarify prompt. Just append questions and answers to the main prompt.
    prompt = dbs.identity['clarify']
    messages = ai.next([ai.fsystem(prompt)] + messages[1:], prompt)
    dbs.memory['clarified_prompt'] = messages[-1]['content']
    return messages


# STEPS: List[Callable[[AI, DBs], List]] = [
STEPS = [
    setup,
    # clarify,
    # run_clarified,
    # to_files,
    # improve_files,
    # run_tests,
    # ImproveBasedOnHumanComments
]
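The step contract is just a callable taking `(AI, DBs)` and returning the message list — which is what the commented-out `Callable[[AI, DBs], List]` hint says. A sketch of plugging in a hypothetical extra step (not part of this commit):

```
# Hypothetical extra step: everything it needs arrives via (ai, dbs),
# and whatever it returns gets JSON-logged by main.py.
def summarize(ai: AI, dbs: DBs):
    messages = ai.start('Summarize the following code.',
                        dbs.workspace['all_output.txt'])
    dbs.memory['summary'] = messages[-1]['content']
    return messages

STEPS = [setup, summarize]  # list order defines the pipeline
```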
tests/__init__.py (new file, empty)
tests/test_db.py (new file)
@@ -0,0 +1,14 @@
from db import DB


def test_db():
    # use /tmp for testing
    db = DB('/tmp/test_db')
    db['test'] = 'test'
    assert db['test'] == 'test'
    db['test'] = 'test2'
    assert db['test'] == 'test2'
    db['test2'] = 'test2'
    assert db['test2'] == 'test2'
    assert db['test'] == 'test2'
    print('test_db passed')
user (deleted)
@@ -1,75 +0,0 @@
Instructions:
We are writing a feature computation framework.

It will mainly consist of FeatureBuilder classes.

Each FeatureBuilder will have the methods:
- get(key, context, cache): To first check the cache, and then go on to call dependencies to compute the feature. Returns value and hash of value.
- dry_run(key, context): To check that the "type" of key will match the input requirements of features
- input_type(context): That explains what dimensions the key applies to
- output_type(context): That explains what type the output is

It will have the class attr:
- deps: list of FeatureBuilder classes

Where it is unclear, please make assumptions and add a comment in the code about it
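For reference, a rough skeleton of the interface this (now-deleted) prompt was asking for. The classmethod style and the `(value, hash)` tuple are assumptions inferred from the example driver further down:

```
# Hypothetical skeleton of the FeatureBuilder interface described above.
from typing import Any, Tuple


class FeatureBuilder:
    deps: list = []  # FeatureBuilder classes this feature depends on

    @classmethod
    def get(cls, key, context, cache) -> Tuple[Any, str]:
        """Check the cache first, then compute via deps; return (value, hash)."""
        raise NotImplementedError

    @classmethod
    def dry_run(cls, key, context) -> None:
        """Validate that the key's "type" matches this feature's input_type."""
        raise NotImplementedError

    @classmethod
    def input_type(cls, context):
        """Describe which dimensions the key applies to."""
        raise NotImplementedError

    @classmethod
    def output_type(cls, context):
        """Describe the type of the computed value."""
        raise NotImplementedError
```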
Here is an example of Builders we want:

ProductEmbeddingString: takes product_id, queries the product_db and gets the title as a string
ProductEmbedding: takes a string and returns an embedding
ProductEmbeddingDB: takes just the `merchant` name, uses all product_ids and returns the blob that is a database of embeddings
ProductEmbeddingSearcher: takes a string, constructs the embeddingDB feature (note: all features are cached), embeds the string and searches the db
LLMProductPrompt: queries the ProductEmbeddingString, and formats a template that says "get recommendations for {title}"
LLMSuggestions: takes product_id, looks up prompts and gets a list of suggestions of product descriptions
LLMLogic: takes the product_id, gets the LLM suggestions, embeds the suggestions, does a search, and returns a list of product_ids


The LLMLogic is the logic_builder in a file such as this one:
```
def main(merchant, market):
    cache = get_cache()
    interaction_data_db = get_interaction_data_db()
    product_db = get_product_db()
    merchant_config = get_merchant_config(merchant)[merchant]

    context = Context(
        interaction_data_db=interaction_data_db,
        product_db=product_db,
        merchant_config=merchant_config,
    )

    product_ids = cache(ProductIds.get)(
        key=(merchant, market),
        context=context,
        cache=cache,
    )

    for logic_builder in merchant_config['logic_builders']:
        for product_id in product_ids:
            key = (merchant, market, product_id)
            p2p_recs = cache(logic_builder.get)(key, cache, context)
            redis.set(key, p2p_recs)
```

API to product_db:
```
async def get_product_attribute_dimensions(
    self,
) -> dict[AttributeId, Dimension]:
    return await self.repository.get_product_attribute_dimensions(self.merchant)

async def get_products(
    self,
    attribute_ids: set[AttributeId],
    product_ids: set[ProductId] | None = None,
) -> dict[ProductId, dict[AttributeId, dict[IngestionDimensionKey, Any]]]:
    return await self.repository.get_products_dict(
        self.merchant, attribute_ids, product_ids
    )
```

(note: dimensions are not so important. They relate to information that varies by locale, warehouse, pricelist etc.)

Remember to read the Instructions carefully.