Separate into steps and wrap filesystem access

commit 15b353d975 (parent 026ac206c1)
Author: Anton Osika
Date: 2023-05-06 20:11:17 +02:00

13 changed files with 183 additions and 112 deletions

.gitignore (vendored, new file, +3)

@@ -0,0 +1,3 @@
input
memory
TODO.md

ai.py (new file, +38)

@@ -0,0 +1,38 @@
import openai


class AI:
    def __init__(self, **kwargs):
        self.kwargs = kwargs

    def start(self, system, user):
        messages = [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ]
        return self.next(messages)

    def fsystem(self, msg):
        return {"role": "system", "content": msg}

    def fuser(self, msg):
        return {"role": "user", "content": msg}

    def next(self, messages, prompt=None):
        if prompt:
            messages = messages + [{"role": "user", "content": prompt}]

        response = openai.ChatCompletion.create(
            messages=messages,
            **self.kwargs
        )

        chat = []
        for chunk in response:
            delta = chunk['choices'][0]['delta']
            msg = delta.get('content', '')
            print(msg, end="")
            chat.append(msg)
        return messages + [{"role": "assistant", "content": "".join(chat)}]
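For orientation, a minimal usage sketch of the wrapper above (not part of the commit; the prompts are made up). The kwargs are forwarded verbatim to `openai.ChatCompletion.create`, and since `next` iterates the response as chunks, the wrapper implicitly assumes `stream=True`:

```
from ai import AI

# kwargs go straight to openai.ChatCompletion.create;
# stream=True is required because next() iterates response chunks
ai = AI(model="gpt-4", temperature=0.1, stream=True)

# start() seeds the chat with a system and a user message, prints the
# streamed reply, and returns the message list with the reply appended
messages = ai.start("You are a helpful programmer.", "Write a hello world script.")

# next() optionally appends a follow-up user prompt and continues
messages = ai.next(messages, "Now explain what it does.")
```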

chat_to_files.py (modified)

@@ -21,13 +21,9 @@ def parse_chat(chat):  # -> List[Tuple[str, str]]:
     return files

-def to_files(chat, path):
-    os.makedirs(path, exist_ok=True)
-    with open(os.path.join(path, 'all_output.txt'), "w") as f:
-        f.write(chat)
+def to_files(chat, workspace):
+    workspace['all_output.txt'] = chat

     files = parse_chat(chat)
     for file_name, file_content in files:
-        with open(os.path.join(path, file_name), "w") as f:
-            f.write(file_content)
+        workspace[file_name] = file_content
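The point of this change is that `to_files` no longer touches the filesystem directly: it only assigns into a mapping, so a `db.DB` or even a plain dict can serve as the workspace. A small sketch (not in the commit; the chat string is a placeholder, and the format of the extracted files depends on `parse_chat`, whose body is not shown here):

```
from chat_to_files import to_files

chat = "...model output containing code blocks..."  # placeholder
workspace = {}  # any object with __setitem__ works, e.g. db.DB

to_files(chat, workspace)
assert workspace['all_output.txt'] == chat  # raw transcript is always stored
# each file that parse_chat() extracts is also written: workspace[name] = content
```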

db.py (new file, +28)

@@ -0,0 +1,28 @@
from dataclasses import dataclass
import os
from pathlib import Path


class DB:
    def __init__(self, path):
        self.path = Path(path).absolute()
        os.makedirs(self.path, exist_ok=True)

    def __getitem__(self, key):
        with open(self.path / key) as f:
            return f.read()

    def __setitem__(self, key, val):
        with open(self.path / key, 'w') as f:
            f.write(val)


# dataclass for all dbs:
@dataclass
class DBs:
    memory: DB
    logs: DB
    identity: DB
    input: DB
    workspace: DB
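In effect, `DB` is a dict-like view over a directory: keys are file names and values are file contents. A quick sketch (not part of the commit; the path is an example):

```
from db import DB

db = DB('/tmp/demo_db')        # creates /tmp/demo_db if missing
db['notes.txt'] = 'hello'      # writes the file /tmp/demo_db/notes.txt
assert db['notes.txt'] == 'hello'
```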

identity/clarify (new file, +5)

@@ -0,0 +1,5 @@
You will improve instructions by reading:
1. ORIGINAL INSTRUCTIONS
2. CLARIFYING QUESTIONS AND ANSWERS
As output, you will give a new version of the original instructions, with the answers to the clarifying questions incorporated so that they are completely clear.

identity/philosophy (new file, +5)

@@ -0,0 +1,5 @@
You almost always put different classes in different files
Python toolbelt preferences:
- pytest
- dataclasses

identity/qa (new file, +4)

@@ -0,0 +1,4 @@
You will read instructions and NOT carry them out, only seek to CLARIFY them.
You will carry out the steps:
1. Write a list of super short bullets of areas that are unclear
2. Ask only one clarifying question and wait for a reply

main.py (modified, 62 changed lines)

@@ -1,47 +1,45 @@
+import json
 import os
 import pathlib
 from typing import Optional

-import openai
-
-from chat_to_files import to_files
+from ai import AI
+from steps import STEPS
+from db import DB, DBs

 import typer

 app = typer.Typer()


 @app.command()
 def chat(
-    engine: str = "gpt-4",
-    temperature: float = 0.0,
+    model: str = "gpt-4",
+    temperature: float = 0.1,
     max_tokens: int = 4096,
     n: int = 1,
     stream: bool = True,
-    system_prompt: str = typer.Argument("system", help="System prompt file"),
-    user_prompt: str = typer.Argument("user", help="User prompt file"),
-    code_to_file_path: Optional[str] = typer.Option(
+    input_path: str = typer.Argument(
+        None, help="input path"
+    ),
+    memory_path: str = typer.Argument(
+        None, help="memory path"
+    ),
+    workspace_path: Optional[str] = typer.Option(
         None, "--out", "-c", help="Code to file path"
     ),
 ):
-    # ensure file path corresponds to file in the same file as this script, using __file__
-    if system_prompt == "system":
-        # get folder of script
-        system_prompt = pathlib.Path(__file__).parent / system_prompt
+    if memory_path is None:
+        memory_path = pathlib.Path(__file__).parent / 'memory'

-    if user_prompt == "user":
-        user_prompt = pathlib.Path(__file__).parent / user_prompt
+    if input_path is None:
+        input_path = pathlib.Path(__file__).parent / 'input'

-    with open(system_prompt, "r") as f:
-        system_prompt = f.read()
-
-    with open(user_prompt, "r") as f:
-        user_prompt = f.read()
-
-    response = openai.ChatCompletion.create(
-        model=engine,
-        messages=[
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_prompt},
-        ],
+    ai = AI(
+        model=model,
         temperature=temperature,
         max_tokens=max_tokens,
         n=n,
@@ -49,15 +47,19 @@ def chat(
         stop=None,
     )

-    chat = []
-    for chunk in response:
-        delta = chunk['choices'][0]['delta']
-        msg = delta.get('content', '')
-        print(msg, end="")
-        chat.append(msg)
+    dbs = DBs(
+        memory=DB(memory_path),
+        logs=DB(pathlib.Path(memory_path) / 'logs'),
+        input=DB(input_path),
+        workspace=DB(workspace_path),
+        identity=DB(pathlib.Path(__file__).parent / 'identity'),
+    )

-    if code_to_file_path is not None:
-        to_files("".join(chat), code_to_file_path)
+    run_prefix = workspace_path.split('/')[-1] + '_' if workspace_path is not None else ''
+
+    for step in STEPS:
+        messages = step(ai, dbs)
+        dbs.logs[run_prefix + step.__name__] = json.dumps(messages)

 if __name__ == "__main__":
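A hypothetical invocation of the reworked CLI (not in the commit; paths are examples). `input_path` and `memory_path` are positional arguments, and the workspace comes from `--out`:

```
# Hypothetical run: input/ must contain a file named 'main_prompt';
# each step's message transcript is written under memory/logs/.
#
#   python main.py ./input ./memory --out ./workspace
#
# With --out ./workspace, run_prefix is 'workspace_', so logs are keyed
# 'workspace_<step name>', e.g. 'workspace_setup' for the setup step.
```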

steps.py (new file, +51)

@@ -0,0 +1,51 @@
from dataclasses import dataclass
from typing import Callable, List

from ai import AI
from chat_to_files import to_files
from db import DBs
from db import DB


def setup_sys_prompt(dbs):
    return dbs.identity['setup'] + '\nUseful to know:\n' + dbs.identity['philosophy']


def setup(ai: AI, dbs: DBs):
    messages = ai.start(setup_sys_prompt(dbs), dbs.input['main_prompt'])
    to_files(messages[-1]['content'], dbs.workspace)
    return messages


def run_clarified(ai: AI, dbs: DBs):
    messages = ai.start(setup_sys_prompt(dbs), dbs.input['main_prompt'])
    to_files(messages[-1]['content'], DB(str(dbs.workspace.path) + '_clarified'))
    return messages


def clarify(ai: AI, dbs: DBs):
    messages = [ai.fsystem(dbs.identity['qa'])]
    user = dbs.input['main_prompt']
    while True:
        messages = ai.next(messages, user)
        print()
        user = input('Answer: ')
        if not user or user == 'q':
            break
        user += '\nIs anything else unclear? Please ask more questions until instructions are sufficient to write the code.'

    # TODO: Stop using clarify prompt. Just append questions and answers to the main prompt.
    prompt = dbs.identity['clarify']
    messages = ai.next([ai.fsystem(prompt)] + messages[1:], prompt)
    dbs.memory['clarified_prompt'] = messages[-1]['content']
    return messages


# STEPS: List[Callable[[AI, DBs], List]] = [
STEPS = [
    setup,
    # clarify,
    # run_clarified,
    # to_files,
    # improve_files,
    # run_tests,
    # ImproveBasedOnHumanComments
]
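Since every step shares the `Callable[[AI, DBs], List]` shape from the commented-out type hint, extending the pipeline is just appending a function to `STEPS`. A hypothetical extra step (not in the commit):

```
def summarize(ai: AI, dbs: DBs):
    # hypothetical step: store a model-written summary of the main prompt
    messages = ai.start(dbs.identity['philosophy'], dbs.input['main_prompt'])
    dbs.memory['summary'] = messages[-1]['content']
    return messages

STEPS = [setup, summarize]  # steps run in order; each gets its own log entry
```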

tests/__init__.py (new file, empty)

tests/test_db.py (new file, +14)

@@ -0,0 +1,14 @@
from ..db import DB


def test_db():
    # use /tmp for testing
    db = DB('/tmp/test_db')
    db['test'] = 'test'
    assert db['test'] == 'test'
    db['test'] = 'test2'
    assert db['test'] == 'test2'
    db['test2'] = 'test2'
    assert db['test2'] == 'test2'
    assert db['test'] == 'test2'
    print('test_db passed')

user (deleted, -75)

@@ -1,75 +0,0 @@
Instructions:
We are writing a feature computation framework.
It will mainly consist of FeatureBuilder classes.
Each Feature Builder will have the methods:
- get(key, context, cache): To first check cache, and then go on to call dependencies to compute the feature. Returns value and hash of value.
- dry_run(key, context): To check that "type" of key will match input requirements of features
- input_type(context): That explains what dimensions key is applying to
- output_type(context): That explains what type the output is
It will have the class attr:
- deps: list of FeatureBuilder classes
Where it is unclear, please make assumptions and add a comment in the code about it
Here is an example of Builders we want:
ProductEmbeddingString: takes product_id, queries the product_db and gets the title as a string
ProductEmbedding: takes string and returns and embedding
ProductEmbeddingDB: takes just `merchant` name, uses all product_ids and returns the blob that is a database of embeddings
ProductEmbeddingSearcher: takes a string, constructs embeddingDB feature (note: all features are cached), embeds the string and searches the db
LLMProductPrompt: queries the ProductEmbeddingString, and formats a template that says "get recommendations for {title}"
LLMSuggestions: Takes product_id, looks up prompts and gets list of suggestions of product descriptions
LLMLogic: Takes the product_id, gets the LLM suggestions, embeds the suggestions, does a search, and returns a list of product_ids
The LLMLogic is the logic_builder in a file such as this one:
```
def main(merchant, market):
    cache = get_cache()
    interaction_data_db = get_interaction_data_db()
    product_db = get_product_db()
    merchant_config = get_merchant_config(merchant)[merchant]
    context = Context(
        interaction_data_db=interaction_data_db,
        product_db=product_db,
        merchant_config=merchant_config,
    )
    product_ids = cache(ProductIds.get)(
        key=(merchant, market),
        context=context,
        cache=cache,
    )
    for logic_builder in merchant_config['logic_builders']:
        for product_id in product_ids:
            key = (merchant, market, product_id)
            p2p_recs = cache(logic_builder.get)(key, cache, context)
            redis.set(key, p2p_recs)
```
API to product_db:
```
async def get_product_attribute_dimensions(
    self,
) -> dict[AttributeId, Dimension]:
    return await self.repository.get_product_attribute_dimensions(self.merchant)

async def get_products(
    self,
    attribute_ids: set[AttributeId],
    product_ids: set[ProductId] | None = None,
) -> dict[ProductId, dict[AttributeId, dict[IngestionDimensionKey, Any]]]:
    return await self.repository.get_products_dict(
        self.merchant, attribute_ids, product_ids
    )
```
(note: dimensions are not so important; they relate to information that varies by locale, warehouse, pricelist, etc.)
Remember to read the Instructions carefully.
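The deleted prompt above amounts to an interface spec for the FeatureBuilder classes it describes. For readability, here is a skeleton consistent with that prose (all signatures are guesses from the spec, not code from this repository):

```
class FeatureBuilder:
    deps: list  # FeatureBuilder classes this builder depends on

    def get(self, key, context, cache):
        """Check the cache, else call deps to compute. Returns (value, hash of value)."""

    def dry_run(self, key, context):
        """Check that the 'type' of key matches the input requirements."""

    def input_type(self, context):
        """Which dimensions the key applies to."""

    def output_type(self, context):
        """The type of the output."""
```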