mirror of https://github.com/aljazceru/gpt-engineer.git, synced 2025-12-17 20:55:09 +01:00
Seedling of our engineer is here
33
chat_to_files.py
Normal file
@@ -0,0 +1,33 @@
from typing import List, Tuple

import os
import re


def parse_chat(chat) -> List[Tuple[str, str]]:
    # Get all ``` blocks
    regex = r"```(.*?)```"

    matches = re.finditer(regex, chat, re.DOTALL)

    files = []
    for match in matches:
        # The first line of the block is the file path
        path = match.group(1).split("\n")[0]
        # Get the code
        code = match.group(1).split("\n")[1:]
        code = "\n".join(code)
        # Add the file to the list
        files.append((path, code))

    return files


def to_files(chat, path):
    os.makedirs(path, exist_ok=True)

    with open(os.path.join(path, "all_output.txt"), "w") as f:
        f.write(chat)

    files = parse_chat(chat)
    for file_name, file_content in files:
        with open(os.path.join(path, file_name), "w") as f:
            f.write(file_content)
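For illustration, a minimal sketch of what parse_chat extracts from an invented transcript (the fence string is assembled at runtime only so the example quotes cleanly):

```
from chat_to_files import parse_chat

# An invented transcript in the format the system prompt asks for:
# one fenced block whose first line is the target file path.
fence = "`" * 3
chat = f"Here is the code:\n\n{fence}app.py\nprint('hi')\n{fence}\n"

# parse_chat returns (path, code) pairs; the code keeps the trailing
# newline that precedes the closing fence.
assert parse_chat(chat) == [("app.py", "print('hi')\n")]
```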
64
main.py
Normal file
@@ -0,0 +1,64 @@
import os
import pathlib
from typing import Optional

import openai
import typer

from chat_to_files import to_files


app = typer.Typer()


@app.command()
def chat(
    engine: str = "gpt-4",
    temperature: float = 0.0,
    max_tokens: int = 4096,
    n: int = 1,
    stream: bool = True,
    system_prompt: str = typer.Argument("system", help="System prompt file"),
    user_prompt: str = typer.Argument("user", help="User prompt file"),
    code_to_file_path: Optional[str] = typer.Option(
        None, "--out", "-c", help="Code to file path"
    ),
):
    # Resolve the default prompt files relative to this script's directory,
    # using __file__
    if system_prompt == "system":
        system_prompt = pathlib.Path(__file__).parent / system_prompt

    if user_prompt == "user":
        user_prompt = pathlib.Path(__file__).parent / user_prompt

    with open(system_prompt, "r") as f:
        system_prompt = f.read()
    with open(user_prompt, "r") as f:
        user_prompt = f.read()

    response = openai.ChatCompletion.create(
        model=engine,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=temperature,
        max_tokens=max_tokens,
        n=n,
        stream=stream,
        stop=None,
    )

    # Accumulate the streamed deltas while echoing them to stdout
    chat = []
    for chunk in response:
        delta = chunk["choices"][0]["delta"]
        msg = delta.get("content", "")
        print(msg, end="")
        chat.append(msg)

    if code_to_file_path is not None:
        to_files("".join(chat), code_to_file_path)


if __name__ == "__main__":
    app()
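With typer's argument defaults, an invocation like `python main.py --out generated` should read the `system` and `user` files next to the script and write any fenced files in the reply to `generated/`. As for the loop above: in the legacy openai ChatCompletion streaming format, not every chunk's delta carries a `content` key (the first usually holds only the role, the last is empty), which is why the code uses `delta.get('content', '')`. A sketch with fabricated chunks:

```
# Fabricated chunks mimicking the legacy streaming shape (an assumption
# about the chunk layout): role-only first delta, then content deltas,
# then an empty closing delta.
fake_stream = [
    {"choices": [{"delta": {"role": "assistant"}}]},
    {"choices": [{"delta": {"content": "Hello"}}]},
    {"choices": [{"delta": {"content": " world"}}]},
    {"choices": [{"delta": {}}]},
]

chat = []
for chunk in fake_stream:
    delta = chunk["choices"][0]["delta"]
    chat.append(delta.get("content", ""))  # tolerate deltas without content

assert "".join(chat) == "Hello world"
```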
15
system
Normal file
@@ -0,0 +1,15 @@
You will get instructions for code to write.
You will write a very long answer. Make sure that every detail of the architecture is, in the end, implemented as code.

You will first lay out the names of the core classes, functions, and methods that will be necessary, as well as a quick comment on their purpose.
Then you will output the content of each file, with the syntax below.
(You will start with the "entrypoint" file, then go to the ones that are imported by that file, and so on.)
Make sure that files contain all imports, types etc., and that the code in different files is compatible.
Implement all of the code; if you are unsure, write a plausible implementation.
Before you finish, double check that all parts of the architecture are present in the files.

File syntax:

```main_file.py
[ADD YOUR CODE HERE]
```
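For illustration, a model answer that follows this syntax could look like the two invented files below; each block's first line is the path, which is exactly the shape parse_chat in chat_to_files.py consumes:

```main.py
from utils import greet

greet("world")
```

```utils.py
def greet(name: str) -> None:
    # Print a short greeting for the given name
    print(f"hello {name}")
```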
75
user
Normal file
@@ -0,0 +1,75 @@
Instructions:
We are writing a feature computation framework.

It will mainly consist of FeatureBuilder classes.

Each FeatureBuilder will have the methods:
- get(key, context, cache): first checks the cache, then calls its dependencies to compute the feature. Returns the value and a hash of the value.
- dry_run(key, context): checks that the "type" of the key will match the input requirements of features
- input_type(context): explains which dimensions the key applies to
- output_type(context): explains what type the output is

It will have the class attribute:
- deps: list of FeatureBuilder classes

Where it is unclear, please make assumptions and add a comment in the code about it.

Here is an example of the Builders we want:

ProductEmbeddingString: takes a product_id, queries the product_db and gets the title as a string
ProductEmbedding: takes a string and returns an embedding
ProductEmbeddingDB: takes just the `merchant` name, uses all product_ids and returns the blob that is a database of embeddings
ProductEmbeddingSearcher: takes a string, constructs the embeddingDB feature (note: all features are cached), embeds the string and searches the db
LLMProductPrompt: queries ProductEmbeddingString and formats a template that says "get recommendations for {title}"
LLMSuggestions: takes a product_id, looks up prompts and gets a list of suggested product descriptions
LLMLogic: takes the product_id, gets the LLM suggestions, embeds the suggestions, does a search, and returns a list of product_ids


The LLMLogic is the logic_builder in a file such as this one:
```
def main(merchant, market):
    cache = get_cache()
    interaction_data_db = get_interaction_data_db()
    product_db = get_product_db()
    merchant_config = get_merchant_config(merchant)[merchant]

    context = Context(
        interaction_data_db=interaction_data_db,
        product_db=product_db,
        merchant_config=merchant_config,
    )

    product_ids = cache(ProductIds.get)(
        key=(merchant, market),
        context=context,
        cache=cache,
    )

    for logic_builder in merchant_config['logic_builders']:
        for product_id in product_ids:
            key = (merchant, market, product_id)
            p2p_recs = cache(logic_builder.get)(key, cache, context)
            redis.set(key, p2p_recs)
```

API to product_db:
```
async def get_product_attribute_dimensions(
    self,
) -> dict[AttributeId, Dimension]:
    return await self.repository.get_product_attribute_dimensions(self.merchant)


async def get_products(
    self,
    attribute_ids: set[AttributeId],
    product_ids: set[ProductId] | None = None,
) -> dict[ProductId, dict[AttributeId, dict[IngestionDimensionKey, Any]]]:
    return await self.repository.get_products_dict(
        self.merchant, attribute_ids, product_ids
    )
```

(Note: dimensions are not so important. They relate to information that varies by locale, warehouse, pricelist, etc.)


Remember to read the Instructions carefully.
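As a hedged sketch only: one way the FeatureBuilder interface described above could be shaped in Python. Everything beyond the listed method and attribute names is an assumption; in particular, cache is treated as a plain dict here, whereas the prompt's example wraps callables with it.

```
import hashlib
import json
from typing import Any, ClassVar, List, Tuple


class FeatureBuilder:
    # Subclasses list the FeatureBuilder classes they depend on.
    deps: ClassVar[List[type]] = []

    @classmethod
    def get(cls, key, context, cache) -> Tuple[Any, str]:
        # First check the cache, then compute via dependencies.
        cache_key = (cls.__name__, key)
        if cache_key in cache:
            return cache[cache_key]
        dep_values = [dep.get(key, context, cache)[0] for dep in cls.deps]
        value = cls.compute(key, context, dep_values)
        digest = hashlib.sha256(
            json.dumps(value, default=str).encode()
        ).hexdigest()
        cache[cache_key] = (value, digest)
        return value, digest

    @classmethod
    def compute(cls, key, context, dep_values):
        raise NotImplementedError

    @classmethod
    def dry_run(cls, key, context) -> bool:
        # Check that the "type" of the key matches this feature's input.
        return cls.input_type(context) == type(key).__name__

    @classmethod
    def input_type(cls, context) -> str:
        raise NotImplementedError

    @classmethod
    def output_type(cls, context) -> str:
        raise NotImplementedError
```

A subclass would then set deps and implement compute, input_type, and output_type; get memoizes on (class name, key), which matches the prompt's note that all features are cached.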