From 2b8e056d5d4d14665b88a01c41356253c94b9259 Mon Sep 17 00:00:00 2001 From: Anton Osika Date: Sun, 2 Jul 2023 19:17:15 +0200 Subject: [PATCH] =?UTF-8?q?Add=20flow=20to=20ask=20for=20consent=20to=20sh?= =?UTF-8?q?are=20learnings=20=E2=80=93=20finally=20(#471)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Consent flow * Fix pre-commit * Fix ruff * Remove codespell * Remove codespell fully * whitespace --- .gitignore | 2 ++ .pre-commit-config.yaml | 6 ---- README.md | 2 -- gpt_engineer/collect.py | 5 ---- gpt_engineer/learning.py | 62 +++++++++++++++++++++++++++++++++++++++- gpt_engineer/main.py | 4 ++- pyproject.toml | 5 ---- 7 files changed, 66 insertions(+), 20 deletions(-) diff --git a/.gitignore b/.gitignore index 548bbb6..58b0fc0 100644 --- a/.gitignore +++ b/.gitignore @@ -56,3 +56,5 @@ projects # Benchmark files benchmark !benchmark/*/prompt + +.gpte_consent diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9f87bd5..2bc8970 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,9 +31,3 @@ repos: - id: detect-private-key - id: end-of-file-fixer - id: trailing-whitespace - - - repo: https://github.com/codespell-project/codespell - rev: v2.2.5 - hooks: - - id: codespell - additional_dependencies: [tomli] diff --git a/README.md b/README.md index 4e9fbea..229f4e9 100644 --- a/README.md +++ b/README.md @@ -64,8 +64,6 @@ To get started, create a codespace for this repository by clicking this 👇 [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/AntonOsika/gpt-engineer/codespaces) - - ## Features You can specify the "identity" of the AI agent by editing the files in the `preprompts` folder. diff --git a/gpt_engineer/collect.py b/gpt_engineer/collect.py index cf8d260..cb25e6a 100644 --- a/gpt_engineer/collect.py +++ b/gpt_engineer/collect.py @@ -1,5 +1,4 @@ import hashlib -import os from typing import List @@ -23,10 +22,6 @@ def send_learning(learning: Learning): def collect_learnings(model: str, temperature: float, steps: List[Step], dbs: DBs): - if os.environ.get("COLLECT_LEARNINGS_OPT_IN") in ["false", "1"]: - print("COLLECT_LEARNINGS_OPT_IN is set to false, not collecting learning") - return - learnings = extract_learning( model, temperature, steps, dbs, steps_file_hash=steps_file_hash() ) diff --git a/gpt_engineer/learning.py b/gpt_engineer/learning.py index becb011..4864473 100644 --- a/gpt_engineer/learning.py +++ b/gpt_engineer/learning.py @@ -1,4 +1,5 @@ import json +import os import random import tempfile @@ -83,7 +84,9 @@ def human_input() -> Review: "If you have time, please explain what was not working " + colored("(ok to leave blank)\n", "light_green") ) - print(colored("Thank you", "light_green")) + + check_consent() + return Review( raw=", ".join([ran, perfect, useful]), ran={"y": True, "n": False, "u": None, "": None}[ran], @@ -93,6 +96,63 @@ def human_input() -> Review: ) +def check_consent(): + path = Path(".gpte_consent") + if path.exists() and path.read_text() == "true": + return + ans = input("Is it ok if we store your prompts to learn? (y/n)") + while ans.lower() not in ("y", "n"): + ans = input("Invalid input. Please enter y or n: ") + + if ans.lower() == "y": + path.write_text("true") + print(colored("Thank you️", "light_green")) + print() + print("(If you change your mind, delete the file .gpte_consent)") + else: + print(colored("We understand ❤️", "light_green")) + + +def collect_consent() -> bool: + opt_out = os.environ.get("COLLECT_LEARNINGS_OPT_OUT") == "true" + consent_flag = Path(".gpte_consent") + has_given_consent = consent_flag.exists() and consent_flag.read_text() == "true" + + if opt_out: + if has_given_consent: + return ask_if_can_store() + return False + + if has_given_consent: + return True + + if ask_if_can_store(): + consent_flag.write_text("true") + print() + print("(If you change your mind, delete the file .gpte_consent)") + return True + return False + + +def ask_if_can_store() -> bool: + print() + can_store = input( + "Have you understood and agree to that " + + colored("OpenAI ", "light_green") + + "and " + + colored("gpt-engineer ", "light_green") + + "store anonymous learnings about how gpt-engineer is used " + + "(with the sole purpose of improving it)?\n(y/n)" + ).lower() + while can_store not in ("y", "n"): + can_store = input("Invalid input. Please enter y or n: ").lower() + + if can_store == "n": + print(colored("Ok we understand", "light_green")) + + return can_store == "y" + + def logs_to_string(steps: List[Step], logs: DB): chunks = [] for step in steps: diff --git a/gpt_engineer/main.py b/gpt_engineer/main.py index 2312f43..e81de45 100644 --- a/gpt_engineer/main.py +++ b/gpt_engineer/main.py @@ -8,6 +8,7 @@ import typer from gpt_engineer.ai import AI, fallback_model from gpt_engineer.collect import collect_learnings from gpt_engineer.db import DB, DBs, archive +from gpt_engineer.learning import collect_consent from gpt_engineer.steps import STEPS, Config as StepsConfig app = typer.Typer() @@ -57,7 +58,8 @@ def main( messages = step(ai, dbs) dbs.logs[step.__name__] = json.dumps(messages) - collect_learnings(model, temperature, steps, dbs) + if collect_consent(): + collect_learnings(model, temperature, steps, dbs) if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index d7ff26f..d6a00f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,8 +99,3 @@ exclude = ''' )/ ) ''' - -[tool.codespell] -skip = '.git,*.pdf,*.svg' -# -# ignore-words-list = ''