From 2b8e056d5d4d14665b88a01c41356253c94b9259 Mon Sep 17 00:00:00 2001
From: Anton Osika <anton.osika@gmail.com>
Date: Sun, 2 Jul 2023 19:17:15 +0200
Subject: [PATCH] =?UTF-8?q?Add=20flow=20to=20ask=20for=20consent=20to=20sh?=
 =?UTF-8?q?are=20learnings=20=E2=80=93=20finally=20(#471)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Consent flow

* Fix pre-commit

* Fix ruff

* Remove codespell

* Remove codespell fully

* whitespace
---
 .gitignore               |  2 ++
 .pre-commit-config.yaml  |  6 ----
 README.md                |  2 --
 gpt_engineer/collect.py  |  5 ----
 gpt_engineer/learning.py | 62 +++++++++++++++++++++++++++++++++++++++-
 gpt_engineer/main.py     |  4 ++-
 pyproject.toml           |  5 ----
 7 files changed, 66 insertions(+), 20 deletions(-)

diff --git a/.gitignore b/.gitignore
index 548bbb6..58b0fc0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -56,3 +56,5 @@ projects
 # Benchmark files
 benchmark
 !benchmark/*/prompt
+
+.gpte_consent
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9f87bd5..2bc8970 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -31,9 +31,3 @@ repos:
       - id: detect-private-key
       - id: end-of-file-fixer
       - id: trailing-whitespace
-
-  - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.5
-    hooks:
-    - id: codespell
-      additional_dependencies: [tomli]
diff --git a/README.md b/README.md
index 4e9fbea..229f4e9 100644
--- a/README.md
+++ b/README.md
@@ -64,8 +64,6 @@ To get started, create a codespace for this repository by clicking this 👇
 
 [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/AntonOsika/gpt-engineer/codespaces)
 
-
-
 ## Features
 
 You can specify the "identity" of the AI agent by editing the files in the `preprompts` folder.
diff --git a/gpt_engineer/collect.py b/gpt_engineer/collect.py
index cf8d260..cb25e6a 100644
--- a/gpt_engineer/collect.py
+++ b/gpt_engineer/collect.py
@@ -1,5 +1,4 @@
 import hashlib
-import os
 
 from typing import List
 
@@ -23,10 +22,6 @@ def send_learning(learning: Learning):
 
 
 def collect_learnings(model: str, temperature: float, steps: List[Step], dbs: DBs):
-    if os.environ.get("COLLECT_LEARNINGS_OPT_IN") in ["false", "1"]:
-        print("COLLECT_LEARNINGS_OPT_IN is set to false, not collecting learning")
-        return
-
     learnings = extract_learning(
         model, temperature, steps, dbs, steps_file_hash=steps_file_hash()
     )
diff --git a/gpt_engineer/learning.py b/gpt_engineer/learning.py
index becb011..4864473 100644
--- a/gpt_engineer/learning.py
+++ b/gpt_engineer/learning.py
@@ -1,4 +1,5 @@
 import json
+import os
 import random
 import tempfile
 
@@ -83,7 +84,9 @@ def human_input() -> Review:
             "If you have time, please explain what was not working "
             + colored("(ok to leave blank)\n", "light_green")
         )
-    print(colored("Thank you", "light_green"))
+
+    check_consent()
+
     return Review(
         raw=", ".join([ran, perfect, useful]),
         ran={"y": True, "n": False, "u": None, "": None}[ran],
@@ -93,6 +96,63 @@ def human_input() -> Review:
     )
 
 
+def check_consent():
+    """Ask for consent to store prompts unless .gpte_consent already says "true"."""
+    path = Path(".gpte_consent")
+    # strip() so a hand-edited flag file ending in a newline still counts.
+    if path.exists() and path.read_text().strip() == "true":
+        return
+    ans = input("Is it ok if we store your prompts to learn? (y/n)").lower()
+    while ans not in ("y", "n"):
+        ans = input("Invalid input. Please enter y or n: ").lower()
+    if ans == "n":
+        print(colored("We understand ❤️", "light_green"))
+        return
+    path.write_text("true")  # persist consent so later runs skip the question
+    print(colored("Thank you️", "light_green"))
+    print("\n(If you change your mind, delete the file .gpte_consent)")
+
+
+def collect_consent() -> bool:
+    """Return whether learnings may be collected, asking the user when needed."""
+    opt_out = os.environ.get("COLLECT_LEARNINGS_OPT_OUT") == "true"
+    consent_flag = Path(".gpte_consent")
+    # strip() so a hand-edited flag file ending in a newline still counts.
+    has_given_consent = (
+        consent_flag.exists() and consent_flag.read_text().strip() == "true"
+    )
+
+    if opt_out:
+        # Opted out: collect only if an earlier consent is explicitly re-confirmed.
+        return has_given_consent and ask_if_can_store()
+    if has_given_consent:
+        return True
+    if not ask_if_can_store():
+        return False
+    consent_flag.write_text("true")  # remember the answer for later runs
+    print("\n(If you change your mind, delete the file .gpte_consent)")
+    return True
+
+
+def ask_if_can_store() -> bool:
+    """Prompt until the user answers y or n; return True only for y."""
+    print()
+    openai, gpte = (colored(s, "light_green") for s in ("OpenAI ", "gpt-engineer "))
+    question = (
+        f"Have you understood and agree to that {openai}and {gpte}"
+        "store anonymous learnings about how gpt-engineer is used "
+        "(with the sole purpose of improving it)?\n(y/n)"
+    )
+    reply = input(question).lower()
+    while reply not in ("y", "n"):
+        reply = input("Invalid input. Please enter y or n: ").lower()
+    agreed = reply == "y"
+    if not agreed:
+        # Acknowledge a refusal before handing the answer back.
+        print(colored("Ok we understand", "light_green"))
+    return agreed
+
+
 def logs_to_string(steps: List[Step], logs: DB):
     chunks = []
     for step in steps:
diff --git a/gpt_engineer/main.py b/gpt_engineer/main.py
index 2312f43..e81de45 100644
--- a/gpt_engineer/main.py
+++ b/gpt_engineer/main.py
@@ -8,6 +8,7 @@ import typer
 from gpt_engineer.ai import AI, fallback_model
 from gpt_engineer.collect import collect_learnings
 from gpt_engineer.db import DB, DBs, archive
+from gpt_engineer.learning import collect_consent
 from gpt_engineer.steps import STEPS, Config as StepsConfig
 
 app = typer.Typer()
@@ -57,7 +58,8 @@ def main(
         messages = step(ai, dbs)
         dbs.logs[step.__name__] = json.dumps(messages)
 
-    collect_learnings(model, temperature, steps, dbs)
+    if collect_consent():
+        collect_learnings(model, temperature, steps, dbs)
 
 
 if __name__ == "__main__":
diff --git a/pyproject.toml b/pyproject.toml
index d7ff26f..d6a00f0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -99,8 +99,3 @@ exclude = '''
   )/
 )
 '''
-
-[tool.codespell]
-skip = '.git,*.pdf,*.svg'
-#
-# ignore-words-list = ''