chore(benchmark): Upgrade OpenAI client lib from v0 to v1

2026-02-10 08:44:27 +01:00 · 2024-01-16 15:49:46 +01:00
parent 056163ee57
commit 0a4185a919
4 changed files with 33 additions and 23 deletions
--- a/benchmark/agbenchmark/utils/challenge.py
+++ b/benchmark/agbenchmark/utils/challenge.py
@@ -9,9 +9,9 @@ from abc import ABC
 from pathlib import Path
 from typing import Any, ClassVar, List

-import openai
 import pytest
 from colorama import Fore, Style
+from openai import OpenAI

 from agbenchmark.agent_api_interface import run_api_agent
 from agbenchmark.config import AgentBenchmarkConfig
@@ -198,7 +198,7 @@ class Challenge(ABC):

    @classmethod
    def llm_eval(cls, content: str, ground: Ground) -> float:
-        openai.api_key = os.getenv("OPENAI_API_KEY")
        if os.getenv("IS_MOCK"):
            return 1.0
+        openai_client = OpenAI()  # after mock check: requires OPENAI_API_KEY

@@ -213,14 +213,14 @@ class Challenge(ABC):

        prompt += END_PROMPT

-        answer = openai.ChatCompletion.create(
+        answer = openai_client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": prompt},
            ],
        )

-        return float(answer["choices"][0]["message"]["content"])  # type: ignore
+        return float(answer.choices[0].message.content)  # type: ignore

    @classmethod
    def get_scores(cls, workspace: Path) -> dict[str, Any]:
--- a/benchmark/notebooks/LLM
+++ b/benchmark/notebooks/LLM
@@ -7,23 +7,21 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "import openai\n",
    "import os\n",
    "from dotenv import load_dotenv\n",
+    "from openai import OpenAI\n",
    "\n",
    "load_dotenv()\n",
    "\n",
-    "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n",
-    "\n",
    "def llm_eval(evaluation: str) -> float:\n",
-    "    openai.api_key = OPENAI_API_KEY\n",
-    "    answer = openai.ChatCompletion.create(\n",
+    "    openai_client = OpenAI()\n",
+    "    answer = openai_client.chat.completions.create(\n",
    "        model=\"gpt-4\",\n",
    "        messages=[\n",
    "            {\"role\": \"system\", \"content\": evaluation},\n",
    "        ],\n",
    "    )\n",
-    "    return answer[\"choices\"][0][\"message\"][\"content\"]"
+    "    return answer.choices[0].message.content"
   ]
  },
  {
--- a/benchmark/poetry.lock
+++ b/benchmark/poetry.lock
@@ -595,6 +595,17 @@ files = [
    {file = "distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64"},
 ]

+[[package]]
+name = "distro"
+version = "1.9.0"
+description = "Distro - an OS platform information API"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"},
+    {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
+]
+
 [[package]]
 name = "exceptiongroup"
 version = "1.2.0"
@@ -1581,25 +1592,26 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]

 [[package]]
 name = "openai"
-version = "0.27.10"
-description = "Python client library for the OpenAI API"
+version = "1.7.2"
+description = "The official Python library for the openai API"
 optional = false
 python-versions = ">=3.7.1"
 files = [
-    {file = "openai-0.27.10-py3-none-any.whl", hash = "sha256:beabd1757e3286fa166dde3b70ebb5ad8081af046876b47c14c41e203ed22a14"},
-    {file = "openai-0.27.10.tar.gz", hash = "sha256:60e09edf7100080283688748c6803b7b3b52d5a55d21890f3815292a0552d83b"},
+    {file = "openai-1.7.2-py3-none-any.whl", hash = "sha256:8f41b90a762f5fd9d182b45851041386fed94c8ad240a70abefee61a68e0ef53"},
+    {file = "openai-1.7.2.tar.gz", hash = "sha256:c73c78878258b07f1b468b0602c6591f25a1478f49ecb90b9bd44b7cc80bce73"},
 ]

 [package.dependencies]
-aiohttp = "*"
-requests = ">=2.20"
-tqdm = "*"
+anyio = ">=3.5.0,<5"
+distro = ">=1.7.0,<2"
+httpx = ">=0.23.0,<1"
+pydantic = ">=1.9.0,<3"
+sniffio = "*"
+tqdm = ">4"
+typing-extensions = ">=4.7,<5"

 [package.extras]
-datalib = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
-dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-mock"]
-embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"]
-wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"]
+datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]

 [[package]]
 name = "outcome"
@@ -2748,4 +2760,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "5987d20b6d95ede57bcb9182836710dee96cab081f68b7a887f3585d13489adc"
+content-hash = "e0d1f991958a5d630287c7bb668e7fdc6183630e06196cf6f507a086be10baec"
--- a/benchmark/pyproject.toml
+++ b/benchmark/pyproject.toml
@@ -11,7 +11,7 @@ packages = [{ include = "agbenchmark" }]
 python = "^3.10"
 pytest = "^7.3.2"
 requests = "^2.31.0"
-openai = "^0.27.8"
+openai = "^1.7.2"
 pydantic = "^1.10.9"
 python-dotenv = "^1.0.0"
 click = "^8.1.3"